1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2009-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
14 #include "ustr_imp.h"
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "putilimp.h"
19 #include "uinvchar.h"
20 #include "ulocimp.h"
21 #include "uvector.h"
22 #include "uassert.h"
23
24
/* Node of a singly linked list; each node carries one variant string. */
struct VariantListEntry {
    const char              *variant;
    struct VariantListEntry *next;      /* NULL marks the end of the list */
};
typedef struct VariantListEntry VariantListEntry;
30
/* Node of a singly linked list; each node carries one attribute value. */
struct AttributeListEntry {
    const char                *attribute;
    struct AttributeListEntry *next;    /* NULL marks the end of the list */
};
typedef struct AttributeListEntry AttributeListEntry;
36
/* Node of a singly linked list; each node carries one extension key/value pair. */
struct ExtensionListEntry {
    const char                *key;
    const char                *value;
    struct ExtensionListEntry *next;    /* NULL marks the end of the list */
};
typedef struct ExtensionListEntry ExtensionListEntry;
43
44 #define MAXEXTLANG 3
45 typedef struct ULanguageTag {
46 char *buf; /* holding parsed subtags */
47 const char *language;
48 const char *extlang[MAXEXTLANG];
49 const char *script;
50 const char *region;
51 VariantListEntry *variants;
52 ExtensionListEntry *extensions;
53 const char *privateuse;
54 const char *grandfathered;
55 } ULanguageTag;
56
#define MINLEN 2

/* Characters with a special role inside a language tag. */
#define SEP        '-'      /* subtag separator */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT    'u'      /* singleton that multi-letter extension keys are ranked as */

/* Characters with a special role inside a locale ID. */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification. ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c)   uprv_isASCIILetter(c)
#define ISNUMERIC(c) ('0' <= (c) && (c) <= '9')

/* Fixed strings used by the conversion code. */
static const char EMPTY[]                  = "";
static const char LANG_UND[]               = "und";
static const char PRIVATEUSE_KEY[]         = "x";
static const char _POSIX[]                 = "_POSIX";
static const char POSIX_KEY[]              = "va";
static const char POSIX_VALUE[]            = "posix";
static const char LOCALE_ATTRIBUTE_KEY[]   = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[]        = "yes";

/* Length of LANG_UND, not counting the terminating NUL. */
#define LANG_UND_LEN 3
81
82 /*
83 Updated on 2018-09-12 from
84 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
85
This table has 2 parts. The part for grandfathered tags is generated by the
following script from the IANA language subtag registry.
88
89 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
90 egrep -A 7 'Type: grandfathered' | \
91 egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
92 awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
93 tr 'A-Z' 'a-z'
94
95
The 2nd part is made of five ICU-specific entries. They are kept for
backward compatibility for now, even though they have no preferred
values in the registry. They may have to be removed for strict BCP 47 compliance.
99
100 */
/*
 * Flat list of (grandfathered tag, replacement) pairs: the tag sits at an
 * even index and its replacement at the following odd index.
 */
static const char* const GRANDFATHERED[] = {
    /* Entries with a preferred value in the IANA registry. */
    "art-lojban",   "jbo",
    "en-gb-oed",    "en-gb-oxendict",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",

    /*
     * Entries without a preferred value in the IANA registry. They stay
     * for backward compatibility, since ICU has mapped them this way.
     */
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-mingo",      "see-x-i-mingo",
    "zh-min",       "nan-x-zh-min",
};
134
135 /*
136 Updated on 2018-09-12 from
137 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
138
The table lists redundant tags with a preferred value in the IANA language subtag registry.
140 It's generated with the following command:
141
142 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
143 grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
144 awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
145 tr 'A-Z' 'a-z'
146
In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
the variant sequence 'hepburn-heploc' has the preferred value 'alalc97'.
149 */
150
/*
 * Flat list of (redundant tag, preferred replacement) pairs: the tag sits
 * at an even index and its replacement at the following odd index.
 */
static const char* const REDUNDANT[] = {
    /* Redundant tags and their preferred values. */
    "sgn-br",       "bzs",
    "sgn-co",       "csn",
    "sgn-de",       "gsg",
    "sgn-dk",       "dsl",
    "sgn-es",       "ssp",
    "sgn-fr",       "fsl",
    "sgn-gb",       "bfi",
    "sgn-gr",       "gss",
    "sgn-ie",       "isg",
    "sgn-it",       "ise",
    "sgn-jp",       "jsl",
    "sgn-mx",       "mfs",
    "sgn-ni",       "ncs",
    "sgn-nl",       "dse",
    "sgn-no",       "nsl",
    "sgn-pt",       "psr",
    "sgn-se",       "swl",
    "sgn-us",       "ase",
    "sgn-za",       "sfs",
    "zh-cmn",       "cmn",
    "zh-cmn-hans",  "cmn-hans",
    "zh-cmn-hant",  "cmn-hant",
    "zh-gan",       "gan",
    "zh-wuu",       "wuu",
    "zh-yue",       "yue",

    /* Tag whose variant part has a preferred value. */
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
182
183 /*
184 Updated on 2018-09-12 from
185 https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
186
187 grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
188 grep -B1 'Preferred' | grep -v '^--' | \
189 awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
190
191 Make sure that 2-letter language subtags come before 3-letter subtags.
192 */
193 static const char DEPRECATEDLANGS[][4] = {
194 /* deprecated new */
195 "in", "id",
196 "iw", "he",
197 "ji", "yi",
198 "jw", "jv",
199 "mo", "ro",
200 "aam", "aas",
201 "adp", "dz",
202 "aue", "ktz",
203 "ayx", "nun",
204 "bgm", "bcg",
205 "bjd", "drl",
206 "ccq", "rki",
207 "cjr", "mom",
208 "cka", "cmr",
209 "cmk", "xch",
210 "coy", "pij",
211 "cqu", "quh",
212 "drh", "khk",
213 "drw", "prs",
214 "gav", "dev",
215 "gfx", "vaj",
216 "ggn", "gvr",
217 "gti", "nyc",
218 "guv", "duz",
219 "hrr", "jal",
220 "ibi", "opa",
221 "ilw", "gal",
222 "jeg", "oyb",
223 "kgc", "tdf",
224 "kgh", "kml",
225 "koj", "kwv",
226 "krm", "bmf",
227 "ktr", "dtp",
228 "kvs", "gdj",
229 "kwq", "yam",
230 "kxe", "tvd",
231 "kzj", "dtp",
232 "kzt", "dtp",
233 "lii", "raq",
234 "lmm", "rmx",
235 "meg", "cir",
236 "mst", "mry",
237 "mwj", "vaj",
238 "myt", "mry",
239 "nad", "xny",
240 "ncp", "kdz",
241 "nnx", "ngv",
242 "nts", "pij",
243 "oun", "vaj",
244 "pcr", "adx",
245 "pmc", "huw",
246 "pmu", "phr",
247 "ppa", "bfy",
248 "ppr", "lcq",
249 "pry", "prt",
250 "puz", "pub",
251 "sca", "hle",
252 "skk", "oyb",
253 "tdu", "dtp",
254 "thc", "tpo",
255 "thx", "oyb",
256 "tie", "ras",
257 "tkk", "twm",
258 "tlw", "weo",
259 "tmp", "tyj",
260 "tne", "kak",
261 "tnf", "prs",
262 "tsf", "taj",
263 "uok", "ema",
264 "xba", "cax",
265 "xia", "acn",
266 "xkh", "waw",
267 "xsj", "suj",
268 "ybd", "rki",
269 "yma", "lrr",
270 "ymt", "mtm",
271 "yos", "zom",
272 "yuu", "yug",
273 };
274
275 /*
276 Updated on 2018-04-24 from
277
278 curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
279 grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
280 grep -B1 'Preferred' | \
281 awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
282 */
/*
 * Flat list of (deprecated region subtag, replacement) pairs: the
 * deprecated subtag sits at an even row and its replacement at the
 * following odd row.
 */
static const char DEPRECATEDREGIONS[][3] = {
    "BU", "MM",    "DD", "DE",    "FX", "FR",
    "TP", "TL",    "YD", "YE",    "ZR", "CD",
};
292
293 /*
294 * -------------------------------------------------
295 *
296 * These ultag_ functions may be exposed as APIs later
297 *
298 * -------------------------------------------------
299 */
300
301 static ULanguageTag*
302 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
303
304 static void
305 ultag_close(ULanguageTag* langtag);
306
307 static const char*
308 ultag_getLanguage(const ULanguageTag* langtag);
309
310 #if 0
311 static const char*
312 ultag_getJDKLanguage(const ULanguageTag* langtag);
313 #endif
314
315 static const char*
316 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
317
318 static int32_t
319 ultag_getExtlangSize(const ULanguageTag* langtag);
320
321 static const char*
322 ultag_getScript(const ULanguageTag* langtag);
323
324 static const char*
325 ultag_getRegion(const ULanguageTag* langtag);
326
327 static const char*
328 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
329
330 static int32_t
331 ultag_getVariantsSize(const ULanguageTag* langtag);
332
333 static const char*
334 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
335
336 static const char*
337 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
338
339 static int32_t
340 ultag_getExtensionsSize(const ULanguageTag* langtag);
341
342 static const char*
343 ultag_getPrivateUse(const ULanguageTag* langtag);
344
345 #if 0
346 static const char*
347 ultag_getGrandfathered(const ULanguageTag* langtag);
348 #endif
349
350 namespace {
351
352 // Helper class to memory manage CharString objects.
353 // Only ever stack-allocated, does not need to inherit UMemory.
354 class CharStringPool {
355 public:
CharStringPool()356 CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
357 ~CharStringPool() = default;
358
359 CharStringPool(const CharStringPool&) = delete;
360 CharStringPool& operator=(const CharStringPool&) = delete;
361
create()362 icu::CharString* create() {
363 if (U_FAILURE(status)) {
364 return nullptr;
365 }
366 icu::CharString* const obj = new icu::CharString;
367 if (obj == nullptr) {
368 status = U_MEMORY_ALLOCATION_ERROR;
369 return nullptr;
370 }
371 pool.addElement(obj, status);
372 if (U_FAILURE(status)) {
373 delete obj;
374 return nullptr;
375 }
376 return obj;
377 }
378
379 private:
deleter(void * obj)380 static void U_CALLCONV deleter(void* obj) {
381 delete static_cast<icu::CharString*>(obj);
382 }
383
384 UErrorCode status;
385 icu::UVector pool;
386 };
387
388 } // namespace
389
390 /*
391 * -------------------------------------------------
392 *
393 * Language subtag syntax validation functions
394 *
395 * -------------------------------------------------
396 */
397
398 static UBool
_isAlphaString(const char * s,int32_t len)399 _isAlphaString(const char* s, int32_t len) {
400 int32_t i;
401 for (i = 0; i < len; i++) {
402 if (!ISALPHA(*(s + i))) {
403 return FALSE;
404 }
405 }
406 return TRUE;
407 }
408
409 static UBool
_isNumericString(const char * s,int32_t len)410 _isNumericString(const char* s, int32_t len) {
411 int32_t i;
412 for (i = 0; i < len; i++) {
413 if (!ISNUMERIC(*(s + i))) {
414 return FALSE;
415 }
416 }
417 return TRUE;
418 }
419
420 static UBool
_isAlphaNumericString(const char * s,int32_t len)421 _isAlphaNumericString(const char* s, int32_t len) {
422 int32_t i;
423 for (i = 0; i < len; i++) {
424 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
425 return FALSE;
426 }
427 }
428 return TRUE;
429 }
430
431 static UBool
_isLanguageSubtag(const char * s,int32_t len)432 _isLanguageSubtag(const char* s, int32_t len) {
433 /*
434 * language = 2*3ALPHA ; shortest ISO 639 code
435 * ["-" extlang] ; sometimes followed by
436 * ; extended language subtags
437 * / 4ALPHA ; or reserved for future use
438 * / 5*8ALPHA ; or registered language subtag
439 */
440 if (len < 0) {
441 len = (int32_t)uprv_strlen(s);
442 }
443 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
444 return TRUE;
445 }
446 return FALSE;
447 }
448
449 static UBool
_isExtlangSubtag(const char * s,int32_t len)450 _isExtlangSubtag(const char* s, int32_t len) {
451 /*
452 * extlang = 3ALPHA ; selected ISO 639 codes
453 * *2("-" 3ALPHA) ; permanently reserved
454 */
455 if (len < 0) {
456 len = (int32_t)uprv_strlen(s);
457 }
458 if (len == 3 && _isAlphaString(s, len)) {
459 return TRUE;
460 }
461 return FALSE;
462 }
463
464 static UBool
_isScriptSubtag(const char * s,int32_t len)465 _isScriptSubtag(const char* s, int32_t len) {
466 /*
467 * script = 4ALPHA ; ISO 15924 code
468 */
469 if (len < 0) {
470 len = (int32_t)uprv_strlen(s);
471 }
472 if (len == 4 && _isAlphaString(s, len)) {
473 return TRUE;
474 }
475 return FALSE;
476 }
477
478 static UBool
_isRegionSubtag(const char * s,int32_t len)479 _isRegionSubtag(const char* s, int32_t len) {
480 /*
481 * region = 2ALPHA ; ISO 3166-1 code
482 * / 3DIGIT ; UN M.49 code
483 */
484 if (len < 0) {
485 len = (int32_t)uprv_strlen(s);
486 }
487 if (len == 2 && _isAlphaString(s, len)) {
488 return TRUE;
489 }
490 if (len == 3 && _isNumericString(s, len)) {
491 return TRUE;
492 }
493 return FALSE;
494 }
495
496 static UBool
_isVariantSubtag(const char * s,int32_t len)497 _isVariantSubtag(const char* s, int32_t len) {
498 /*
499 * variant = 5*8alphanum ; registered variants
500 * / (DIGIT 3alphanum)
501 */
502 if (len < 0) {
503 len = (int32_t)uprv_strlen(s);
504 }
505 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
506 return TRUE;
507 }
508 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
509 return TRUE;
510 }
511 return FALSE;
512 }
513
514 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)515 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
516 /*
517 * variant = 1*8alphanum ; registered variants
518 * / (DIGIT 3alphanum)
519 */
520 if (len < 0) {
521 len = (int32_t)uprv_strlen(s);
522 }
523 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
524 return TRUE;
525 }
526 return FALSE;
527 }
528
529 static UBool
_isExtensionSingleton(const char * s,int32_t len)530 _isExtensionSingleton(const char* s, int32_t len) {
531 /*
532 * extension = singleton 1*("-" (2*8alphanum))
533 */
534 if (len < 0) {
535 len = (int32_t)uprv_strlen(s);
536 }
537 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
538 return TRUE;
539 }
540 return FALSE;
541 }
542
543 static UBool
_isExtensionSubtag(const char * s,int32_t len)544 _isExtensionSubtag(const char* s, int32_t len) {
545 /*
546 * extension = singleton 1*("-" (2*8alphanum))
547 */
548 if (len < 0) {
549 len = (int32_t)uprv_strlen(s);
550 }
551 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
552 return TRUE;
553 }
554 return FALSE;
555 }
556
557 static UBool
_isExtensionSubtags(const char * s,int32_t len)558 _isExtensionSubtags(const char* s, int32_t len) {
559 const char *p = s;
560 const char *pSubtag = NULL;
561
562 if (len < 0) {
563 len = (int32_t)uprv_strlen(s);
564 }
565
566 while ((p - s) < len) {
567 if (*p == SEP) {
568 if (pSubtag == NULL) {
569 return FALSE;
570 }
571 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
572 return FALSE;
573 }
574 pSubtag = NULL;
575 } else if (pSubtag == NULL) {
576 pSubtag = p;
577 }
578 p++;
579 }
580 if (pSubtag == NULL) {
581 return FALSE;
582 }
583 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
584 }
585
586 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)587 _isPrivateuseValueSubtag(const char* s, int32_t len) {
588 /*
589 * privateuse = "x" 1*("-" (1*8alphanum))
590 */
591 if (len < 0) {
592 len = (int32_t)uprv_strlen(s);
593 }
594 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
595 return TRUE;
596 }
597 return FALSE;
598 }
599
600 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)601 _isPrivateuseValueSubtags(const char* s, int32_t len) {
602 const char *p = s;
603 const char *pSubtag = NULL;
604
605 if (len < 0) {
606 len = (int32_t)uprv_strlen(s);
607 }
608
609 while ((p - s) < len) {
610 if (*p == SEP) {
611 if (pSubtag == NULL) {
612 return FALSE;
613 }
614 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
615 return FALSE;
616 }
617 pSubtag = NULL;
618 } else if (pSubtag == NULL) {
619 pSubtag = p;
620 }
621 p++;
622 }
623 if (pSubtag == NULL) {
624 return FALSE;
625 }
626 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
627 }
628
629 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)630 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
631 if (len < 0) {
632 len = (int32_t)uprv_strlen(s);
633 }
634 if (len == 2 && _isAlphaNumericString(s, len)) {
635 return TRUE;
636 }
637 return FALSE;
638 }
639
640 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)641 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
642 const char* p;
643 int32_t subtagLen = 0;
644
645 if (len < 0) {
646 len = (int32_t)uprv_strlen(s);
647 }
648
649 for (p = s; len > 0; p++, len--) {
650 if (*p == SEP) {
651 if (subtagLen < 3) {
652 return FALSE;
653 }
654 subtagLen = 0;
655 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
656 subtagLen++;
657 if (subtagLen > 8) {
658 return FALSE;
659 }
660 } else {
661 return FALSE;
662 }
663 }
664
665 return (subtagLen >= 3);
666 }
667 /*
668 * -------------------------------------------------
669 *
670 * Helper functions
671 *
672 * -------------------------------------------------
673 */
674
675 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)676 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
677 UBool bAdded = TRUE;
678
679 if (*first == NULL) {
680 var->next = NULL;
681 *first = var;
682 } else {
683 VariantListEntry *prev, *cur;
684 int32_t cmp;
685
686 /* variants order should be preserved */
687 prev = NULL;
688 cur = *first;
689 while (TRUE) {
690 if (cur == NULL) {
691 prev->next = var;
692 var->next = NULL;
693 break;
694 }
695
696 /* Checking for duplicate variant */
697 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
698 if (cmp == 0) {
699 /* duplicated variant */
700 bAdded = FALSE;
701 break;
702 }
703 prev = cur;
704 cur = cur->next;
705 }
706 }
707
708 return bAdded;
709 }
710
711 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)712 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
713 UBool bAdded = TRUE;
714
715 if (*first == NULL) {
716 attr->next = NULL;
717 *first = attr;
718 } else {
719 AttributeListEntry *prev, *cur;
720 int32_t cmp;
721
722 /* reorder variants in alphabetical order */
723 prev = NULL;
724 cur = *first;
725 while (TRUE) {
726 if (cur == NULL) {
727 prev->next = attr;
728 attr->next = NULL;
729 break;
730 }
731 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
732 if (cmp < 0) {
733 if (prev == NULL) {
734 *first = attr;
735 } else {
736 prev->next = attr;
737 }
738 attr->next = cur;
739 break;
740 }
741 if (cmp == 0) {
742 /* duplicated variant */
743 bAdded = FALSE;
744 break;
745 }
746 prev = cur;
747 cur = cur->next;
748 }
749 }
750
751 return bAdded;
752 }
753
754
755 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)756 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
757 UBool bAdded = TRUE;
758
759 if (*first == NULL) {
760 ext->next = NULL;
761 *first = ext;
762 } else {
763 ExtensionListEntry *prev, *cur;
764 int32_t cmp;
765
766 /* reorder variants in alphabetical order */
767 prev = NULL;
768 cur = *first;
769 while (TRUE) {
770 if (cur == NULL) {
771 prev->next = ext;
772 ext->next = NULL;
773 break;
774 }
775 if (localeToBCP) {
776 /* special handling for locale to bcp conversion */
777 int32_t len, curlen;
778
779 len = (int32_t)uprv_strlen(ext->key);
780 curlen = (int32_t)uprv_strlen(cur->key);
781
782 if (len == 1 && curlen == 1) {
783 if (*(ext->key) == *(cur->key)) {
784 cmp = 0;
785 } else if (*(ext->key) == PRIVATEUSE) {
786 cmp = 1;
787 } else if (*(cur->key) == PRIVATEUSE) {
788 cmp = -1;
789 } else {
790 cmp = *(ext->key) - *(cur->key);
791 }
792 } else if (len == 1) {
793 cmp = *(ext->key) - LDMLEXT;
794 } else if (curlen == 1) {
795 cmp = LDMLEXT - *(cur->key);
796 } else {
797 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
798 /* Both are u extension keys - we need special handling for 'attribute' */
799 if (cmp != 0) {
800 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
801 cmp = 1;
802 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
803 cmp = -1;
804 }
805 }
806 }
807 } else {
808 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
809 }
810 if (cmp < 0) {
811 if (prev == NULL) {
812 *first = ext;
813 } else {
814 prev->next = ext;
815 }
816 ext->next = cur;
817 break;
818 }
819 if (cmp == 0) {
820 /* duplicated extension key */
821 bAdded = FALSE;
822 break;
823 }
824 prev = cur;
825 cur = cur->next;
826 }
827 }
828
829 return bAdded;
830 }
831
832 static void
_initializeULanguageTag(ULanguageTag * langtag)833 _initializeULanguageTag(ULanguageTag* langtag) {
834 int32_t i;
835
836 langtag->buf = NULL;
837
838 langtag->language = EMPTY;
839 for (i = 0; i < MAXEXTLANG; i++) {
840 langtag->extlang[i] = NULL;
841 }
842
843 langtag->script = EMPTY;
844 langtag->region = EMPTY;
845
846 langtag->variants = NULL;
847 langtag->extensions = NULL;
848
849 langtag->grandfathered = EMPTY;
850 langtag->privateuse = EMPTY;
851 }
852
853 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)854 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
855 char buf[ULOC_LANG_CAPACITY];
856 UErrorCode tmpStatus = U_ZERO_ERROR;
857 int32_t len, i;
858 int32_t reslen = 0;
859
860 if (U_FAILURE(*status)) {
861 return 0;
862 }
863
864 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
865 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
866 if (strict) {
867 *status = U_ILLEGAL_ARGUMENT_ERROR;
868 return 0;
869 }
870 len = 0;
871 }
872
873 /* Note: returned language code is in lower case letters */
874
875 if (len == 0) {
876 if (reslen < capacity) {
877 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
878 }
879 reslen += LANG_UND_LEN;
880 } else if (!_isLanguageSubtag(buf, len)) {
881 /* invalid language code */
882 if (strict) {
883 *status = U_ILLEGAL_ARGUMENT_ERROR;
884 return 0;
885 }
886 if (reslen < capacity) {
887 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
888 }
889 reslen += LANG_UND_LEN;
890 } else {
891 /* resolve deprecated */
892 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
893 // 2-letter deprecated subtags are listede before 3-letter
894 // ones in DEPRECATEDLANGS[]. Get out of loop on coming
895 // across the 1st 3-letter subtag, if the input is a 2-letter code.
896 // to avoid continuing to try when there's no match.
897 if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
898 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
899 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
900 len = (int32_t)uprv_strlen(buf);
901 break;
902 }
903 }
904 if (reslen < capacity) {
905 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
906 }
907 reslen += len;
908 }
909 u_terminateChars(appendAt, capacity, reslen, status);
910 return reslen;
911 }
912
913 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)914 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
915 char buf[ULOC_SCRIPT_CAPACITY];
916 UErrorCode tmpStatus = U_ZERO_ERROR;
917 int32_t len;
918 int32_t reslen = 0;
919
920 if (U_FAILURE(*status)) {
921 return 0;
922 }
923
924 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
925 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
926 if (strict) {
927 *status = U_ILLEGAL_ARGUMENT_ERROR;
928 }
929 return 0;
930 }
931
932 if (len > 0) {
933 if (!_isScriptSubtag(buf, len)) {
934 /* invalid script code */
935 if (strict) {
936 *status = U_ILLEGAL_ARGUMENT_ERROR;
937 }
938 return 0;
939 } else {
940 if (reslen < capacity) {
941 *(appendAt + reslen) = SEP;
942 }
943 reslen++;
944 if (reslen < capacity) {
945 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
946 }
947 reslen += len;
948 }
949 }
950 u_terminateChars(appendAt, capacity, reslen, status);
951 return reslen;
952 }
953
954 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)955 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
956 char buf[ULOC_COUNTRY_CAPACITY];
957 UErrorCode tmpStatus = U_ZERO_ERROR;
958 int32_t len;
959 int32_t reslen = 0;
960
961 if (U_FAILURE(*status)) {
962 return 0;
963 }
964
965 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
966 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
967 if (strict) {
968 *status = U_ILLEGAL_ARGUMENT_ERROR;
969 }
970 return 0;
971 }
972
973 if (len > 0) {
974 if (!_isRegionSubtag(buf, len)) {
975 /* invalid region code */
976 if (strict) {
977 *status = U_ILLEGAL_ARGUMENT_ERROR;
978 }
979 return 0;
980 } else {
981 if (reslen < capacity) {
982 *(appendAt + reslen) = SEP;
983 }
984 reslen++;
985 /* resolve deprecated */
986 for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
987 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
988 uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
989 len = (int32_t)uprv_strlen(buf);
990 break;
991 }
992 }
993
994 if (reslen < capacity) {
995 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
996 }
997 reslen += len;
998 }
999 }
1000 u_terminateChars(appendAt, capacity, reslen, status);
1001 return reslen;
1002 }
1003
1004 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)1005 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1006 char buf[ULOC_FULLNAME_CAPACITY];
1007 UErrorCode tmpStatus = U_ZERO_ERROR;
1008 int32_t len, i;
1009 int32_t reslen = 0;
1010
1011 if (U_FAILURE(*status)) {
1012 return 0;
1013 }
1014
1015 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1016 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1017 if (strict) {
1018 *status = U_ILLEGAL_ARGUMENT_ERROR;
1019 }
1020 return 0;
1021 }
1022
1023 if (len > 0) {
1024 char *p, *pVar;
1025 UBool bNext = TRUE;
1026 VariantListEntry *var;
1027 VariantListEntry *varFirst = NULL;
1028
1029 pVar = NULL;
1030 p = buf;
1031 while (bNext) {
1032 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1033 if (*p == 0) {
1034 bNext = FALSE;
1035 } else {
1036 *p = 0; /* terminate */
1037 }
1038 if (pVar == NULL) {
1039 if (strict) {
1040 *status = U_ILLEGAL_ARGUMENT_ERROR;
1041 break;
1042 }
1043 /* ignore empty variant */
1044 } else {
1045 /* ICU uses upper case letters for variants, but
1046 the canonical format is lowercase in BCP47 */
1047 for (i = 0; *(pVar + i) != 0; i++) {
1048 *(pVar + i) = uprv_tolower(*(pVar + i));
1049 }
1050
1051 /* validate */
1052 if (_isVariantSubtag(pVar, -1)) {
1053 if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
1054 /* emit the variant to the list */
1055 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1056 if (var == NULL) {
1057 *status = U_MEMORY_ALLOCATION_ERROR;
1058 break;
1059 }
1060 var->variant = pVar;
1061 if (!_addVariantToList(&varFirst, var)) {
1062 /* duplicated variant */
1063 uprv_free(var);
1064 if (strict) {
1065 *status = U_ILLEGAL_ARGUMENT_ERROR;
1066 break;
1067 }
1068 }
1069 } else {
1070 /* Special handling for POSIX variant, need to remember that we had it and then */
1071 /* treat it like an extension later. */
1072 *hadPosix = TRUE;
1073 }
1074 } else if (strict) {
1075 *status = U_ILLEGAL_ARGUMENT_ERROR;
1076 break;
1077 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1078 /* Handle private use subtags separately */
1079 break;
1080 }
1081 }
1082 /* reset variant starting position */
1083 pVar = NULL;
1084 } else if (pVar == NULL) {
1085 pVar = p;
1086 }
1087 p++;
1088 }
1089
1090 if (U_SUCCESS(*status)) {
1091 if (varFirst != NULL) {
1092 int32_t varLen;
1093
1094 /* write out validated/normalized variants to the target */
1095 var = varFirst;
1096 while (var != NULL) {
1097 if (reslen < capacity) {
1098 *(appendAt + reslen) = SEP;
1099 }
1100 reslen++;
1101 varLen = (int32_t)uprv_strlen(var->variant);
1102 if (reslen < capacity) {
1103 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1104 }
1105 reslen += varLen;
1106 var = var->next;
1107 }
1108 }
1109 }
1110
1111 /* clean up */
1112 var = varFirst;
1113 while (var != NULL) {
1114 VariantListEntry *tmpVar = var->next;
1115 uprv_free(var);
1116 var = tmpVar;
1117 }
1118
1119 if (U_FAILURE(*status)) {
1120 return 0;
1121 }
1122 }
1123
1124 u_terminateChars(appendAt, capacity, reslen, status);
1125 return reslen;
1126 }
1127
1128 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1129 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1130 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1131 int32_t attrBufLength = 0;
1132 UEnumeration *keywordEnum = NULL;
1133 int32_t reslen = 0;
1134
1135 keywordEnum = uloc_openKeywords(localeID, status);
1136 if (U_FAILURE(*status) && !hadPosix) {
1137 uenum_close(keywordEnum);
1138 return 0;
1139 }
1140 if (keywordEnum != NULL || hadPosix) {
1141 /* reorder extensions */
1142 int32_t len;
1143 const char *key;
1144 ExtensionListEntry *firstExt = NULL;
1145 ExtensionListEntry *ext;
1146 AttributeListEntry *firstAttr = NULL;
1147 AttributeListEntry *attr;
1148 char *attrValue;
1149 CharStringPool extBufPool;
1150 const char *bcpKey=nullptr, *bcpValue=nullptr;
1151 UErrorCode tmpStatus = U_ZERO_ERROR;
1152 int32_t keylen;
1153 UBool isBcpUExt;
1154
1155 while (TRUE) {
1156 icu::CharString buf;
1157 key = uenum_next(keywordEnum, NULL, status);
1158 if (key == NULL) {
1159 break;
1160 }
1161 char* buffer;
1162 int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
1163
1164 for (;;) {
1165 buffer = buf.getAppendBuffer(
1166 /*minCapacity=*/resultCapacity,
1167 /*desiredCapacityHint=*/resultCapacity,
1168 resultCapacity,
1169 tmpStatus);
1170
1171 if (U_FAILURE(tmpStatus)) {
1172 break;
1173 }
1174
1175 len = uloc_getKeywordValue(
1176 localeID, key, buffer, resultCapacity, &tmpStatus);
1177
1178 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
1179 break;
1180 }
1181
1182 resultCapacity = len;
1183 tmpStatus = U_ZERO_ERROR;
1184 }
1185
1186 if (U_FAILURE(tmpStatus)) {
1187 if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
1188 *status = U_MEMORY_ALLOCATION_ERROR;
1189 break;
1190 }
1191 if (strict) {
1192 *status = U_ILLEGAL_ARGUMENT_ERROR;
1193 break;
1194 }
1195 /* ignore this keyword */
1196 tmpStatus = U_ZERO_ERROR;
1197 continue;
1198 }
1199
1200 buf.append(buffer, len, tmpStatus);
1201 if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1202 tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
1203 }
1204
1205 keylen = (int32_t)uprv_strlen(key);
1206 isBcpUExt = (keylen > 1);
1207
1208 /* special keyword used for representing Unicode locale attributes */
1209 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1210 if (len > 0) {
1211 int32_t i = 0;
1212 while (TRUE) {
1213 attrBufLength = 0;
1214 for (; i < len; i++) {
1215 if (buf[i] != '-') {
1216 attrBuf[attrBufLength++] = buf[i];
1217 } else {
1218 i++;
1219 break;
1220 }
1221 }
1222 if (attrBufLength > 0) {
1223 attrBuf[attrBufLength] = 0;
1224
1225 } else if (i >= len){
1226 break;
1227 }
1228
1229 /* create AttributeListEntry */
1230 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1231 if (attr == NULL) {
1232 *status = U_MEMORY_ALLOCATION_ERROR;
1233 break;
1234 }
1235 attrValue = (char*)uprv_malloc(attrBufLength + 1);
1236 if (attrValue == NULL) {
1237 *status = U_MEMORY_ALLOCATION_ERROR;
1238 break;
1239 }
1240 uprv_strcpy(attrValue, attrBuf);
1241 attr->attribute = attrValue;
1242
1243 if (!_addAttributeToList(&firstAttr, attr)) {
1244 uprv_free(attr);
1245 uprv_free(attrValue);
1246 if (strict) {
1247 *status = U_ILLEGAL_ARGUMENT_ERROR;
1248 break;
1249 }
1250 }
1251 }
1252 /* for a place holder ExtensionListEntry */
1253 bcpKey = LOCALE_ATTRIBUTE_KEY;
1254 bcpValue = NULL;
1255 }
1256 } else if (isBcpUExt) {
1257 bcpKey = uloc_toUnicodeLocaleKey(key);
1258 if (bcpKey == NULL) {
1259 if (strict) {
1260 *status = U_ILLEGAL_ARGUMENT_ERROR;
1261 break;
1262 }
1263 continue;
1264 }
1265
1266 /* we've checked buf is null-terminated above */
1267 bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
1268 if (bcpValue == NULL) {
1269 if (strict) {
1270 *status = U_ILLEGAL_ARGUMENT_ERROR;
1271 break;
1272 }
1273 continue;
1274 }
1275 if (bcpValue == buf.data()) {
1276 /*
1277 When uloc_toUnicodeLocaleType(key, buf) returns the
1278 input value as is, the value is well-formed, but has
1279 no known mapping. This implementation normalizes the
1280 value to lower case
1281 */
1282 icu::CharString* extBuf = extBufPool.create();
1283 if (extBuf == nullptr) {
1284 *status = U_MEMORY_ALLOCATION_ERROR;
1285 break;
1286 }
1287 int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
1288 int32_t resultCapacity;
1289 char* pExtBuf = extBuf->getAppendBuffer(
1290 /*minCapacity=*/bcpValueLen,
1291 /*desiredCapacityHint=*/bcpValueLen,
1292 resultCapacity,
1293 tmpStatus);
1294 if (U_FAILURE(tmpStatus)) {
1295 *status = tmpStatus;
1296 break;
1297 }
1298
1299 uprv_strcpy(pExtBuf, bcpValue);
1300 T_CString_toLowerCase(pExtBuf);
1301
1302 extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
1303 if (U_FAILURE(tmpStatus)) {
1304 *status = tmpStatus;
1305 break;
1306 }
1307
1308 bcpValue = extBuf->data();
1309 }
1310 } else {
1311 if (*key == PRIVATEUSE) {
1312 if (!_isPrivateuseValueSubtags(buf.data(), len)) {
1313 if (strict) {
1314 *status = U_ILLEGAL_ARGUMENT_ERROR;
1315 break;
1316 }
1317 continue;
1318 }
1319 } else {
1320 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
1321 if (strict) {
1322 *status = U_ILLEGAL_ARGUMENT_ERROR;
1323 break;
1324 }
1325 continue;
1326 }
1327 }
1328 bcpKey = key;
1329 icu::CharString* extBuf = extBufPool.create();
1330 if (extBuf == nullptr) {
1331 *status = U_MEMORY_ALLOCATION_ERROR;
1332 break;
1333 }
1334 extBuf->append(buf.data(), len, tmpStatus);
1335 if (U_FAILURE(tmpStatus)) {
1336 *status = tmpStatus;
1337 break;
1338 }
1339 bcpValue = extBuf->data();
1340 }
1341
1342 /* create ExtensionListEntry */
1343 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1344 if (ext == NULL) {
1345 *status = U_MEMORY_ALLOCATION_ERROR;
1346 break;
1347 }
1348 ext->key = bcpKey;
1349 ext->value = bcpValue;
1350
1351 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1352 uprv_free(ext);
1353 if (strict) {
1354 *status = U_ILLEGAL_ARGUMENT_ERROR;
1355 break;
1356 }
1357 }
1358 }
1359
1360 /* Special handling for POSIX variant - add the keywords for POSIX */
1361 if (hadPosix) {
1362 /* create ExtensionListEntry for POSIX */
1363 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1364 if (ext == NULL) {
1365 *status = U_MEMORY_ALLOCATION_ERROR;
1366 goto cleanup;
1367 }
1368 ext->key = POSIX_KEY;
1369 ext->value = POSIX_VALUE;
1370
1371 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1372 uprv_free(ext);
1373 }
1374 }
1375
1376 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1377 UBool startLDMLExtension = FALSE;
1378 for (ext = firstExt; ext; ext = ext->next) {
1379 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1380 /* first LDML u singlton extension */
1381 if (reslen < capacity) {
1382 *(appendAt + reslen) = SEP;
1383 }
1384 reslen++;
1385 if (reslen < capacity) {
1386 *(appendAt + reslen) = LDMLEXT;
1387 }
1388 reslen++;
1389
1390 startLDMLExtension = TRUE;
1391 }
1392
1393 /* write out the sorted BCP47 attributes, extensions and private use */
1394 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1395 /* write the value for the attributes */
1396 for (attr = firstAttr; attr; attr = attr->next) {
1397 if (reslen < capacity) {
1398 *(appendAt + reslen) = SEP;
1399 }
1400 reslen++;
1401 len = (int32_t)uprv_strlen(attr->attribute);
1402 if (reslen < capacity) {
1403 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1404 }
1405 reslen += len;
1406 }
1407 } else {
1408 if (reslen < capacity) {
1409 *(appendAt + reslen) = SEP;
1410 }
1411 reslen++;
1412 len = (int32_t)uprv_strlen(ext->key);
1413 if (reslen < capacity) {
1414 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1415 }
1416 reslen += len;
1417 if (reslen < capacity) {
1418 *(appendAt + reslen) = SEP;
1419 }
1420 reslen++;
1421 len = (int32_t)uprv_strlen(ext->value);
1422 if (reslen < capacity) {
1423 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1424 }
1425 reslen += len;
1426 }
1427 }
1428 }
1429 cleanup:
1430 /* clean up */
1431 ext = firstExt;
1432 while (ext != NULL) {
1433 ExtensionListEntry *tmpExt = ext->next;
1434 uprv_free(ext);
1435 ext = tmpExt;
1436 }
1437
1438 attr = firstAttr;
1439 while (attr != NULL) {
1440 AttributeListEntry *tmpAttr = attr->next;
1441 char *pValue = (char *)attr->attribute;
1442 uprv_free(pValue);
1443 uprv_free(attr);
1444 attr = tmpAttr;
1445 }
1446
1447 uenum_close(keywordEnum);
1448
1449 if (U_FAILURE(*status)) {
1450 return 0;
1451 }
1452 }
1453
1454 return u_terminateChars(appendAt, capacity, reslen, status);
1455 }
1456
1457 /**
1458 * Append keywords parsed from LDML extension value
1459 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1460 * Note: char* buf is used for storing keywords
1461 */
1462 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1463 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1464 const char *pTag; /* beginning of current subtag */
1465 const char *pKwds; /* beginning of key-type pairs */
1466 UBool variantExists = *posixVariant;
1467
1468 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1469 ExtensionListEntry *kwd, *nextKwd;
1470
1471 AttributeListEntry *attrFirst = NULL; /* first attribute */
1472 AttributeListEntry *attr, *nextAttr;
1473
1474 int32_t len;
1475 int32_t bufIdx = 0;
1476
1477 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1478 int32_t attrBufIdx = 0;
1479
1480 /* Reset the posixVariant value */
1481 *posixVariant = FALSE;
1482
1483 pTag = ldmlext;
1484 pKwds = NULL;
1485
1486 /* Iterate through u extension attributes */
1487 while (*pTag) {
1488 /* locate next separator char */
1489 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1490
1491 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1492 pKwds = pTag;
1493 break;
1494 }
1495
1496 /* add this attribute to the list */
1497 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1498 if (attr == NULL) {
1499 *status = U_MEMORY_ALLOCATION_ERROR;
1500 goto cleanup;
1501 }
1502
1503 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1504 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1505 attrBuf[attrBufIdx + len] = 0;
1506 attr->attribute = &attrBuf[attrBufIdx];
1507 attrBufIdx += (len + 1);
1508 } else {
1509 *status = U_ILLEGAL_ARGUMENT_ERROR;
1510 uprv_free(attr);
1511 goto cleanup;
1512 }
1513
1514 if (!_addAttributeToList(&attrFirst, attr)) {
1515 *status = U_ILLEGAL_ARGUMENT_ERROR;
1516 uprv_free(attr);
1517 goto cleanup;
1518 }
1519
1520 /* next tag */
1521 pTag += len;
1522 if (*pTag) {
1523 /* next to the separator */
1524 pTag++;
1525 }
1526 }
1527
1528 if (attrFirst) {
1529 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1530
1531 if (attrBufIdx > bufSize) {
1532 /* attrBufIdx == <total length of attribute subtag> + 1 */
1533 *status = U_ILLEGAL_ARGUMENT_ERROR;
1534 goto cleanup;
1535 }
1536
1537 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1538 if (kwd == NULL) {
1539 *status = U_MEMORY_ALLOCATION_ERROR;
1540 goto cleanup;
1541 }
1542
1543 kwd->key = LOCALE_ATTRIBUTE_KEY;
1544 kwd->value = buf;
1545
1546 /* attribute subtags sorted in alphabetical order as type */
1547 attr = attrFirst;
1548 while (attr != NULL) {
1549 nextAttr = attr->next;
1550
1551 /* buffer size check is done above */
1552 if (attr != attrFirst) {
1553 *(buf + bufIdx) = SEP;
1554 bufIdx++;
1555 }
1556
1557 len = static_cast<int32_t>(uprv_strlen(attr->attribute));
1558 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1559 bufIdx += len;
1560
1561 attr = nextAttr;
1562 }
1563 *(buf + bufIdx) = 0;
1564 bufIdx++;
1565
1566 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1567 *status = U_ILLEGAL_ARGUMENT_ERROR;
1568 uprv_free(kwd);
1569 goto cleanup;
1570 }
1571
1572 /* once keyword entry is created, delete the attribute list */
1573 attr = attrFirst;
1574 while (attr != NULL) {
1575 nextAttr = attr->next;
1576 uprv_free(attr);
1577 attr = nextAttr;
1578 }
1579 attrFirst = NULL;
1580 }
1581
1582 if (pKwds) {
1583 const char *pBcpKey = NULL; /* u extenstion key subtag */
1584 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1585 int32_t bcpKeyLen = 0;
1586 int32_t bcpTypeLen = 0;
1587 UBool isDone = FALSE;
1588
1589 pTag = pKwds;
1590 /* BCP47 representation of LDML key/type pairs */
1591 while (!isDone) {
1592 const char *pNextBcpKey = NULL;
1593 int32_t nextBcpKeyLen = 0;
1594 UBool emitKeyword = FALSE;
1595
1596 if (*pTag) {
1597 /* locate next separator char */
1598 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1599
1600 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1601 if (pBcpKey) {
1602 emitKeyword = TRUE;
1603 pNextBcpKey = pTag;
1604 nextBcpKeyLen = len;
1605 } else {
1606 pBcpKey = pTag;
1607 bcpKeyLen = len;
1608 }
1609 } else {
1610 U_ASSERT(pBcpKey != NULL);
1611 /* within LDML type subtags */
1612 if (pBcpType) {
1613 bcpTypeLen += (len + 1);
1614 } else {
1615 pBcpType = pTag;
1616 bcpTypeLen = len;
1617 }
1618 }
1619
1620 /* next tag */
1621 pTag += len;
1622 if (*pTag) {
1623 /* next to the separator */
1624 pTag++;
1625 }
1626 } else {
1627 /* processing last one */
1628 emitKeyword = TRUE;
1629 isDone = TRUE;
1630 }
1631
1632 if (emitKeyword) {
1633 const char *pKey = NULL; /* LDML key */
1634 const char *pType = NULL; /* LDML type */
1635
1636 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1637
1638 U_ASSERT(pBcpKey != NULL);
1639
1640 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
1641 /* the BCP key is invalid */
1642 *status = U_ILLEGAL_ARGUMENT_ERROR;
1643 goto cleanup;
1644 }
1645
1646 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1647 bcpKeyBuf[bcpKeyLen] = 0;
1648
1649 /* u extension key to LDML key */
1650 pKey = uloc_toLegacyKey(bcpKeyBuf);
1651 if (pKey == NULL) {
1652 *status = U_ILLEGAL_ARGUMENT_ERROR;
1653 goto cleanup;
1654 }
1655 if (pKey == bcpKeyBuf) {
1656 /*
1657 The key returned by toLegacyKey points to the input buffer.
1658 We normalize the result key to lower case.
1659 */
1660 T_CString_toLowerCase(bcpKeyBuf);
1661 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1662 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1663 pKey = buf + bufIdx;
1664 bufIdx += bcpKeyLen;
1665 *(buf + bufIdx) = 0;
1666 bufIdx++;
1667 } else {
1668 *status = U_BUFFER_OVERFLOW_ERROR;
1669 goto cleanup;
1670 }
1671 }
1672
1673 if (pBcpType) {
1674 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
1675 if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
1676 /* the BCP type is too long */
1677 *status = U_ILLEGAL_ARGUMENT_ERROR;
1678 goto cleanup;
1679 }
1680
1681 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1682 bcpTypeBuf[bcpTypeLen] = 0;
1683
1684 /* BCP type to locale type */
1685 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1686 if (pType == NULL) {
1687 *status = U_ILLEGAL_ARGUMENT_ERROR;
1688 goto cleanup;
1689 }
1690 if (pType == bcpTypeBuf) {
1691 /*
1692 The type returned by toLegacyType points to the input buffer.
1693 We normalize the result type to lower case.
1694 */
1695 /* normalize to lower case */
1696 T_CString_toLowerCase(bcpTypeBuf);
1697 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1698 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1699 pType = buf + bufIdx;
1700 bufIdx += bcpTypeLen;
1701 *(buf + bufIdx) = 0;
1702 bufIdx++;
1703 } else {
1704 *status = U_BUFFER_OVERFLOW_ERROR;
1705 goto cleanup;
1706 }
1707 }
1708 } else {
1709 /* typeless - default type value is "yes" */
1710 pType = LOCALE_TYPE_YES;
1711 }
1712
1713 /* Special handling for u-va-posix, since we want to treat this as a variant,
1714 not as a keyword */
1715 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1716 *posixVariant = TRUE;
1717 } else {
1718 /* create an ExtensionListEntry for this keyword */
1719 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1720 if (kwd == NULL) {
1721 *status = U_MEMORY_ALLOCATION_ERROR;
1722 goto cleanup;
1723 }
1724
1725 kwd->key = pKey;
1726 kwd->value = pType;
1727
1728 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1729 // duplicate keyword is allowed, Only the first
1730 // is honored.
1731 uprv_free(kwd);
1732 }
1733 }
1734
1735 pBcpKey = pNextBcpKey;
1736 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1737 pBcpType = NULL;
1738 bcpTypeLen = 0;
1739 }
1740 }
1741 }
1742
1743 kwd = kwdFirst;
1744 while (kwd != NULL) {
1745 nextKwd = kwd->next;
1746 _addExtensionToList(appendTo, kwd, FALSE);
1747 kwd = nextKwd;
1748 }
1749
1750 return;
1751
1752 cleanup:
1753 attr = attrFirst;
1754 while (attr != NULL) {
1755 nextAttr = attr->next;
1756 uprv_free(attr);
1757 attr = nextAttr;
1758 }
1759
1760 kwd = kwdFirst;
1761 while (kwd != NULL) {
1762 nextKwd = kwd->next;
1763 uprv_free(kwd);
1764 kwd = nextKwd;
1765 }
1766 }
1767
1768
1769 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1770 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1771 int32_t reslen = 0;
1772 int32_t i, n;
1773 int32_t len;
1774 ExtensionListEntry *kwdFirst = NULL;
1775 ExtensionListEntry *kwd;
1776 const char *key, *type;
1777 char *kwdBuf = NULL;
1778 int32_t kwdBufLength = capacity;
1779 UBool posixVariant = FALSE;
1780
1781 if (U_FAILURE(*status)) {
1782 return 0;
1783 }
1784
1785 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1786 if (kwdBuf == NULL) {
1787 *status = U_MEMORY_ALLOCATION_ERROR;
1788 return 0;
1789 }
1790
1791 /* Determine if variants already exists */
1792 if (ultag_getVariantsSize(langtag)) {
1793 posixVariant = TRUE;
1794 }
1795
1796 n = ultag_getExtensionsSize(langtag);
1797
1798 /* resolve locale keywords and reordering keys */
1799 for (i = 0; i < n; i++) {
1800 key = ultag_getExtensionKey(langtag, i);
1801 type = ultag_getExtensionValue(langtag, i);
1802 if (*key == LDMLEXT) {
1803 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1804 if (U_FAILURE(*status)) {
1805 break;
1806 }
1807 } else {
1808 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1809 if (kwd == NULL) {
1810 *status = U_MEMORY_ALLOCATION_ERROR;
1811 break;
1812 }
1813 kwd->key = key;
1814 kwd->value = type;
1815 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1816 uprv_free(kwd);
1817 *status = U_ILLEGAL_ARGUMENT_ERROR;
1818 break;
1819 }
1820 }
1821 }
1822
1823 if (U_SUCCESS(*status)) {
1824 type = ultag_getPrivateUse(langtag);
1825 if ((int32_t)uprv_strlen(type) > 0) {
1826 /* add private use as a keyword */
1827 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1828 if (kwd == NULL) {
1829 *status = U_MEMORY_ALLOCATION_ERROR;
1830 } else {
1831 kwd->key = PRIVATEUSE_KEY;
1832 kwd->value = type;
1833 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1834 uprv_free(kwd);
1835 *status = U_ILLEGAL_ARGUMENT_ERROR;
1836 }
1837 }
1838 }
1839 }
1840
1841 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1842
1843 if (U_SUCCESS(*status) && posixVariant) {
1844 len = (int32_t) uprv_strlen(_POSIX);
1845 if (reslen < capacity) {
1846 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1847 }
1848 reslen += len;
1849 }
1850
1851 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1852 /* write out the sorted keywords */
1853 UBool firstValue = TRUE;
1854 kwd = kwdFirst;
1855 do {
1856 if (reslen < capacity) {
1857 if (firstValue) {
1858 /* '@' */
1859 *(appendAt + reslen) = LOCALE_EXT_SEP;
1860 firstValue = FALSE;
1861 } else {
1862 /* ';' */
1863 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1864 }
1865 }
1866 reslen++;
1867
1868 /* key */
1869 len = (int32_t)uprv_strlen(kwd->key);
1870 if (reslen < capacity) {
1871 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1872 }
1873 reslen += len;
1874
1875 /* '=' */
1876 if (reslen < capacity) {
1877 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1878 }
1879 reslen++;
1880
1881 /* type */
1882 len = (int32_t)uprv_strlen(kwd->value);
1883 if (reslen < capacity) {
1884 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1885 }
1886 reslen += len;
1887
1888 kwd = kwd->next;
1889 } while (kwd);
1890 }
1891
1892 /* clean up */
1893 kwd = kwdFirst;
1894 while (kwd != NULL) {
1895 ExtensionListEntry *tmpKwd = kwd->next;
1896 uprv_free(kwd);
1897 kwd = tmpKwd;
1898 }
1899
1900 uprv_free(kwdBuf);
1901
1902 if (U_FAILURE(*status)) {
1903 return 0;
1904 }
1905
1906 return u_terminateChars(appendAt, capacity, reslen, status);
1907 }
1908
1909 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1910 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1911 (void)hadPosix;
1912 char buf[ULOC_FULLNAME_CAPACITY];
1913 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1914 UErrorCode tmpStatus = U_ZERO_ERROR;
1915 int32_t len, i;
1916 int32_t reslen = 0;
1917
1918 if (U_FAILURE(*status)) {
1919 return 0;
1920 }
1921
1922 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1923 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1924 if (strict) {
1925 *status = U_ILLEGAL_ARGUMENT_ERROR;
1926 }
1927 return 0;
1928 }
1929
1930 if (len > 0) {
1931 char *p, *pPriv;
1932 UBool bNext = TRUE;
1933 UBool firstValue = TRUE;
1934 UBool writeValue;
1935
1936 pPriv = NULL;
1937 p = buf;
1938 while (bNext) {
1939 writeValue = FALSE;
1940 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1941 if (*p == 0) {
1942 bNext = FALSE;
1943 } else {
1944 *p = 0; /* terminate */
1945 }
1946 if (pPriv != NULL) {
1947 /* Private use in the canonical format is lowercase in BCP47 */
1948 for (i = 0; *(pPriv + i) != 0; i++) {
1949 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1950 }
1951
1952 /* validate */
1953 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1954 if (firstValue) {
1955 if (!_isVariantSubtag(pPriv, -1)) {
1956 writeValue = TRUE;
1957 }
1958 } else {
1959 writeValue = TRUE;
1960 }
1961 } else if (strict) {
1962 *status = U_ILLEGAL_ARGUMENT_ERROR;
1963 break;
1964 } else {
1965 break;
1966 }
1967
1968 if (writeValue) {
1969 if (reslen < capacity) {
1970 tmpAppend[reslen++] = SEP;
1971 }
1972
1973 if (firstValue) {
1974 if (reslen < capacity) {
1975 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1976 }
1977
1978 if (reslen < capacity) {
1979 tmpAppend[reslen++] = SEP;
1980 }
1981
1982 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1983 if (reslen < capacity) {
1984 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1985 }
1986 reslen += len;
1987
1988 if (reslen < capacity) {
1989 tmpAppend[reslen++] = SEP;
1990 }
1991
1992 firstValue = FALSE;
1993 }
1994
1995 len = (int32_t)uprv_strlen(pPriv);
1996 if (reslen < capacity) {
1997 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1998 }
1999 reslen += len;
2000 }
2001 }
2002 /* reset private use starting position */
2003 pPriv = NULL;
2004 } else if (pPriv == NULL) {
2005 pPriv = p;
2006 }
2007 p++;
2008 }
2009
2010 if (U_FAILURE(*status)) {
2011 return 0;
2012 }
2013 }
2014
2015 if (U_SUCCESS(*status)) {
2016 len = reslen;
2017 if (reslen < capacity) {
2018 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2019 }
2020 }
2021
2022 u_terminateChars(appendAt, capacity, reslen, status);
2023
2024 return reslen;
2025 }
2026
2027 /*
2028 * -------------------------------------------------
2029 *
2030 * ultag_ functions
2031 *
2032 * -------------------------------------------------
2033 */
2034
2035 /* Bit flags used by the parser */
2036 #define LANG 0x0001
2037 #define EXTL 0x0002
2038 #define SCRT 0x0004
2039 #define REGN 0x0008
2040 #define VART 0x0010
2041 #define EXTS 0x0020
2042 #define EXTV 0x0040
2043 #define PRIV 0x0080
2044
2045 /**
2046 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
2047 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
2048 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
2049 */
2050 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2051 #pragma optimize( "", off )
2052 #endif
2053
2054 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)2055 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2056 ULanguageTag *t;
2057 char *tagBuf;
2058 int16_t next;
2059 char *pSubtag, *pNext, *pLastGoodPosition;
2060 int32_t subtagLen;
2061 int32_t extlangIdx;
2062 ExtensionListEntry *pExtension;
2063 char *pExtValueSubtag, *pExtValueSubtagEnd;
2064 int32_t i;
2065 UBool privateuseVar = FALSE;
2066 int32_t grandfatheredLen = 0;
2067
2068 if (parsedLen != NULL) {
2069 *parsedLen = 0;
2070 }
2071
2072 if (U_FAILURE(*status)) {
2073 return NULL;
2074 }
2075
2076 if (tagLen < 0) {
2077 tagLen = (int32_t)uprv_strlen(tag);
2078 }
2079
2080 /* copy the entire string */
2081 tagBuf = (char*)uprv_malloc(tagLen + 1);
2082 if (tagBuf == NULL) {
2083 *status = U_MEMORY_ALLOCATION_ERROR;
2084 return NULL;
2085 }
2086 uprv_memcpy(tagBuf, tag, tagLen);
2087 *(tagBuf + tagLen) = 0;
2088
2089 /* create a ULanguageTag */
2090 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2091 if (t == NULL) {
2092 uprv_free(tagBuf);
2093 *status = U_MEMORY_ALLOCATION_ERROR;
2094 return NULL;
2095 }
2096 _initializeULanguageTag(t);
2097 t->buf = tagBuf;
2098
2099 if (tagLen < MINLEN) {
2100 /* the input tag is too short - return empty ULanguageTag */
2101 return t;
2102 }
2103
2104 /* check if the tag is grandfathered */
2105 for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
2106 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2107 int32_t newTagLength;
2108
2109 grandfatheredLen = tagLen; /* back up for output parsedLen */
2110 newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
2111 if (tagLen < newTagLength) {
2112 uprv_free(tagBuf);
2113 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2114 if (tagBuf == NULL) {
2115 *status = U_MEMORY_ALLOCATION_ERROR;
2116 ultag_close(t);
2117 return NULL;
2118 }
2119 t->buf = tagBuf;
2120 tagLen = newTagLength;
2121 }
2122 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2123 break;
2124 }
2125 }
2126
2127 size_t parsedLenDelta = 0;
2128 if (grandfatheredLen == 0) {
2129 for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
2130 const char* redundantTag = REDUNDANT[i];
2131 size_t redundantTagLen = uprv_strlen(redundantTag);
2132 // The preferred tag for a redundant tag is always shorter than redundant
2133 // tag. A redundant tag may or may not be followed by other subtags.
2134 // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
2135 if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
2136 const char* redundantTagEnd = tagBuf + redundantTagLen;
2137 if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
2138 const char* preferredTag = REDUNDANT[i + 1];
2139 size_t preferredTagLen = uprv_strlen(preferredTag);
2140 uprv_strncpy(t->buf, preferredTag, preferredTagLen);
2141 if (*redundantTagEnd == SEP) {
2142 uprv_memmove(tagBuf + preferredTagLen,
2143 redundantTagEnd,
2144 tagLen - redundantTagLen + 1);
2145 } else {
2146 tagBuf[preferredTagLen] = '\0';
2147 }
2148 // parsedLen should be the length of the input
2149 // before redundantTag is replaced by preferredTag.
2150 // Save the delta to add it back later.
2151 parsedLenDelta = redundantTagLen - preferredTagLen;
2152 break;
2153 }
2154 }
2155 }
2156 }
2157
2158 /*
2159 * langtag = language
2160 * ["-" script]
2161 * ["-" region]
2162 * *("-" variant)
2163 * *("-" extension)
2164 * ["-" privateuse]
2165 */
2166
2167 next = LANG | PRIV;
2168 pNext = pLastGoodPosition = tagBuf;
2169 extlangIdx = 0;
2170 pExtension = NULL;
2171 pExtValueSubtag = NULL;
2172 pExtValueSubtagEnd = NULL;
2173
2174 while (pNext) {
2175 char *pSep;
2176
2177 pSubtag = pNext;
2178
2179 /* locate next separator char */
2180 pSep = pSubtag;
2181 while (*pSep) {
2182 if (*pSep == SEP) {
2183 break;
2184 }
2185 pSep++;
2186 }
2187 if (*pSep == 0) {
2188 /* last subtag */
2189 pNext = NULL;
2190 } else {
2191 pNext = pSep + 1;
2192 }
2193 subtagLen = (int32_t)(pSep - pSubtag);
2194
2195 if (next & LANG) {
2196 if (_isLanguageSubtag(pSubtag, subtagLen)) {
2197 *pSep = 0; /* terminate */
2198 // TODO: move deprecated language code handling here.
2199 t->language = T_CString_toLowerCase(pSubtag);
2200
2201 pLastGoodPosition = pSep;
2202 next = SCRT | REGN | VART | EXTS | PRIV;
2203 if (subtagLen <= 3)
2204 next |= EXTL;
2205 continue;
2206 }
2207 }
2208 if (next & EXTL) {
2209 if (_isExtlangSubtag(pSubtag, subtagLen)) {
2210 *pSep = 0;
2211 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2212
2213 pLastGoodPosition = pSep;
2214 if (extlangIdx < 3) {
2215 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2216 } else {
2217 next = SCRT | REGN | VART | EXTS | PRIV;
2218 }
2219 continue;
2220 }
2221 }
2222 if (next & SCRT) {
2223 if (_isScriptSubtag(pSubtag, subtagLen)) {
2224 char *p = pSubtag;
2225
2226 *pSep = 0;
2227
2228 /* to title case */
2229 *p = uprv_toupper(*p);
2230 p++;
2231 for (; *p; p++) {
2232 *p = uprv_tolower(*p);
2233 }
2234
2235 t->script = pSubtag;
2236
2237 pLastGoodPosition = pSep;
2238 next = REGN | VART | EXTS | PRIV;
2239 continue;
2240 }
2241 }
2242 if (next & REGN) {
2243 if (_isRegionSubtag(pSubtag, subtagLen)) {
2244 *pSep = 0;
2245 // TODO: move deprecated region code handling here.
2246 t->region = T_CString_toUpperCase(pSubtag);
2247
2248 pLastGoodPosition = pSep;
2249 next = VART | EXTS | PRIV;
2250 continue;
2251 }
2252 }
2253 if (next & VART) {
2254 if (_isVariantSubtag(pSubtag, subtagLen) ||
2255 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2256 VariantListEntry *var;
2257 UBool isAdded;
2258
2259 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2260 if (var == NULL) {
2261 *status = U_MEMORY_ALLOCATION_ERROR;
2262 goto error;
2263 }
2264 *pSep = 0;
2265 var->variant = T_CString_toUpperCase(pSubtag);
2266 isAdded = _addVariantToList(&(t->variants), var);
2267 if (!isAdded) {
2268 /* duplicated variant entry */
2269 uprv_free(var);
2270 break;
2271 }
2272 pLastGoodPosition = pSep;
2273 next = VART | EXTS | PRIV;
2274 continue;
2275 }
2276 }
2277 if (next & EXTS) {
2278 if (_isExtensionSingleton(pSubtag, subtagLen)) {
2279 if (pExtension != NULL) {
2280 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2281 /* the previous extension is incomplete */
2282 uprv_free(pExtension);
2283 pExtension = NULL;
2284 break;
2285 }
2286
2287 /* terminate the previous extension value */
2288 *pExtValueSubtagEnd = 0;
2289 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2290
2291 /* insert the extension to the list */
2292 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2293 pLastGoodPosition = pExtValueSubtagEnd;
2294 } else {
2295 /* stop parsing here */
2296 uprv_free(pExtension);
2297 pExtension = NULL;
2298 break;
2299 }
2300 }
2301
2302 /* create a new extension */
2303 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2304 if (pExtension == NULL) {
2305 *status = U_MEMORY_ALLOCATION_ERROR;
2306 goto error;
2307 }
2308 *pSep = 0;
2309 pExtension->key = T_CString_toLowerCase(pSubtag);
2310 pExtension->value = NULL; /* will be set later */
2311
2312 /*
2313 * reset the start and the end location of extension value
2314 * subtags for this extension
2315 */
2316 pExtValueSubtag = NULL;
2317 pExtValueSubtagEnd = NULL;
2318
2319 next = EXTV;
2320 continue;
2321 }
2322 }
2323 if (next & EXTV) {
2324 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2325 if (pExtValueSubtag == NULL) {
2326 /* if the start postion of this extension's value is not yet,
2327 this one is the first value subtag */
2328 pExtValueSubtag = pSubtag;
2329 }
2330
2331 /* Mark the end of this subtag */
2332 pExtValueSubtagEnd = pSep;
2333 next = EXTS | EXTV | PRIV;
2334
2335 continue;
2336 }
2337 }
2338 if (next & PRIV) {
2339 if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
2340 char *pPrivuseVal;
2341
2342 if (pExtension != NULL) {
2343 /* Process the last extension */
2344 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2345 /* the previous extension is incomplete */
2346 uprv_free(pExtension);
2347 pExtension = NULL;
2348 break;
2349 } else {
2350 /* terminate the previous extension value */
2351 *pExtValueSubtagEnd = 0;
2352 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2353
2354 /* insert the extension to the list */
2355 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2356 pLastGoodPosition = pExtValueSubtagEnd;
2357 pExtension = NULL;
2358 } else {
2359 /* stop parsing here */
2360 uprv_free(pExtension);
2361 pExtension = NULL;
2362 break;
2363 }
2364 }
2365 }
2366
2367 /* The rest of part will be private use value subtags */
2368 if (pNext == NULL) {
2369 /* empty private use subtag */
2370 break;
2371 }
2372 /* back up the private use value start position */
2373 pPrivuseVal = pNext;
2374
2375 /* validate private use value subtags */
2376 while (pNext) {
2377 pSubtag = pNext;
2378 pSep = pSubtag;
2379 while (*pSep) {
2380 if (*pSep == SEP) {
2381 break;
2382 }
2383 pSep++;
2384 }
2385 if (*pSep == 0) {
2386 /* last subtag */
2387 pNext = NULL;
2388 } else {
2389 pNext = pSep + 1;
2390 }
2391 subtagLen = (int32_t)(pSep - pSubtag);
2392
2393 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2394 *pSep = 0;
2395 next = VART;
2396 privateuseVar = TRUE;
2397 break;
2398 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2399 pLastGoodPosition = pSep;
2400 } else {
2401 break;
2402 }
2403 }
2404
2405 if (next == VART) {
2406 continue;
2407 }
2408
2409 if (pLastGoodPosition - pPrivuseVal > 0) {
2410 *pLastGoodPosition = 0;
2411 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2412 }
2413 /* No more subtags, exiting the parse loop */
2414 break;
2415 }
2416 break;
2417 }
2418
2419 /* If we fell through here, it means this subtag is illegal - quit parsing */
2420 break;
2421 }
2422
2423 if (pExtension != NULL) {
2424 /* Process the last extension */
2425 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2426 /* the previous extension is incomplete */
2427 uprv_free(pExtension);
2428 } else {
2429 /* terminate the previous extension value */
2430 *pExtValueSubtagEnd = 0;
2431 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2432 /* insert the extension to the list */
2433 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2434 pLastGoodPosition = pExtValueSubtagEnd;
2435 } else {
2436 uprv_free(pExtension);
2437 }
2438 }
2439 }
2440
2441 if (parsedLen != NULL) {
2442 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
2443 (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
2444 }
2445
2446 return t;
2447
2448 error:
2449 ultag_close(t);
2450 return NULL;
2451 }
2452
2453 /**
2454 * Ticket #12705 - Turn optimization back on.
2455 */
2456 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2457 #pragma optimize( "", on )
2458 #endif
2459
2460 static void
ultag_close(ULanguageTag * langtag)2461 ultag_close(ULanguageTag* langtag) {
2462
2463 if (langtag == NULL) {
2464 return;
2465 }
2466
2467 uprv_free(langtag->buf);
2468
2469 if (langtag->variants) {
2470 VariantListEntry *curVar = langtag->variants;
2471 while (curVar) {
2472 VariantListEntry *nextVar = curVar->next;
2473 uprv_free(curVar);
2474 curVar = nextVar;
2475 }
2476 }
2477
2478 if (langtag->extensions) {
2479 ExtensionListEntry *curExt = langtag->extensions;
2480 while (curExt) {
2481 ExtensionListEntry *nextExt = curExt->next;
2482 uprv_free(curExt);
2483 curExt = nextExt;
2484 }
2485 }
2486
2487 uprv_free(langtag);
2488 }
2489
2490 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2491 ultag_getLanguage(const ULanguageTag* langtag) {
2492 return langtag->language;
2493 }
2494
#if 0
/*
 * Disabled code (compiled out by the surrounding #if 0).
 *
 * Walks DEPRECATEDLANGS two entries at a time until a NULL entry is reached.
 * When the first entry of a pair compares equal to the tag's language, the
 * second entry of that pair is returned; otherwise the tag's own language
 * is returned unchanged.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag *langtag) {
    const char *language = langtag->language;
    int32_t pairStart = 0;

    while (DEPRECATEDLANGS[pairStart] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairStart], language) == 0) {
            return DEPRECATEDLANGS[pairStart + 1];
        }
        pairStart += 2;
    }
    return language;
}
#endif
2507
2508 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2509 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2510 if (idx >= 0 && idx < MAXEXTLANG) {
2511 return langtag->extlang[idx];
2512 }
2513 return NULL;
2514 }
2515
2516 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2517 ultag_getExtlangSize(const ULanguageTag* langtag) {
2518 int32_t size = 0;
2519 int32_t i;
2520 for (i = 0; i < MAXEXTLANG; i++) {
2521 if (langtag->extlang[i]) {
2522 size++;
2523 }
2524 }
2525 return size;
2526 }
2527
2528 static const char*
ultag_getScript(const ULanguageTag * langtag)2529 ultag_getScript(const ULanguageTag* langtag) {
2530 return langtag->script;
2531 }
2532
2533 static const char*
ultag_getRegion(const ULanguageTag * langtag)2534 ultag_getRegion(const ULanguageTag* langtag) {
2535 return langtag->region;
2536 }
2537
2538 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2539 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2540 const char *var = NULL;
2541 VariantListEntry *cur = langtag->variants;
2542 int32_t i = 0;
2543 while (cur) {
2544 if (i == idx) {
2545 var = cur->variant;
2546 break;
2547 }
2548 cur = cur->next;
2549 i++;
2550 }
2551 return var;
2552 }
2553
2554 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2555 ultag_getVariantsSize(const ULanguageTag* langtag) {
2556 int32_t size = 0;
2557 VariantListEntry *cur = langtag->variants;
2558 while (TRUE) {
2559 if (cur == NULL) {
2560 break;
2561 }
2562 size++;
2563 cur = cur->next;
2564 }
2565 return size;
2566 }
2567
2568 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2569 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2570 const char *key = NULL;
2571 ExtensionListEntry *cur = langtag->extensions;
2572 int32_t i = 0;
2573 while (cur) {
2574 if (i == idx) {
2575 key = cur->key;
2576 break;
2577 }
2578 cur = cur->next;
2579 i++;
2580 }
2581 return key;
2582 }
2583
2584 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2585 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2586 const char *val = NULL;
2587 ExtensionListEntry *cur = langtag->extensions;
2588 int32_t i = 0;
2589 while (cur) {
2590 if (i == idx) {
2591 val = cur->value;
2592 break;
2593 }
2594 cur = cur->next;
2595 i++;
2596 }
2597 return val;
2598 }
2599
2600 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2601 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2602 int32_t size = 0;
2603 ExtensionListEntry *cur = langtag->extensions;
2604 while (TRUE) {
2605 if (cur == NULL) {
2606 break;
2607 }
2608 size++;
2609 cur = cur->next;
2610 }
2611 return size;
2612 }
2613
2614 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2615 ultag_getPrivateUse(const ULanguageTag* langtag) {
2616 return langtag->privateuse;
2617 }
2618
#if 0
/*
 * Disabled code (compiled out by the surrounding #if 0).
 *
 * Accessor for the grandfathered field of a parsed tag; the stored pointer
 * is returned as-is.
 */
static const char*
ultag_getGrandfathered(const ULanguageTag *langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2625
2626
2627 /*
2628 * -------------------------------------------------
2629 *
2630 * Locale/BCP47 conversion APIs, exposed as uloc_*
2631 *
2632 * -------------------------------------------------
2633 */
2634 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2635 uloc_toLanguageTag(const char* localeID,
2636 char* langtag,
2637 int32_t langtagCapacity,
2638 UBool strict,
2639 UErrorCode* status) {
2640 icu::CharString canonical;
2641 int32_t reslen;
2642 UErrorCode tmpStatus = U_ZERO_ERROR;
2643 UBool hadPosix = FALSE;
2644 const char* pKeywordStart;
2645
2646 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2647 int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
2648 if (resultCapacity > 0) {
2649 char* buffer;
2650
2651 for (;;) {
2652 buffer = canonical.getAppendBuffer(
2653 /*minCapacity=*/resultCapacity,
2654 /*desiredCapacityHint=*/resultCapacity,
2655 resultCapacity,
2656 tmpStatus);
2657
2658 if (U_FAILURE(tmpStatus)) {
2659 *status = tmpStatus;
2660 return 0;
2661 }
2662
2663 reslen =
2664 uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
2665
2666 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
2667 break;
2668 }
2669
2670 resultCapacity = reslen;
2671 tmpStatus = U_ZERO_ERROR;
2672 }
2673
2674 if (U_FAILURE(tmpStatus)) {
2675 *status = U_ILLEGAL_ARGUMENT_ERROR;
2676 return 0;
2677 }
2678
2679 canonical.append(buffer, reslen, tmpStatus);
2680 if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
2681 tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
2682 }
2683
2684 if (U_FAILURE(tmpStatus)) {
2685 *status = tmpStatus;
2686 return 0;
2687 }
2688 }
2689
2690 reslen = 0;
2691
2692 /* For handling special case - private use only tag */
2693 pKeywordStart = locale_getKeywordsStart(canonical.data());
2694 if (pKeywordStart == canonical.data()) {
2695 UEnumeration *kwdEnum;
2696 int kwdCnt = 0;
2697 UBool done = FALSE;
2698
2699 kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
2700 if (kwdEnum != NULL) {
2701 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2702 if (kwdCnt == 1) {
2703 const char *key;
2704 int32_t len = 0;
2705
2706 key = uenum_next(kwdEnum, &len, &tmpStatus);
2707 if (len == 1 && *key == PRIVATEUSE) {
2708 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2709 buf[0] = PRIVATEUSE;
2710 buf[1] = SEP;
2711 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2712 if (U_SUCCESS(tmpStatus)) {
2713 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2714 /* return private use only tag */
2715 reslen = len + 2;
2716 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2717 u_terminateChars(langtag, langtagCapacity, reslen, status);
2718 done = TRUE;
2719 } else if (strict) {
2720 *status = U_ILLEGAL_ARGUMENT_ERROR;
2721 done = TRUE;
2722 }
2723 /* if not strict mode, then "und" will be returned */
2724 } else {
2725 *status = U_ILLEGAL_ARGUMENT_ERROR;
2726 done = TRUE;
2727 }
2728 }
2729 }
2730 uenum_close(kwdEnum);
2731 if (done) {
2732 return reslen;
2733 }
2734 }
2735 }
2736
2737 reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
2738 reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
2739 reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
2740 reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2741 reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2742 reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2743
2744 return reslen;
2745 }
2746
2747
2748 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2749 uloc_forLanguageTag(const char* langtag,
2750 char* localeID,
2751 int32_t localeIDCapacity,
2752 int32_t* parsedLength,
2753 UErrorCode* status) {
2754 return ulocimp_forLanguageTag(
2755 langtag,
2756 -1,
2757 localeID,
2758 localeIDCapacity,
2759 parsedLength,
2760 status);
2761 }
2762
2763
2764 U_CAPI int32_t U_EXPORT2
ulocimp_forLanguageTag(const char * langtag,int32_t tagLen,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2765 ulocimp_forLanguageTag(const char* langtag,
2766 int32_t tagLen,
2767 char* localeID,
2768 int32_t localeIDCapacity,
2769 int32_t* parsedLength,
2770 UErrorCode* status) {
2771 ULanguageTag *lt;
2772 int32_t reslen = 0;
2773 const char *subtag, *p;
2774 int32_t len;
2775 int32_t i, n;
2776 UBool noRegion = TRUE;
2777
2778 lt = ultag_parse(langtag, tagLen, parsedLength, status);
2779 if (U_FAILURE(*status)) {
2780 return 0;
2781 }
2782
2783 /* language */
2784 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2785 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2786 len = (int32_t)uprv_strlen(subtag);
2787 if (len > 0) {
2788 if (reslen < localeIDCapacity) {
2789 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2790 }
2791 reslen += len;
2792 }
2793 }
2794
2795 /* script */
2796 subtag = ultag_getScript(lt);
2797 len = (int32_t)uprv_strlen(subtag);
2798 if (len > 0) {
2799 if (reslen < localeIDCapacity) {
2800 *(localeID + reslen) = LOCALE_SEP;
2801 }
2802 reslen++;
2803
2804 /* write out the script in title case */
2805 p = subtag;
2806 while (*p) {
2807 if (reslen < localeIDCapacity) {
2808 if (p == subtag) {
2809 *(localeID + reslen) = uprv_toupper(*p);
2810 } else {
2811 *(localeID + reslen) = *p;
2812 }
2813 }
2814 reslen++;
2815 p++;
2816 }
2817 }
2818
2819 /* region */
2820 subtag = ultag_getRegion(lt);
2821 len = (int32_t)uprv_strlen(subtag);
2822 if (len > 0) {
2823 if (reslen < localeIDCapacity) {
2824 *(localeID + reslen) = LOCALE_SEP;
2825 }
2826 reslen++;
2827 /* write out the retion in upper case */
2828 p = subtag;
2829 while (*p) {
2830 if (reslen < localeIDCapacity) {
2831 *(localeID + reslen) = uprv_toupper(*p);
2832 }
2833 reslen++;
2834 p++;
2835 }
2836 noRegion = FALSE;
2837 }
2838
2839 /* variants */
2840 n = ultag_getVariantsSize(lt);
2841 if (n > 0) {
2842 if (noRegion) {
2843 if (reslen < localeIDCapacity) {
2844 *(localeID + reslen) = LOCALE_SEP;
2845 }
2846 reslen++;
2847 }
2848
2849 for (i = 0; i < n; i++) {
2850 subtag = ultag_getVariant(lt, i);
2851 if (reslen < localeIDCapacity) {
2852 *(localeID + reslen) = LOCALE_SEP;
2853 }
2854 reslen++;
2855 /* write out the variant in upper case */
2856 p = subtag;
2857 while (*p) {
2858 if (reslen < localeIDCapacity) {
2859 *(localeID + reslen) = uprv_toupper(*p);
2860 }
2861 reslen++;
2862 p++;
2863 }
2864 }
2865 }
2866
2867 /* keywords */
2868 n = ultag_getExtensionsSize(lt);
2869 subtag = ultag_getPrivateUse(lt);
2870 if (n > 0 || uprv_strlen(subtag) > 0) {
2871 if (reslen == 0 && n > 0) {
2872 /* need a language */
2873 if (reslen < localeIDCapacity) {
2874 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2875 }
2876 reslen += LANG_UND_LEN;
2877 }
2878 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2879 reslen += len;
2880 }
2881
2882 ultag_close(lt);
2883 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2884 }
2885
2886
2887