• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2009-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
14 #include "ustr_imp.h"
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "putilimp.h"
19 #include "uinvchar.h"
20 #include "ulocimp.h"
21 #include "uvector.h"
22 #include "uassert.h"
23 
24 
/*
 * One node of a singly linked list of variant subtags.
 * `variant` points at the subtag text; `next` is the following node,
 * or NULL at the end of the list.
 */
struct VariantListEntry {
    const char *variant;
    struct VariantListEntry *next;
};
typedef struct VariantListEntry VariantListEntry;
30 
/*
 * One node of a singly linked list of attribute values.
 * `attribute` points at the attribute text; `next` is the following
 * node, or NULL at the end of the list.
 */
struct AttributeListEntry {
    const char *attribute;
    struct AttributeListEntry *next;
};
typedef struct AttributeListEntry AttributeListEntry;
36 
/*
 * One node of a singly linked list of extensions.
 * Each node carries a key/value pair of C strings; `next` is the
 * following node, or NULL at the end of the list.
 */
struct ExtensionListEntry {
    const char *key;
    const char *value;
    struct ExtensionListEntry *next;
};
typedef struct ExtensionListEntry ExtensionListEntry;
43 
44 #define MAXEXTLANG 3
45 typedef struct ULanguageTag {
46     char                *buf;   /* holding parsed subtags */
47     const char          *language;
48     const char          *extlang[MAXEXTLANG];
49     const char          *script;
50     const char          *region;
51     VariantListEntry    *variants;
52     ExtensionListEntry  *extensions;
53     const char          *privateuse;
54     const char          *grandfathered;
55 } ULanguageTag;
56 
/* NOTE(review): presumably the minimum subtag length; no use is visible in this part of the file. */
#define MINLEN 2
/* Character separating the subtags of a language tag. */
#define SEP '-'
/* Singleton letter reserved for private use; rejected by _isExtensionSingleton(). */
#define PRIVATEUSE 'x'
/* Singleton letter of the 'u' extension; used when ordering extension keys. */
#define LDMLEXT 'u'

/* Separator characters on the locale ID side.
   NOTE(review): meanings of the last three are inferred from their names - confirm against callers. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ASCII letter test, delegated to uprv_isASCIILetter(). */
#define ISALPHA(c) uprv_isASCIILetter(c)
/* ASCII digit test. Evaluates its argument twice: do not pass an expression with side effects. */
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* Shared empty string; default value of the string members of ULanguageTag. */
static const char EMPTY[] = "";
/* Language subtag emitted when a locale has no (or no valid) language. */
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
/* Extension key given special ordering in _addExtensionToList(). */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND, not counting the terminating NUL. */
#define LANG_UND_LEN 3
81 
82 /*
83  Updated on 2018-09-12 from
84  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
85 
 This table has 2 parts. The part for grandfathered tags is generated by the
 following script from the IANA language tag registry.
88 
89  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
90  egrep -A 7 'Type: grandfathered' | \
91  egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
92  awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
93  tr 'A-Z' 'a-z'
94 
95 
96  The 2nd part is made of five ICU-specific entries. They're kept for
97  the backward compatibility for now, even though there are no preferred
98  values. They may have to be removed for the strict BCP 47 compliance.
99 
100 */
/*
 * Flat array of string pairs: the entry at each even index is a
 * grandfathered tag (lower case) and the entry right after it is the tag
 * it is mapped to.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "en-gb-oed",    "en-gb-oxendict",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",

    // Grandfathered tags with no preferred value in the IANA
    // registry. Kept for now for the backward compatibility
    // because ICU has mapped them this way.
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-mingo",      "see-x-i-mingo",
    "zh-min",       "nan-x-zh-min",
};
134 
135 /*
136  Updated on 2018-09-12 from
137  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
138 
 The table lists redundant tags with preferred value in the IANA language tag registry.
140  It's generated with the following command:
141 
142  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
143  grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
144  awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
145  tr 'A-Z' 'a-z'
146 
 In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
 a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
149 */
150 
/*
 * Flat array of string pairs: the entry at each even index is a redundant
 * tag (lower case) and the entry right after it is its preferred value.
 */
static const char* const REDUNDANT[] = {
//  redundant       preferred
    "sgn-br",       "bzs",
    "sgn-co",       "csn",
    "sgn-de",       "gsg",
    "sgn-dk",       "dsl",
    "sgn-es",       "ssp",
    "sgn-fr",       "fsl",
    "sgn-gb",       "bfi",
    "sgn-gr",       "gss",
    "sgn-ie",       "isg",
    "sgn-it",       "ise",
    "sgn-jp",       "jsl",
    "sgn-mx",       "mfs",
    "sgn-ni",       "ncs",
    "sgn-nl",       "dse",
    "sgn-no",       "nsl",
    "sgn-pt",       "psr",
    "sgn-se",       "swl",
    "sgn-us",       "ase",
    "sgn-za",       "sfs",
    "zh-cmn",       "cmn",
    "zh-cmn-hans",  "cmn-hans",
    "zh-cmn-hant",  "cmn-hant",
    "zh-gan",       "gan",
    "zh-wuu",       "wuu",
    "zh-yue",       "yue",

    // variant tag with preferred value
    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
182 
183 /*
184   Updated on 2018-09-12 from
185   https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
186 
187   grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
188   grep -B1 'Preferred' | grep -v '^--' | \
189   awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
190 
191   Make sure that 2-letter language subtags come before 3-letter subtags.
192 */
/*
 * Pairs of (deprecated language subtag, replacement). Each element is a
 * char[4], i.e. room for up to three letters plus the terminating NUL.
 * The lookup in _appendLanguageToLanguageTag() stops at the first entry
 * longer than the input, so all 2-letter entries must stay ahead of the
 * 3-letter ones.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "in",       "id",
    "iw",       "he",
    "ji",       "yi",
    "jw",       "jv",
    "mo",       "ro",
    "aam",       "aas",
    "adp",       "dz",
    "aue",       "ktz",
    "ayx",       "nun",
    "bgm",       "bcg",
    "bjd",       "drl",
    "ccq",       "rki",
    "cjr",       "mom",
    "cka",       "cmr",
    "cmk",       "xch",
    "coy",       "pij",
    "cqu",       "quh",
    "drh",       "khk",
    "drw",       "prs",
    "gav",       "dev",
    "gfx",       "vaj",
    "ggn",       "gvr",
    "gti",       "nyc",
    "guv",       "duz",
    "hrr",       "jal",
    "ibi",       "opa",
    "ilw",       "gal",
    "jeg",       "oyb",
    "kgc",       "tdf",
    "kgh",       "kml",
    "koj",       "kwv",
    "krm",       "bmf",
    "ktr",       "dtp",
    "kvs",       "gdj",
    "kwq",       "yam",
    "kxe",       "tvd",
    "kzj",       "dtp",
    "kzt",       "dtp",
    "lii",       "raq",
    "lmm",       "rmx",
    "meg",       "cir",
    "mst",       "mry",
    "mwj",       "vaj",
    "myt",       "mry",
    "nad",       "xny",
    "ncp",       "kdz",
    "nnx",       "ngv",
    "nts",       "pij",
    "oun",       "vaj",
    "pcr",       "adx",
    "pmc",       "huw",
    "pmu",       "phr",
    "ppa",       "bfy",
    "ppr",       "lcq",
    "pry",       "prt",
    "puz",       "pub",
    "sca",       "hle",
    "skk",       "oyb",
    "tdu",       "dtp",
    "thc",       "tpo",
    "thx",       "oyb",
    "tie",       "ras",
    "tkk",       "twm",
    "tlw",       "weo",
    "tmp",       "tyj",
    "tne",       "kak",
    "tnf",       "prs",
    "tsf",       "taj",
    "uok",       "ema",
    "xba",       "cax",
    "xia",       "acn",
    "xkh",       "waw",
    "xsj",       "suj",
    "ybd",       "rki",
    "yma",       "lrr",
    "ymt",       "mtm",
    "yos",       "zom",
    "yuu",       "yug",
};
274 
275 /*
276   Updated on 2018-04-24 from
277 
278   curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
279   grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
280   grep -B1 'Preferred' | \
281   awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
282 */
/*
 * Pairs of (deprecated region subtag, replacement), upper case. Each
 * element is a char[3]: two letters plus the terminating NUL.
 * Consulted by _appendRegionToLanguageTag().
 */
static const char DEPRECATEDREGIONS[][3] = {
/*  deprecated  new */
    "BU",       "MM",
    "DD",       "DE",
    "FX",       "FR",
    "TP",       "TL",
    "YD",       "YE",
    "ZR",       "CD",
};
292 
293 /*
294 * -------------------------------------------------
295 *
296 * These ultag_ functions may be exposed as APIs later
297 *
298 * -------------------------------------------------
299 */
300 
301 static ULanguageTag*
302 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
303 
304 static void
305 ultag_close(ULanguageTag* langtag);
306 
307 static const char*
308 ultag_getLanguage(const ULanguageTag* langtag);
309 
310 #if 0
311 static const char*
312 ultag_getJDKLanguage(const ULanguageTag* langtag);
313 #endif
314 
315 static const char*
316 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
317 
318 static int32_t
319 ultag_getExtlangSize(const ULanguageTag* langtag);
320 
321 static const char*
322 ultag_getScript(const ULanguageTag* langtag);
323 
324 static const char*
325 ultag_getRegion(const ULanguageTag* langtag);
326 
327 static const char*
328 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
329 
330 static int32_t
331 ultag_getVariantsSize(const ULanguageTag* langtag);
332 
333 static const char*
334 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
335 
336 static const char*
337 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
338 
339 static int32_t
340 ultag_getExtensionsSize(const ULanguageTag* langtag);
341 
342 static const char*
343 ultag_getPrivateUse(const ULanguageTag* langtag);
344 
345 #if 0
346 static const char*
347 ultag_getGrandfathered(const ULanguageTag* langtag);
348 #endif
349 
350 namespace {
351 
352 // Helper class to memory manage CharString objects.
353 // Only ever stack-allocated, does not need to inherit UMemory.
354 class CharStringPool {
355 public:
CharStringPool()356     CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
357     ~CharStringPool() = default;
358 
359     CharStringPool(const CharStringPool&) = delete;
360     CharStringPool& operator=(const CharStringPool&) = delete;
361 
create()362     icu::CharString* create() {
363         if (U_FAILURE(status)) {
364             return nullptr;
365         }
366         icu::CharString* const obj = new icu::CharString;
367         if (obj == nullptr) {
368             status = U_MEMORY_ALLOCATION_ERROR;
369             return nullptr;
370         }
371         pool.addElement(obj, status);
372         if (U_FAILURE(status)) {
373             delete obj;
374             return nullptr;
375         }
376         return obj;
377     }
378 
379 private:
deleter(void * obj)380     static void U_CALLCONV deleter(void* obj) {
381         delete static_cast<icu::CharString*>(obj);
382     }
383 
384     UErrorCode status;
385     icu::UVector pool;
386 };
387 
388 }  // namespace
389 
390 /*
391 * -------------------------------------------------
392 *
393 * Language subtag syntax validation functions
394 *
395 * -------------------------------------------------
396 */
397 
398 static UBool
_isAlphaString(const char * s,int32_t len)399 _isAlphaString(const char* s, int32_t len) {
400     int32_t i;
401     for (i = 0; i < len; i++) {
402         if (!ISALPHA(*(s + i))) {
403             return FALSE;
404         }
405     }
406     return TRUE;
407 }
408 
409 static UBool
_isNumericString(const char * s,int32_t len)410 _isNumericString(const char* s, int32_t len) {
411     int32_t i;
412     for (i = 0; i < len; i++) {
413         if (!ISNUMERIC(*(s + i))) {
414             return FALSE;
415         }
416     }
417     return TRUE;
418 }
419 
420 static UBool
_isAlphaNumericString(const char * s,int32_t len)421 _isAlphaNumericString(const char* s, int32_t len) {
422     int32_t i;
423     for (i = 0; i < len; i++) {
424         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
425             return FALSE;
426         }
427     }
428     return TRUE;
429 }
430 
431 static UBool
_isLanguageSubtag(const char * s,int32_t len)432 _isLanguageSubtag(const char* s, int32_t len) {
433     /*
434      * language      = 2*3ALPHA            ; shortest ISO 639 code
435      *                 ["-" extlang]       ; sometimes followed by
436      *                                     ;   extended language subtags
437      *               / 4ALPHA              ; or reserved for future use
438      *               / 5*8ALPHA            ; or registered language subtag
439      */
440     if (len < 0) {
441         len = (int32_t)uprv_strlen(s);
442     }
443     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
444         return TRUE;
445     }
446     return FALSE;
447 }
448 
449 static UBool
_isExtlangSubtag(const char * s,int32_t len)450 _isExtlangSubtag(const char* s, int32_t len) {
451     /*
452      * extlang       = 3ALPHA              ; selected ISO 639 codes
453      *                 *2("-" 3ALPHA)      ; permanently reserved
454      */
455     if (len < 0) {
456         len = (int32_t)uprv_strlen(s);
457     }
458     if (len == 3 && _isAlphaString(s, len)) {
459         return TRUE;
460     }
461     return FALSE;
462 }
463 
464 static UBool
_isScriptSubtag(const char * s,int32_t len)465 _isScriptSubtag(const char* s, int32_t len) {
466     /*
467      * script        = 4ALPHA              ; ISO 15924 code
468      */
469     if (len < 0) {
470         len = (int32_t)uprv_strlen(s);
471     }
472     if (len == 4 && _isAlphaString(s, len)) {
473         return TRUE;
474     }
475     return FALSE;
476 }
477 
478 static UBool
_isRegionSubtag(const char * s,int32_t len)479 _isRegionSubtag(const char* s, int32_t len) {
480     /*
481      * region        = 2ALPHA              ; ISO 3166-1 code
482      *               / 3DIGIT              ; UN M.49 code
483      */
484     if (len < 0) {
485         len = (int32_t)uprv_strlen(s);
486     }
487     if (len == 2 && _isAlphaString(s, len)) {
488         return TRUE;
489     }
490     if (len == 3 && _isNumericString(s, len)) {
491         return TRUE;
492     }
493     return FALSE;
494 }
495 
496 static UBool
_isVariantSubtag(const char * s,int32_t len)497 _isVariantSubtag(const char* s, int32_t len) {
498     /*
499      * variant       = 5*8alphanum         ; registered variants
500      *               / (DIGIT 3alphanum)
501      */
502     if (len < 0) {
503         len = (int32_t)uprv_strlen(s);
504     }
505     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
506         return TRUE;
507     }
508     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
509         return TRUE;
510     }
511     return FALSE;
512 }
513 
514 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)515 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
516     /*
517      * variant       = 1*8alphanum         ; registered variants
518      *               / (DIGIT 3alphanum)
519      */
520     if (len < 0) {
521         len = (int32_t)uprv_strlen(s);
522     }
523     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
524         return TRUE;
525     }
526     return FALSE;
527 }
528 
529 static UBool
_isExtensionSingleton(const char * s,int32_t len)530 _isExtensionSingleton(const char* s, int32_t len) {
531     /*
532      * extension     = singleton 1*("-" (2*8alphanum))
533      */
534     if (len < 0) {
535         len = (int32_t)uprv_strlen(s);
536     }
537     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
538         return TRUE;
539     }
540     return FALSE;
541 }
542 
543 static UBool
_isExtensionSubtag(const char * s,int32_t len)544 _isExtensionSubtag(const char* s, int32_t len) {
545     /*
546      * extension     = singleton 1*("-" (2*8alphanum))
547      */
548     if (len < 0) {
549         len = (int32_t)uprv_strlen(s);
550     }
551     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
552         return TRUE;
553     }
554     return FALSE;
555 }
556 
557 static UBool
_isExtensionSubtags(const char * s,int32_t len)558 _isExtensionSubtags(const char* s, int32_t len) {
559     const char *p = s;
560     const char *pSubtag = NULL;
561 
562     if (len < 0) {
563         len = (int32_t)uprv_strlen(s);
564     }
565 
566     while ((p - s) < len) {
567         if (*p == SEP) {
568             if (pSubtag == NULL) {
569                 return FALSE;
570             }
571             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
572                 return FALSE;
573             }
574             pSubtag = NULL;
575         } else if (pSubtag == NULL) {
576             pSubtag = p;
577         }
578         p++;
579     }
580     if (pSubtag == NULL) {
581         return FALSE;
582     }
583     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
584 }
585 
586 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)587 _isPrivateuseValueSubtag(const char* s, int32_t len) {
588     /*
589      * privateuse    = "x" 1*("-" (1*8alphanum))
590      */
591     if (len < 0) {
592         len = (int32_t)uprv_strlen(s);
593     }
594     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
595         return TRUE;
596     }
597     return FALSE;
598 }
599 
600 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)601 _isPrivateuseValueSubtags(const char* s, int32_t len) {
602     const char *p = s;
603     const char *pSubtag = NULL;
604 
605     if (len < 0) {
606         len = (int32_t)uprv_strlen(s);
607     }
608 
609     while ((p - s) < len) {
610         if (*p == SEP) {
611             if (pSubtag == NULL) {
612                 return FALSE;
613             }
614             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
615                 return FALSE;
616             }
617             pSubtag = NULL;
618         } else if (pSubtag == NULL) {
619             pSubtag = p;
620         }
621         p++;
622     }
623     if (pSubtag == NULL) {
624         return FALSE;
625     }
626     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
627 }
628 
629 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)630 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
631     if (len < 0) {
632         len = (int32_t)uprv_strlen(s);
633     }
634     if (len == 2 && _isAlphaNumericString(s, len)) {
635         return TRUE;
636     }
637     return FALSE;
638 }
639 
640 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)641 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
642     const char* p;
643     int32_t subtagLen = 0;
644 
645     if (len < 0) {
646         len = (int32_t)uprv_strlen(s);
647     }
648 
649     for (p = s; len > 0; p++, len--) {
650         if (*p == SEP) {
651             if (subtagLen < 3) {
652                 return FALSE;
653             }
654             subtagLen = 0;
655         } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
656             subtagLen++;
657             if (subtagLen > 8) {
658                 return FALSE;
659             }
660         } else {
661             return FALSE;
662         }
663     }
664 
665     return (subtagLen >= 3);
666 }
667 /*
668 * -------------------------------------------------
669 *
670 * Helper functions
671 *
672 * -------------------------------------------------
673 */
674 
675 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)676 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
677     UBool bAdded = TRUE;
678 
679     if (*first == NULL) {
680         var->next = NULL;
681         *first = var;
682     } else {
683         VariantListEntry *prev, *cur;
684         int32_t cmp;
685 
686         /* variants order should be preserved */
687         prev = NULL;
688         cur = *first;
689         while (TRUE) {
690             if (cur == NULL) {
691                 prev->next = var;
692                 var->next = NULL;
693                 break;
694             }
695 
696             /* Checking for duplicate variant */
697             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
698             if (cmp == 0) {
699                 /* duplicated variant */
700                 bAdded = FALSE;
701                 break;
702             }
703             prev = cur;
704             cur = cur->next;
705         }
706     }
707 
708     return bAdded;
709 }
710 
711 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)712 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
713     UBool bAdded = TRUE;
714 
715     if (*first == NULL) {
716         attr->next = NULL;
717         *first = attr;
718     } else {
719         AttributeListEntry *prev, *cur;
720         int32_t cmp;
721 
722         /* reorder variants in alphabetical order */
723         prev = NULL;
724         cur = *first;
725         while (TRUE) {
726             if (cur == NULL) {
727                 prev->next = attr;
728                 attr->next = NULL;
729                 break;
730             }
731             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
732             if (cmp < 0) {
733                 if (prev == NULL) {
734                     *first = attr;
735                 } else {
736                     prev->next = attr;
737                 }
738                 attr->next = cur;
739                 break;
740             }
741             if (cmp == 0) {
742                 /* duplicated variant */
743                 bAdded = FALSE;
744                 break;
745             }
746             prev = cur;
747             cur = cur->next;
748         }
749     }
750 
751     return bAdded;
752 }
753 
754 
755 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)756 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
757     UBool bAdded = TRUE;
758 
759     if (*first == NULL) {
760         ext->next = NULL;
761         *first = ext;
762     } else {
763         ExtensionListEntry *prev, *cur;
764         int32_t cmp;
765 
766         /* reorder variants in alphabetical order */
767         prev = NULL;
768         cur = *first;
769         while (TRUE) {
770             if (cur == NULL) {
771                 prev->next = ext;
772                 ext->next = NULL;
773                 break;
774             }
775             if (localeToBCP) {
776                 /* special handling for locale to bcp conversion */
777                 int32_t len, curlen;
778 
779                 len = (int32_t)uprv_strlen(ext->key);
780                 curlen = (int32_t)uprv_strlen(cur->key);
781 
782                 if (len == 1 && curlen == 1) {
783                     if (*(ext->key) == *(cur->key)) {
784                         cmp = 0;
785                     } else if (*(ext->key) == PRIVATEUSE) {
786                         cmp = 1;
787                     } else if (*(cur->key) == PRIVATEUSE) {
788                         cmp = -1;
789                     } else {
790                         cmp = *(ext->key) - *(cur->key);
791                     }
792                 } else if (len == 1) {
793                     cmp = *(ext->key) - LDMLEXT;
794                 } else if (curlen == 1) {
795                     cmp = LDMLEXT - *(cur->key);
796                 } else {
797                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
798                     /* Both are u extension keys - we need special handling for 'attribute' */
799                     if (cmp != 0) {
800                         if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
801                             cmp = 1;
802                         } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
803                             cmp = -1;
804                         }
805                     }
806                 }
807             } else {
808                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
809             }
810             if (cmp < 0) {
811                 if (prev == NULL) {
812                     *first = ext;
813                 } else {
814                     prev->next = ext;
815                 }
816                 ext->next = cur;
817                 break;
818             }
819             if (cmp == 0) {
820                 /* duplicated extension key */
821                 bAdded = FALSE;
822                 break;
823             }
824             prev = cur;
825             cur = cur->next;
826         }
827     }
828 
829     return bAdded;
830 }
831 
832 static void
_initializeULanguageTag(ULanguageTag * langtag)833 _initializeULanguageTag(ULanguageTag* langtag) {
834     int32_t i;
835 
836     langtag->buf = NULL;
837 
838     langtag->language = EMPTY;
839     for (i = 0; i < MAXEXTLANG; i++) {
840         langtag->extlang[i] = NULL;
841     }
842 
843     langtag->script = EMPTY;
844     langtag->region = EMPTY;
845 
846     langtag->variants = NULL;
847     langtag->extensions = NULL;
848 
849     langtag->grandfathered = EMPTY;
850     langtag->privateuse = EMPTY;
851 }
852 
853 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)854 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
855     char buf[ULOC_LANG_CAPACITY];
856     UErrorCode tmpStatus = U_ZERO_ERROR;
857     int32_t len, i;
858     int32_t reslen = 0;
859 
860     if (U_FAILURE(*status)) {
861         return 0;
862     }
863 
864     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
865     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
866         if (strict) {
867             *status = U_ILLEGAL_ARGUMENT_ERROR;
868             return 0;
869         }
870         len = 0;
871     }
872 
873     /* Note: returned language code is in lower case letters */
874 
875     if (len == 0) {
876         if (reslen < capacity) {
877             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
878         }
879         reslen += LANG_UND_LEN;
880     } else if (!_isLanguageSubtag(buf, len)) {
881             /* invalid language code */
882         if (strict) {
883             *status = U_ILLEGAL_ARGUMENT_ERROR;
884             return 0;
885         }
886         if (reslen < capacity) {
887             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
888         }
889         reslen += LANG_UND_LEN;
890     } else {
891         /* resolve deprecated */
892         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
893             // 2-letter deprecated subtags are listede before 3-letter
894             // ones in DEPRECATEDLANGS[]. Get out of loop on coming
895             // across the 1st 3-letter subtag, if the input is a 2-letter code.
896             // to avoid continuing to try when there's no match.
897             if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
898             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
899                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
900                 len = (int32_t)uprv_strlen(buf);
901                 break;
902             }
903         }
904         if (reslen < capacity) {
905             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
906         }
907         reslen += len;
908     }
909     u_terminateChars(appendAt, capacity, reslen, status);
910     return reslen;
911 }
912 
913 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)914 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
915     char buf[ULOC_SCRIPT_CAPACITY];
916     UErrorCode tmpStatus = U_ZERO_ERROR;
917     int32_t len;
918     int32_t reslen = 0;
919 
920     if (U_FAILURE(*status)) {
921         return 0;
922     }
923 
924     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
925     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
926         if (strict) {
927             *status = U_ILLEGAL_ARGUMENT_ERROR;
928         }
929         return 0;
930     }
931 
932     if (len > 0) {
933         if (!_isScriptSubtag(buf, len)) {
934             /* invalid script code */
935             if (strict) {
936                 *status = U_ILLEGAL_ARGUMENT_ERROR;
937             }
938             return 0;
939         } else {
940             if (reslen < capacity) {
941                 *(appendAt + reslen) = SEP;
942             }
943             reslen++;
944             if (reslen < capacity) {
945                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
946             }
947             reslen += len;
948         }
949     }
950     u_terminateChars(appendAt, capacity, reslen, status);
951     return reslen;
952 }
953 
954 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)955 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
956     char buf[ULOC_COUNTRY_CAPACITY];
957     UErrorCode tmpStatus = U_ZERO_ERROR;
958     int32_t len;
959     int32_t reslen = 0;
960 
961     if (U_FAILURE(*status)) {
962         return 0;
963     }
964 
965     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
966     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
967         if (strict) {
968             *status = U_ILLEGAL_ARGUMENT_ERROR;
969         }
970         return 0;
971     }
972 
973     if (len > 0) {
974         if (!_isRegionSubtag(buf, len)) {
975             /* invalid region code */
976             if (strict) {
977                 *status = U_ILLEGAL_ARGUMENT_ERROR;
978             }
979             return 0;
980         } else {
981             if (reslen < capacity) {
982                 *(appendAt + reslen) = SEP;
983             }
984             reslen++;
985            /* resolve deprecated */
986             for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
987                 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
988                     uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
989                     len = (int32_t)uprv_strlen(buf);
990                     break;
991                 }
992             }
993 
994             if (reslen < capacity) {
995                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
996             }
997             reslen += len;
998         }
999     }
1000     u_terminateChars(appendAt, capacity, reslen, status);
1001     return reslen;
1002 }
1003 
1004 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)1005 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1006     char buf[ULOC_FULLNAME_CAPACITY];
1007     UErrorCode tmpStatus = U_ZERO_ERROR;
1008     int32_t len, i;
1009     int32_t reslen = 0;
1010 
1011     if (U_FAILURE(*status)) {
1012         return 0;
1013     }
1014 
1015     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1016     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1017         if (strict) {
1018             *status = U_ILLEGAL_ARGUMENT_ERROR;
1019         }
1020         return 0;
1021     }
1022 
1023     if (len > 0) {
1024         char *p, *pVar;
1025         UBool bNext = TRUE;
1026         VariantListEntry *var;
1027         VariantListEntry *varFirst = NULL;
1028 
1029         pVar = NULL;
1030         p = buf;
1031         while (bNext) {
1032             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1033                 if (*p == 0) {
1034                     bNext = FALSE;
1035                 } else {
1036                     *p = 0; /* terminate */
1037                 }
1038                 if (pVar == NULL) {
1039                     if (strict) {
1040                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1041                         break;
1042                     }
1043                     /* ignore empty variant */
1044                 } else {
1045                     /* ICU uses upper case letters for variants, but
1046                        the canonical format is lowercase in BCP47 */
1047                     for (i = 0; *(pVar + i) != 0; i++) {
1048                         *(pVar + i) = uprv_tolower(*(pVar + i));
1049                     }
1050 
1051                     /* validate */
1052                     if (_isVariantSubtag(pVar, -1)) {
1053                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
1054                             /* emit the variant to the list */
1055                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1056                             if (var == NULL) {
1057                                 *status = U_MEMORY_ALLOCATION_ERROR;
1058                                 break;
1059                             }
1060                             var->variant = pVar;
1061                             if (!_addVariantToList(&varFirst, var)) {
1062                                 /* duplicated variant */
1063                                 uprv_free(var);
1064                                 if (strict) {
1065                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1066                                     break;
1067                                 }
1068                             }
1069                         } else {
1070                             /* Special handling for POSIX variant, need to remember that we had it and then */
1071                             /* treat it like an extension later. */
1072                             *hadPosix = TRUE;
1073                         }
1074                     } else if (strict) {
1075                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1076                         break;
1077                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1078                         /* Handle private use subtags separately */
1079                         break;
1080                     }
1081                 }
1082                 /* reset variant starting position */
1083                 pVar = NULL;
1084             } else if (pVar == NULL) {
1085                 pVar = p;
1086             }
1087             p++;
1088         }
1089 
1090         if (U_SUCCESS(*status)) {
1091             if (varFirst != NULL) {
1092                 int32_t varLen;
1093 
1094                 /* write out validated/normalized variants to the target */
1095                 var = varFirst;
1096                 while (var != NULL) {
1097                     if (reslen < capacity) {
1098                         *(appendAt + reslen) = SEP;
1099                     }
1100                     reslen++;
1101                     varLen = (int32_t)uprv_strlen(var->variant);
1102                     if (reslen < capacity) {
1103                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1104                     }
1105                     reslen += varLen;
1106                     var = var->next;
1107                 }
1108             }
1109         }
1110 
1111         /* clean up */
1112         var = varFirst;
1113         while (var != NULL) {
1114             VariantListEntry *tmpVar = var->next;
1115             uprv_free(var);
1116             var = tmpVar;
1117         }
1118 
1119         if (U_FAILURE(*status)) {
1120             return 0;
1121         }
1122     }
1123 
1124     u_terminateChars(appendAt, capacity, reslen, status);
1125     return reslen;
1126 }
1127 
1128 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1129 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1130     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1131     int32_t attrBufLength = 0;
1132     UEnumeration *keywordEnum = NULL;
1133     int32_t reslen = 0;
1134 
1135     keywordEnum = uloc_openKeywords(localeID, status);
1136     if (U_FAILURE(*status) && !hadPosix) {
1137         uenum_close(keywordEnum);
1138         return 0;
1139     }
1140     if (keywordEnum != NULL || hadPosix) {
1141         /* reorder extensions */
1142         int32_t len;
1143         const char *key;
1144         ExtensionListEntry *firstExt = NULL;
1145         ExtensionListEntry *ext;
1146         AttributeListEntry *firstAttr = NULL;
1147         AttributeListEntry *attr;
1148         char *attrValue;
1149         CharStringPool extBufPool;
1150         const char *bcpKey=nullptr, *bcpValue=nullptr;
1151         UErrorCode tmpStatus = U_ZERO_ERROR;
1152         int32_t keylen;
1153         UBool isBcpUExt;
1154 
1155         while (TRUE) {
1156             icu::CharString buf;
1157             key = uenum_next(keywordEnum, NULL, status);
1158             if (key == NULL) {
1159                 break;
1160             }
1161             char* buffer;
1162             int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
1163 
1164             for (;;) {
1165                 buffer = buf.getAppendBuffer(
1166                         /*minCapacity=*/resultCapacity,
1167                         /*desiredCapacityHint=*/resultCapacity,
1168                         resultCapacity,
1169                         tmpStatus);
1170 
1171                 if (U_FAILURE(tmpStatus)) {
1172                     break;
1173                 }
1174 
1175                 len = uloc_getKeywordValue(
1176                         localeID, key, buffer, resultCapacity, &tmpStatus);
1177 
1178                 if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
1179                     break;
1180                 }
1181 
1182                 resultCapacity = len;
1183                 tmpStatus = U_ZERO_ERROR;
1184             }
1185 
1186             if (U_FAILURE(tmpStatus)) {
1187                 if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
1188                     *status = U_MEMORY_ALLOCATION_ERROR;
1189                     break;
1190                 }
1191                 if (strict) {
1192                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1193                     break;
1194                 }
1195                 /* ignore this keyword */
1196                 tmpStatus = U_ZERO_ERROR;
1197                 continue;
1198             }
1199 
1200             buf.append(buffer, len, tmpStatus);
1201             if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1202                 tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
1203             }
1204 
1205             keylen = (int32_t)uprv_strlen(key);
1206             isBcpUExt = (keylen > 1);
1207 
1208             /* special keyword used for representing Unicode locale attributes */
1209             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1210                 if (len > 0) {
1211                     int32_t i = 0;
1212                     while (TRUE) {
1213                         attrBufLength = 0;
1214                         for (; i < len; i++) {
1215                             if (buf[i] != '-') {
1216                                 attrBuf[attrBufLength++] = buf[i];
1217                             } else {
1218                                 i++;
1219                                 break;
1220                             }
1221                         }
1222                         if (attrBufLength > 0) {
1223                             attrBuf[attrBufLength] = 0;
1224 
1225                         } else if (i >= len){
1226                             break;
1227                         }
1228 
1229                         /* create AttributeListEntry */
1230                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1231                         if (attr == NULL) {
1232                             *status = U_MEMORY_ALLOCATION_ERROR;
1233                             break;
1234                         }
1235                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
1236                         if (attrValue == NULL) {
1237                             *status = U_MEMORY_ALLOCATION_ERROR;
1238                             break;
1239                         }
1240                         uprv_strcpy(attrValue, attrBuf);
1241                         attr->attribute = attrValue;
1242 
1243                         if (!_addAttributeToList(&firstAttr, attr)) {
1244                             uprv_free(attr);
1245                             uprv_free(attrValue);
1246                             if (strict) {
1247                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1248                                 break;
1249                             }
1250                         }
1251                     }
1252                     /* for a place holder ExtensionListEntry */
1253                     bcpKey = LOCALE_ATTRIBUTE_KEY;
1254                     bcpValue = NULL;
1255                 }
1256             } else if (isBcpUExt) {
1257                 bcpKey = uloc_toUnicodeLocaleKey(key);
1258                 if (bcpKey == NULL) {
1259                     if (strict) {
1260                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1261                         break;
1262                     }
1263                     continue;
1264                 }
1265 
1266                 /* we've checked buf is null-terminated above */
1267                 bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
1268                 if (bcpValue == NULL) {
1269                     if (strict) {
1270                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1271                         break;
1272                     }
1273                     continue;
1274                 }
1275                 if (bcpValue == buf.data()) {
1276                     /*
1277                     When uloc_toUnicodeLocaleType(key, buf) returns the
1278                     input value as is, the value is well-formed, but has
1279                     no known mapping. This implementation normalizes the
1280                     value to lower case
1281                     */
1282                     icu::CharString* extBuf = extBufPool.create();
1283                     if (extBuf == nullptr) {
1284                         *status = U_MEMORY_ALLOCATION_ERROR;
1285                         break;
1286                     }
1287                     int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
1288                     int32_t resultCapacity;
1289                     char* pExtBuf = extBuf->getAppendBuffer(
1290                             /*minCapacity=*/bcpValueLen,
1291                             /*desiredCapacityHint=*/bcpValueLen,
1292                             resultCapacity,
1293                             tmpStatus);
1294                     if (U_FAILURE(tmpStatus)) {
1295                         *status = tmpStatus;
1296                         break;
1297                     }
1298 
1299                     uprv_strcpy(pExtBuf, bcpValue);
1300                     T_CString_toLowerCase(pExtBuf);
1301 
1302                     extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
1303                     if (U_FAILURE(tmpStatus)) {
1304                         *status = tmpStatus;
1305                         break;
1306                     }
1307 
1308                     bcpValue = extBuf->data();
1309                 }
1310             } else {
1311                 if (*key == PRIVATEUSE) {
1312                     if (!_isPrivateuseValueSubtags(buf.data(), len)) {
1313                         if (strict) {
1314                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1315                             break;
1316                         }
1317                         continue;
1318                     }
1319                 } else {
1320                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
1321                         if (strict) {
1322                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1323                             break;
1324                         }
1325                         continue;
1326                     }
1327                 }
1328                 bcpKey = key;
1329                 icu::CharString* extBuf = extBufPool.create();
1330                 if (extBuf == nullptr) {
1331                     *status = U_MEMORY_ALLOCATION_ERROR;
1332                     break;
1333                 }
1334                 extBuf->append(buf.data(), len, tmpStatus);
1335                 if (U_FAILURE(tmpStatus)) {
1336                     *status = tmpStatus;
1337                     break;
1338                 }
1339                 bcpValue = extBuf->data();
1340             }
1341 
1342             /* create ExtensionListEntry */
1343             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1344             if (ext == NULL) {
1345                 *status = U_MEMORY_ALLOCATION_ERROR;
1346                 break;
1347             }
1348             ext->key = bcpKey;
1349             ext->value = bcpValue;
1350 
1351             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1352                 uprv_free(ext);
1353                 if (strict) {
1354                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1355                     break;
1356                 }
1357             }
1358         }
1359 
1360         /* Special handling for POSIX variant - add the keywords for POSIX */
1361         if (hadPosix) {
1362             /* create ExtensionListEntry for POSIX */
1363             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1364             if (ext == NULL) {
1365                 *status = U_MEMORY_ALLOCATION_ERROR;
1366                 goto cleanup;
1367             }
1368             ext->key = POSIX_KEY;
1369             ext->value = POSIX_VALUE;
1370 
1371             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1372                 uprv_free(ext);
1373             }
1374         }
1375 
1376         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1377             UBool startLDMLExtension = FALSE;
1378             for (ext = firstExt; ext; ext = ext->next) {
1379                 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1380                     /* first LDML u singlton extension */
1381                    if (reslen < capacity) {
1382                        *(appendAt + reslen) = SEP;
1383                    }
1384                    reslen++;
1385                    if (reslen < capacity) {
1386                        *(appendAt + reslen) = LDMLEXT;
1387                    }
1388                    reslen++;
1389 
1390                    startLDMLExtension = TRUE;
1391                 }
1392 
1393                 /* write out the sorted BCP47 attributes, extensions and private use */
1394                 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1395                     /* write the value for the attributes */
1396                     for (attr = firstAttr; attr; attr = attr->next) {
1397                         if (reslen < capacity) {
1398                             *(appendAt + reslen) = SEP;
1399                         }
1400                         reslen++;
1401                         len = (int32_t)uprv_strlen(attr->attribute);
1402                         if (reslen < capacity) {
1403                             uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1404                         }
1405                         reslen += len;
1406                     }
1407                 } else {
1408                     if (reslen < capacity) {
1409                         *(appendAt + reslen) = SEP;
1410                     }
1411                     reslen++;
1412                     len = (int32_t)uprv_strlen(ext->key);
1413                     if (reslen < capacity) {
1414                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1415                     }
1416                     reslen += len;
1417                     if (reslen < capacity) {
1418                         *(appendAt + reslen) = SEP;
1419                     }
1420                     reslen++;
1421                     len = (int32_t)uprv_strlen(ext->value);
1422                     if (reslen < capacity) {
1423                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1424                     }
1425                     reslen += len;
1426                 }
1427             }
1428         }
1429 cleanup:
1430         /* clean up */
1431         ext = firstExt;
1432         while (ext != NULL) {
1433             ExtensionListEntry *tmpExt = ext->next;
1434             uprv_free(ext);
1435             ext = tmpExt;
1436         }
1437 
1438         attr = firstAttr;
1439         while (attr != NULL) {
1440             AttributeListEntry *tmpAttr = attr->next;
1441             char *pValue = (char *)attr->attribute;
1442             uprv_free(pValue);
1443             uprv_free(attr);
1444             attr = tmpAttr;
1445         }
1446 
1447         uenum_close(keywordEnum);
1448 
1449         if (U_FAILURE(*status)) {
1450             return 0;
1451         }
1452     }
1453 
1454     return u_terminateChars(appendAt, capacity, reslen, status);
1455 }
1456 
1457 /**
1458  * Append keywords parsed from LDML extension value
1459  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1460  * Note: char* buf is used for storing keywords
1461  */
1462 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1463 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1464     const char *pTag;   /* beginning of current subtag */
1465     const char *pKwds;  /* beginning of key-type pairs */
1466     UBool variantExists = *posixVariant;
1467 
1468     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1469     ExtensionListEntry *kwd, *nextKwd;
1470 
1471     AttributeListEntry *attrFirst = NULL;   /* first attribute */
1472     AttributeListEntry *attr, *nextAttr;
1473 
1474     int32_t len;
1475     int32_t bufIdx = 0;
1476 
1477     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1478     int32_t attrBufIdx = 0;
1479 
1480     /* Reset the posixVariant value */
1481     *posixVariant = FALSE;
1482 
1483     pTag = ldmlext;
1484     pKwds = NULL;
1485 
1486     /* Iterate through u extension attributes */
1487     while (*pTag) {
1488         /* locate next separator char */
1489         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1490 
1491         if (ultag_isUnicodeLocaleKey(pTag, len)) {
1492             pKwds = pTag;
1493             break;
1494         }
1495 
1496         /* add this attribute to the list */
1497         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1498         if (attr == NULL) {
1499             *status = U_MEMORY_ALLOCATION_ERROR;
1500             goto cleanup;
1501         }
1502 
1503         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1504             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1505             attrBuf[attrBufIdx + len] = 0;
1506             attr->attribute = &attrBuf[attrBufIdx];
1507             attrBufIdx += (len + 1);
1508         } else {
1509             *status = U_ILLEGAL_ARGUMENT_ERROR;
1510             uprv_free(attr);
1511             goto cleanup;
1512         }
1513 
1514         if (!_addAttributeToList(&attrFirst, attr)) {
1515             *status = U_ILLEGAL_ARGUMENT_ERROR;
1516             uprv_free(attr);
1517             goto cleanup;
1518         }
1519 
1520         /* next tag */
1521         pTag += len;
1522         if (*pTag) {
1523             /* next to the separator */
1524             pTag++;
1525         }
1526     }
1527 
1528     if (attrFirst) {
1529         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1530 
1531         if (attrBufIdx > bufSize) {
1532             /* attrBufIdx == <total length of attribute subtag> + 1 */
1533             *status = U_ILLEGAL_ARGUMENT_ERROR;
1534             goto cleanup;
1535         }
1536 
1537         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1538         if (kwd == NULL) {
1539             *status = U_MEMORY_ALLOCATION_ERROR;
1540             goto cleanup;
1541         }
1542 
1543         kwd->key = LOCALE_ATTRIBUTE_KEY;
1544         kwd->value = buf;
1545 
1546         /* attribute subtags sorted in alphabetical order as type */
1547         attr = attrFirst;
1548         while (attr != NULL) {
1549             nextAttr = attr->next;
1550 
1551             /* buffer size check is done above */
1552             if (attr != attrFirst) {
1553                 *(buf + bufIdx) = SEP;
1554                 bufIdx++;
1555             }
1556 
1557             len = static_cast<int32_t>(uprv_strlen(attr->attribute));
1558             uprv_memcpy(buf + bufIdx, attr->attribute, len);
1559             bufIdx += len;
1560 
1561             attr = nextAttr;
1562         }
1563         *(buf + bufIdx) = 0;
1564         bufIdx++;
1565 
1566         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1567             *status = U_ILLEGAL_ARGUMENT_ERROR;
1568             uprv_free(kwd);
1569             goto cleanup;
1570         }
1571 
1572         /* once keyword entry is created, delete the attribute list */
1573         attr = attrFirst;
1574         while (attr != NULL) {
1575             nextAttr = attr->next;
1576             uprv_free(attr);
1577             attr = nextAttr;
1578         }
1579         attrFirst = NULL;
1580     }
1581 
1582     if (pKwds) {
1583         const char *pBcpKey = NULL;     /* u extenstion key subtag */
1584         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1585         int32_t bcpKeyLen = 0;
1586         int32_t bcpTypeLen = 0;
1587         UBool isDone = FALSE;
1588 
1589         pTag = pKwds;
1590         /* BCP47 representation of LDML key/type pairs */
1591         while (!isDone) {
1592             const char *pNextBcpKey = NULL;
1593             int32_t nextBcpKeyLen = 0;
1594             UBool emitKeyword = FALSE;
1595 
1596             if (*pTag) {
1597                 /* locate next separator char */
1598                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1599 
1600                 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1601                     if (pBcpKey) {
1602                         emitKeyword = TRUE;
1603                         pNextBcpKey = pTag;
1604                         nextBcpKeyLen = len;
1605                     } else {
1606                         pBcpKey = pTag;
1607                         bcpKeyLen = len;
1608                     }
1609                 } else {
1610                     U_ASSERT(pBcpKey != NULL);
1611                     /* within LDML type subtags */
1612                     if (pBcpType) {
1613                         bcpTypeLen += (len + 1);
1614                     } else {
1615                         pBcpType = pTag;
1616                         bcpTypeLen = len;
1617                     }
1618                 }
1619 
1620                 /* next tag */
1621                 pTag += len;
1622                 if (*pTag) {
1623                     /* next to the separator */
1624                     pTag++;
1625                 }
1626             } else {
1627                 /* processing last one */
1628                 emitKeyword = TRUE;
1629                 isDone = TRUE;
1630             }
1631 
1632             if (emitKeyword) {
1633                 const char *pKey = NULL;    /* LDML key */
1634                 const char *pType = NULL;   /* LDML type */
1635 
1636                 char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
1637 
1638                 U_ASSERT(pBcpKey != NULL);
1639 
1640                 if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
1641                     /* the BCP key is invalid */
1642                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1643                     goto cleanup;
1644                 }
1645 
1646                 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1647                 bcpKeyBuf[bcpKeyLen] = 0;
1648 
1649                 /* u extension key to LDML key */
1650                 pKey = uloc_toLegacyKey(bcpKeyBuf);
1651                 if (pKey == NULL) {
1652                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1653                     goto cleanup;
1654                 }
1655                 if (pKey == bcpKeyBuf) {
1656                     /*
1657                     The key returned by toLegacyKey points to the input buffer.
1658                     We normalize the result key to lower case.
1659                     */
1660                     T_CString_toLowerCase(bcpKeyBuf);
1661                     if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1662                         uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1663                         pKey = buf + bufIdx;
1664                         bufIdx += bcpKeyLen;
1665                         *(buf + bufIdx) = 0;
1666                         bufIdx++;
1667                     } else {
1668                         *status = U_BUFFER_OVERFLOW_ERROR;
1669                         goto cleanup;
1670                     }
1671                 }
1672 
1673                 if (pBcpType) {
1674                     char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
1675                     if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
1676                         /* the BCP type is too long */
1677                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1678                         goto cleanup;
1679                     }
1680 
1681                     uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1682                     bcpTypeBuf[bcpTypeLen] = 0;
1683 
1684                     /* BCP type to locale type */
1685                     pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1686                     if (pType == NULL) {
1687                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1688                         goto cleanup;
1689                     }
1690                     if (pType == bcpTypeBuf) {
1691                         /*
1692                         The type returned by toLegacyType points to the input buffer.
1693                         We normalize the result type to lower case.
1694                         */
1695                         /* normalize to lower case */
1696                         T_CString_toLowerCase(bcpTypeBuf);
1697                         if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1698                             uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1699                             pType = buf + bufIdx;
1700                             bufIdx += bcpTypeLen;
1701                             *(buf + bufIdx) = 0;
1702                             bufIdx++;
1703                         } else {
1704                             *status = U_BUFFER_OVERFLOW_ERROR;
1705                             goto cleanup;
1706                         }
1707                     }
1708                 } else {
1709                     /* typeless - default type value is "yes" */
1710                     pType = LOCALE_TYPE_YES;
1711                 }
1712 
1713                 /* Special handling for u-va-posix, since we want to treat this as a variant,
1714                    not as a keyword */
1715                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1716                     *posixVariant = TRUE;
1717                 } else {
1718                     /* create an ExtensionListEntry for this keyword */
1719                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1720                     if (kwd == NULL) {
1721                         *status = U_MEMORY_ALLOCATION_ERROR;
1722                         goto cleanup;
1723                     }
1724 
1725                     kwd->key = pKey;
1726                     kwd->value = pType;
1727 
1728                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1729                         // duplicate keyword is allowed, Only the first
1730                         // is honored.
1731                         uprv_free(kwd);
1732                     }
1733                 }
1734 
1735                 pBcpKey = pNextBcpKey;
1736                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1737                 pBcpType = NULL;
1738                 bcpTypeLen = 0;
1739             }
1740         }
1741     }
1742 
1743     kwd = kwdFirst;
1744     while (kwd != NULL) {
1745         nextKwd = kwd->next;
1746         _addExtensionToList(appendTo, kwd, FALSE);
1747         kwd = nextKwd;
1748     }
1749 
1750     return;
1751 
1752 cleanup:
1753     attr = attrFirst;
1754     while (attr != NULL) {
1755         nextAttr = attr->next;
1756         uprv_free(attr);
1757         attr = nextAttr;
1758     }
1759 
1760     kwd = kwdFirst;
1761     while (kwd != NULL) {
1762         nextKwd = kwd->next;
1763         uprv_free(kwd);
1764         kwd = nextKwd;
1765     }
1766 }
1767 
1768 
1769 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1770 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1771     int32_t reslen = 0;
1772     int32_t i, n;
1773     int32_t len;
1774     ExtensionListEntry *kwdFirst = NULL;
1775     ExtensionListEntry *kwd;
1776     const char *key, *type;
1777     char *kwdBuf = NULL;
1778     int32_t kwdBufLength = capacity;
1779     UBool posixVariant = FALSE;
1780 
1781     if (U_FAILURE(*status)) {
1782         return 0;
1783     }
1784 
1785     kwdBuf = (char*)uprv_malloc(kwdBufLength);
1786     if (kwdBuf == NULL) {
1787         *status = U_MEMORY_ALLOCATION_ERROR;
1788         return 0;
1789     }
1790 
1791     /* Determine if variants already exists */
1792     if (ultag_getVariantsSize(langtag)) {
1793         posixVariant = TRUE;
1794     }
1795 
1796     n = ultag_getExtensionsSize(langtag);
1797 
1798     /* resolve locale keywords and reordering keys */
1799     for (i = 0; i < n; i++) {
1800         key = ultag_getExtensionKey(langtag, i);
1801         type = ultag_getExtensionValue(langtag, i);
1802         if (*key == LDMLEXT) {
1803             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1804             if (U_FAILURE(*status)) {
1805                 break;
1806             }
1807         } else {
1808             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1809             if (kwd == NULL) {
1810                 *status = U_MEMORY_ALLOCATION_ERROR;
1811                 break;
1812             }
1813             kwd->key = key;
1814             kwd->value = type;
1815             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1816                 uprv_free(kwd);
1817                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1818                 break;
1819             }
1820         }
1821     }
1822 
1823     if (U_SUCCESS(*status)) {
1824         type = ultag_getPrivateUse(langtag);
1825         if ((int32_t)uprv_strlen(type) > 0) {
1826             /* add private use as a keyword */
1827             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1828             if (kwd == NULL) {
1829                 *status = U_MEMORY_ALLOCATION_ERROR;
1830             } else {
1831                 kwd->key = PRIVATEUSE_KEY;
1832                 kwd->value = type;
1833                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1834                     uprv_free(kwd);
1835                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1836                 }
1837             }
1838         }
1839     }
1840 
1841     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1842 
1843     if (U_SUCCESS(*status) && posixVariant) {
1844         len = (int32_t) uprv_strlen(_POSIX);
1845         if (reslen < capacity) {
1846             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1847         }
1848         reslen += len;
1849     }
1850 
1851     if (U_SUCCESS(*status) && kwdFirst != NULL) {
1852         /* write out the sorted keywords */
1853         UBool firstValue = TRUE;
1854         kwd = kwdFirst;
1855         do {
1856             if (reslen < capacity) {
1857                 if (firstValue) {
1858                     /* '@' */
1859                     *(appendAt + reslen) = LOCALE_EXT_SEP;
1860                     firstValue = FALSE;
1861                 } else {
1862                     /* ';' */
1863                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1864                 }
1865             }
1866             reslen++;
1867 
1868             /* key */
1869             len = (int32_t)uprv_strlen(kwd->key);
1870             if (reslen < capacity) {
1871                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1872             }
1873             reslen += len;
1874 
1875             /* '=' */
1876             if (reslen < capacity) {
1877                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1878             }
1879             reslen++;
1880 
1881             /* type */
1882             len = (int32_t)uprv_strlen(kwd->value);
1883             if (reslen < capacity) {
1884                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1885             }
1886             reslen += len;
1887 
1888             kwd = kwd->next;
1889         } while (kwd);
1890     }
1891 
1892     /* clean up */
1893     kwd = kwdFirst;
1894     while (kwd != NULL) {
1895         ExtensionListEntry *tmpKwd = kwd->next;
1896         uprv_free(kwd);
1897         kwd = tmpKwd;
1898     }
1899 
1900     uprv_free(kwdBuf);
1901 
1902     if (U_FAILURE(*status)) {
1903         return 0;
1904     }
1905 
1906     return u_terminateChars(appendAt, capacity, reslen, status);
1907 }
1908 
1909 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1910 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1911     (void)hadPosix;
1912     char buf[ULOC_FULLNAME_CAPACITY];
1913     char tmpAppend[ULOC_FULLNAME_CAPACITY];
1914     UErrorCode tmpStatus = U_ZERO_ERROR;
1915     int32_t len, i;
1916     int32_t reslen = 0;
1917 
1918     if (U_FAILURE(*status)) {
1919         return 0;
1920     }
1921 
1922     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1923     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1924         if (strict) {
1925             *status = U_ILLEGAL_ARGUMENT_ERROR;
1926         }
1927         return 0;
1928     }
1929 
1930     if (len > 0) {
1931         char *p, *pPriv;
1932         UBool bNext = TRUE;
1933         UBool firstValue = TRUE;
1934         UBool writeValue;
1935 
1936         pPriv = NULL;
1937         p = buf;
1938         while (bNext) {
1939             writeValue = FALSE;
1940             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1941                 if (*p == 0) {
1942                     bNext = FALSE;
1943                 } else {
1944                     *p = 0; /* terminate */
1945                 }
1946                 if (pPriv != NULL) {
1947                     /* Private use in the canonical format is lowercase in BCP47 */
1948                     for (i = 0; *(pPriv + i) != 0; i++) {
1949                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
1950                     }
1951 
1952                     /* validate */
1953                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
1954                         if (firstValue) {
1955                             if (!_isVariantSubtag(pPriv, -1)) {
1956                                 writeValue = TRUE;
1957                             }
1958                         } else {
1959                             writeValue = TRUE;
1960                         }
1961                     } else if (strict) {
1962                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1963                         break;
1964                     } else {
1965                         break;
1966                     }
1967 
1968                     if (writeValue) {
1969                         if (reslen < capacity) {
1970                             tmpAppend[reslen++] = SEP;
1971                         }
1972 
1973                         if (firstValue) {
1974                             if (reslen < capacity) {
1975                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1976                             }
1977 
1978                             if (reslen < capacity) {
1979                                 tmpAppend[reslen++] = SEP;
1980                             }
1981 
1982                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1983                             if (reslen < capacity) {
1984                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1985                             }
1986                             reslen += len;
1987 
1988                             if (reslen < capacity) {
1989                                 tmpAppend[reslen++] = SEP;
1990                             }
1991 
1992                             firstValue = FALSE;
1993                         }
1994 
1995                         len = (int32_t)uprv_strlen(pPriv);
1996                         if (reslen < capacity) {
1997                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1998                         }
1999                         reslen += len;
2000                     }
2001                 }
2002                 /* reset private use starting position */
2003                 pPriv = NULL;
2004             } else if (pPriv == NULL) {
2005                 pPriv = p;
2006             }
2007             p++;
2008         }
2009 
2010         if (U_FAILURE(*status)) {
2011             return 0;
2012         }
2013     }
2014 
2015     if (U_SUCCESS(*status)) {
2016         len = reslen;
2017         if (reslen < capacity) {
2018             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2019         }
2020     }
2021 
2022     u_terminateChars(appendAt, capacity, reslen, status);
2023 
2024     return reslen;
2025 }
2026 
2027 /*
2028 * -------------------------------------------------
2029 *
2030 * ultag_ functions
2031 *
2032 * -------------------------------------------------
2033 */
2034 
/*
 * Bit flags used by the parser.  The parser keeps a mask of the subtag
 * categories that may legally come next; one bit per category.
 */
#define LANG (1 << 0)   /* language */
#define EXTL (1 << 1)   /* extended language */
#define SCRT (1 << 2)   /* script */
#define REGN (1 << 3)   /* region */
#define VART (1 << 4)   /* variant */
#define EXTS (1 << 5)   /* extension singleton */
#define EXTV (1 << 6)   /* extension value subtag */
#define PRIV (1 << 7)   /* private use */
2044 
2045 /**
2046  * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
2047  * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
2048  * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
2049  */
2050 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2051 #pragma optimize( "", off )
2052 #endif
2053 
2054 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)2055 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2056     ULanguageTag *t;
2057     char *tagBuf;
2058     int16_t next;
2059     char *pSubtag, *pNext, *pLastGoodPosition;
2060     int32_t subtagLen;
2061     int32_t extlangIdx;
2062     ExtensionListEntry *pExtension;
2063     char *pExtValueSubtag, *pExtValueSubtagEnd;
2064     int32_t i;
2065     UBool privateuseVar = FALSE;
2066     int32_t grandfatheredLen = 0;
2067 
2068     if (parsedLen != NULL) {
2069         *parsedLen = 0;
2070     }
2071 
2072     if (U_FAILURE(*status)) {
2073         return NULL;
2074     }
2075 
2076     if (tagLen < 0) {
2077         tagLen = (int32_t)uprv_strlen(tag);
2078     }
2079 
2080     /* copy the entire string */
2081     tagBuf = (char*)uprv_malloc(tagLen + 1);
2082     if (tagBuf == NULL) {
2083         *status = U_MEMORY_ALLOCATION_ERROR;
2084         return NULL;
2085     }
2086     uprv_memcpy(tagBuf, tag, tagLen);
2087     *(tagBuf + tagLen) = 0;
2088 
2089     /* create a ULanguageTag */
2090     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2091     if (t == NULL) {
2092         uprv_free(tagBuf);
2093         *status = U_MEMORY_ALLOCATION_ERROR;
2094         return NULL;
2095     }
2096     _initializeULanguageTag(t);
2097     t->buf = tagBuf;
2098 
2099     if (tagLen < MINLEN) {
2100         /* the input tag is too short - return empty ULanguageTag */
2101         return t;
2102     }
2103 
2104     /* check if the tag is grandfathered */
2105     for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
2106         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2107             int32_t newTagLength;
2108 
2109             grandfatheredLen = tagLen;  /* back up for output parsedLen */
2110             newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
2111             if (tagLen < newTagLength) {
2112                 uprv_free(tagBuf);
2113                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2114                 if (tagBuf == NULL) {
2115                     *status = U_MEMORY_ALLOCATION_ERROR;
2116                     ultag_close(t);
2117                     return NULL;
2118                 }
2119                 t->buf = tagBuf;
2120                 tagLen = newTagLength;
2121             }
2122             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2123             break;
2124         }
2125     }
2126 
2127     size_t parsedLenDelta = 0;
2128     if (grandfatheredLen == 0) {
2129         for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
2130             const char* redundantTag = REDUNDANT[i];
2131             size_t redundantTagLen = uprv_strlen(redundantTag);
2132             // The preferred tag for a redundant tag is always shorter than redundant
2133             // tag. A redundant tag may or may not be followed by other subtags.
2134             // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
2135             if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
2136                 const char* redundantTagEnd = tagBuf + redundantTagLen;
2137                 if (*redundantTagEnd  == '\0' || *redundantTagEnd == SEP) {
2138                     const char* preferredTag = REDUNDANT[i + 1];
2139                     size_t preferredTagLen = uprv_strlen(preferredTag);
2140                     uprv_strncpy(t->buf, preferredTag, preferredTagLen);
2141                     if (*redundantTagEnd == SEP) {
2142                         uprv_memmove(tagBuf + preferredTagLen,
2143                                      redundantTagEnd,
2144                                      tagLen - redundantTagLen + 1);
2145                     } else {
2146                         tagBuf[preferredTagLen] = '\0';
2147                     }
2148                     // parsedLen should be the length of the input
2149                     // before redundantTag is replaced by preferredTag.
2150                     // Save the delta to add it back later.
2151                     parsedLenDelta = redundantTagLen - preferredTagLen;
2152                     break;
2153                 }
2154             }
2155         }
2156     }
2157 
2158     /*
2159      * langtag      =   language
2160      *                  ["-" script]
2161      *                  ["-" region]
2162      *                  *("-" variant)
2163      *                  *("-" extension)
2164      *                  ["-" privateuse]
2165      */
2166 
2167     next = LANG | PRIV;
2168     pNext = pLastGoodPosition = tagBuf;
2169     extlangIdx = 0;
2170     pExtension = NULL;
2171     pExtValueSubtag = NULL;
2172     pExtValueSubtagEnd = NULL;
2173 
2174     while (pNext) {
2175         char *pSep;
2176 
2177         pSubtag = pNext;
2178 
2179         /* locate next separator char */
2180         pSep = pSubtag;
2181         while (*pSep) {
2182             if (*pSep == SEP) {
2183                 break;
2184             }
2185             pSep++;
2186         }
2187         if (*pSep == 0) {
2188             /* last subtag */
2189             pNext = NULL;
2190         } else {
2191             pNext = pSep + 1;
2192         }
2193         subtagLen = (int32_t)(pSep - pSubtag);
2194 
2195         if (next & LANG) {
2196             if (_isLanguageSubtag(pSubtag, subtagLen)) {
2197                 *pSep = 0;  /* terminate */
2198                 // TODO: move deprecated language code handling here.
2199                 t->language = T_CString_toLowerCase(pSubtag);
2200 
2201                 pLastGoodPosition = pSep;
2202                 next = SCRT | REGN | VART | EXTS | PRIV;
2203                 if (subtagLen <= 3)
2204                   next |= EXTL;
2205                 continue;
2206             }
2207         }
2208         if (next & EXTL) {
2209             if (_isExtlangSubtag(pSubtag, subtagLen)) {
2210                 *pSep = 0;
2211                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2212 
2213                 pLastGoodPosition = pSep;
2214                 if (extlangIdx < 3) {
2215                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2216                 } else {
2217                     next = SCRT | REGN | VART | EXTS | PRIV;
2218                 }
2219                 continue;
2220             }
2221         }
2222         if (next & SCRT) {
2223             if (_isScriptSubtag(pSubtag, subtagLen)) {
2224                 char *p = pSubtag;
2225 
2226                 *pSep = 0;
2227 
2228                 /* to title case */
2229                 *p = uprv_toupper(*p);
2230                 p++;
2231                 for (; *p; p++) {
2232                     *p = uprv_tolower(*p);
2233                 }
2234 
2235                 t->script = pSubtag;
2236 
2237                 pLastGoodPosition = pSep;
2238                 next = REGN | VART | EXTS | PRIV;
2239                 continue;
2240             }
2241         }
2242         if (next & REGN) {
2243             if (_isRegionSubtag(pSubtag, subtagLen)) {
2244                 *pSep = 0;
2245                 // TODO: move deprecated region code handling here.
2246                 t->region = T_CString_toUpperCase(pSubtag);
2247 
2248                 pLastGoodPosition = pSep;
2249                 next = VART | EXTS | PRIV;
2250                 continue;
2251             }
2252         }
2253         if (next & VART) {
2254             if (_isVariantSubtag(pSubtag, subtagLen) ||
2255                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2256                 VariantListEntry *var;
2257                 UBool isAdded;
2258 
2259                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2260                 if (var == NULL) {
2261                     *status = U_MEMORY_ALLOCATION_ERROR;
2262                     goto error;
2263                 }
2264                 *pSep = 0;
2265                 var->variant = T_CString_toUpperCase(pSubtag);
2266                 isAdded = _addVariantToList(&(t->variants), var);
2267                 if (!isAdded) {
2268                     /* duplicated variant entry */
2269                     uprv_free(var);
2270                     break;
2271                 }
2272                 pLastGoodPosition = pSep;
2273                 next = VART | EXTS | PRIV;
2274                 continue;
2275             }
2276         }
2277         if (next & EXTS) {
2278             if (_isExtensionSingleton(pSubtag, subtagLen)) {
2279                 if (pExtension != NULL) {
2280                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2281                         /* the previous extension is incomplete */
2282                         uprv_free(pExtension);
2283                         pExtension = NULL;
2284                         break;
2285                     }
2286 
2287                     /* terminate the previous extension value */
2288                     *pExtValueSubtagEnd = 0;
2289                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2290 
2291                     /* insert the extension to the list */
2292                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2293                         pLastGoodPosition = pExtValueSubtagEnd;
2294                     } else {
2295                         /* stop parsing here */
2296                         uprv_free(pExtension);
2297                         pExtension = NULL;
2298                         break;
2299                     }
2300                 }
2301 
2302                 /* create a new extension */
2303                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2304                 if (pExtension == NULL) {
2305                     *status = U_MEMORY_ALLOCATION_ERROR;
2306                     goto error;
2307                 }
2308                 *pSep = 0;
2309                 pExtension->key = T_CString_toLowerCase(pSubtag);
2310                 pExtension->value = NULL;   /* will be set later */
2311 
2312                 /*
2313                  * reset the start and the end location of extension value
2314                  * subtags for this extension
2315                  */
2316                 pExtValueSubtag = NULL;
2317                 pExtValueSubtagEnd = NULL;
2318 
2319                 next = EXTV;
2320                 continue;
2321             }
2322         }
2323         if (next & EXTV) {
2324             if (_isExtensionSubtag(pSubtag, subtagLen)) {
2325                 if (pExtValueSubtag == NULL) {
2326                     /* if the start postion of this extension's value is not yet,
2327                         this one is the first value subtag */
2328                     pExtValueSubtag = pSubtag;
2329                 }
2330 
2331                 /* Mark the end of this subtag */
2332                 pExtValueSubtagEnd = pSep;
2333                 next = EXTS | EXTV | PRIV;
2334 
2335                 continue;
2336             }
2337         }
2338         if (next & PRIV) {
2339             if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
2340                 char *pPrivuseVal;
2341 
2342                 if (pExtension != NULL) {
2343                     /* Process the last extension */
2344                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2345                         /* the previous extension is incomplete */
2346                         uprv_free(pExtension);
2347                         pExtension = NULL;
2348                         break;
2349                     } else {
2350                         /* terminate the previous extension value */
2351                         *pExtValueSubtagEnd = 0;
2352                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2353 
2354                         /* insert the extension to the list */
2355                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2356                             pLastGoodPosition = pExtValueSubtagEnd;
2357                             pExtension = NULL;
2358                         } else {
2359                         /* stop parsing here */
2360                             uprv_free(pExtension);
2361                             pExtension = NULL;
2362                             break;
2363                         }
2364                     }
2365                 }
2366 
2367                 /* The rest of part will be private use value subtags */
2368                 if (pNext == NULL) {
2369                     /* empty private use subtag */
2370                     break;
2371                 }
2372                 /* back up the private use value start position */
2373                 pPrivuseVal = pNext;
2374 
2375                 /* validate private use value subtags */
2376                 while (pNext) {
2377                     pSubtag = pNext;
2378                     pSep = pSubtag;
2379                     while (*pSep) {
2380                         if (*pSep == SEP) {
2381                             break;
2382                         }
2383                         pSep++;
2384                     }
2385                     if (*pSep == 0) {
2386                         /* last subtag */
2387                         pNext = NULL;
2388                     } else {
2389                         pNext = pSep + 1;
2390                     }
2391                     subtagLen = (int32_t)(pSep - pSubtag);
2392 
2393                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2394                         *pSep = 0;
2395                         next = VART;
2396                         privateuseVar = TRUE;
2397                         break;
2398                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2399                         pLastGoodPosition = pSep;
2400                     } else {
2401                         break;
2402                     }
2403                 }
2404 
2405                 if (next == VART) {
2406                     continue;
2407                 }
2408 
2409                 if (pLastGoodPosition - pPrivuseVal > 0) {
2410                     *pLastGoodPosition = 0;
2411                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2412                 }
2413                 /* No more subtags, exiting the parse loop */
2414                 break;
2415             }
2416             break;
2417         }
2418 
2419         /* If we fell through here, it means this subtag is illegal - quit parsing */
2420         break;
2421     }
2422 
2423     if (pExtension != NULL) {
2424         /* Process the last extension */
2425         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2426             /* the previous extension is incomplete */
2427             uprv_free(pExtension);
2428         } else {
2429             /* terminate the previous extension value */
2430             *pExtValueSubtagEnd = 0;
2431             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2432             /* insert the extension to the list */
2433             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2434                 pLastGoodPosition = pExtValueSubtagEnd;
2435             } else {
2436                 uprv_free(pExtension);
2437             }
2438         }
2439     }
2440 
2441     if (parsedLen != NULL) {
2442         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
2443             (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
2444     }
2445 
2446     return t;
2447 
2448 error:
2449     ultag_close(t);
2450     return NULL;
2451 }
2452 
2453 /**
2454 * Ticket #12705 - Turn optimization back on.
2455 */
2456 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2457 #pragma optimize( "", on )
2458 #endif
2459 
2460 static void
ultag_close(ULanguageTag * langtag)2461 ultag_close(ULanguageTag* langtag) {
2462 
2463     if (langtag == NULL) {
2464         return;
2465     }
2466 
2467     uprv_free(langtag->buf);
2468 
2469     if (langtag->variants) {
2470         VariantListEntry *curVar = langtag->variants;
2471         while (curVar) {
2472             VariantListEntry *nextVar = curVar->next;
2473             uprv_free(curVar);
2474             curVar = nextVar;
2475         }
2476     }
2477 
2478     if (langtag->extensions) {
2479         ExtensionListEntry *curExt = langtag->extensions;
2480         while (curExt) {
2481             ExtensionListEntry *nextExt = curExt->next;
2482             uprv_free(curExt);
2483             curExt = nextExt;
2484         }
2485     }
2486 
2487     uprv_free(langtag);
2488 }
2489 
2490 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2491 ultag_getLanguage(const ULanguageTag* langtag) {
2492     return langtag->language;
2493 }
2494 
#if 0
/*
 * Disabled code: maps the tag's language through the DEPRECATEDLANGS pair
 * table (entry i -> entry i + 1) and falls back to the language itself.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    const char *language = langtag->language;
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return language;
}
#endif
2507 
2508 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2509 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2510     if (idx >= 0 && idx < MAXEXTLANG) {
2511         return langtag->extlang[idx];
2512     }
2513     return NULL;
2514 }
2515 
2516 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2517 ultag_getExtlangSize(const ULanguageTag* langtag) {
2518     int32_t size = 0;
2519     int32_t i;
2520     for (i = 0; i < MAXEXTLANG; i++) {
2521         if (langtag->extlang[i]) {
2522             size++;
2523         }
2524     }
2525     return size;
2526 }
2527 
2528 static const char*
ultag_getScript(const ULanguageTag * langtag)2529 ultag_getScript(const ULanguageTag* langtag) {
2530     return langtag->script;
2531 }
2532 
2533 static const char*
ultag_getRegion(const ULanguageTag * langtag)2534 ultag_getRegion(const ULanguageTag* langtag) {
2535     return langtag->region;
2536 }
2537 
2538 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2539 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2540     const char *var = NULL;
2541     VariantListEntry *cur = langtag->variants;
2542     int32_t i = 0;
2543     while (cur) {
2544         if (i == idx) {
2545             var = cur->variant;
2546             break;
2547         }
2548         cur = cur->next;
2549         i++;
2550     }
2551     return var;
2552 }
2553 
2554 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2555 ultag_getVariantsSize(const ULanguageTag* langtag) {
2556     int32_t size = 0;
2557     VariantListEntry *cur = langtag->variants;
2558     while (TRUE) {
2559         if (cur == NULL) {
2560             break;
2561         }
2562         size++;
2563         cur = cur->next;
2564     }
2565     return size;
2566 }
2567 
2568 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2569 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2570     const char *key = NULL;
2571     ExtensionListEntry *cur = langtag->extensions;
2572     int32_t i = 0;
2573     while (cur) {
2574         if (i == idx) {
2575             key = cur->key;
2576             break;
2577         }
2578         cur = cur->next;
2579         i++;
2580     }
2581     return key;
2582 }
2583 
2584 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2585 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2586     const char *val = NULL;
2587     ExtensionListEntry *cur = langtag->extensions;
2588     int32_t i = 0;
2589     while (cur) {
2590         if (i == idx) {
2591             val = cur->value;
2592             break;
2593         }
2594         cur = cur->next;
2595         i++;
2596     }
2597     return val;
2598 }
2599 
2600 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2601 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2602     int32_t size = 0;
2603     ExtensionListEntry *cur = langtag->extensions;
2604     while (TRUE) {
2605         if (cur == NULL) {
2606             break;
2607         }
2608         size++;
2609         cur = cur->next;
2610     }
2611     return size;
2612 }
2613 
2614 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2615 ultag_getPrivateUse(const ULanguageTag* langtag) {
2616     return langtag->privateuse;
2617 }
2618 
#if 0
/* Disabled code: returns the grandfathered field of the tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2625 
2626 
2627 /*
2628 * -------------------------------------------------
2629 *
2630 * Locale/BCP47 conversion APIs, exposed as uloc_*
2631 *
2632 * -------------------------------------------------
2633 */
2634 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2635 uloc_toLanguageTag(const char* localeID,
2636                    char* langtag,
2637                    int32_t langtagCapacity,
2638                    UBool strict,
2639                    UErrorCode* status) {
2640     icu::CharString canonical;
2641     int32_t reslen;
2642     UErrorCode tmpStatus = U_ZERO_ERROR;
2643     UBool hadPosix = FALSE;
2644     const char* pKeywordStart;
2645 
2646     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2647     int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
2648     if (resultCapacity > 0) {
2649         char* buffer;
2650 
2651         for (;;) {
2652             buffer = canonical.getAppendBuffer(
2653                     /*minCapacity=*/resultCapacity,
2654                     /*desiredCapacityHint=*/resultCapacity,
2655                     resultCapacity,
2656                     tmpStatus);
2657 
2658             if (U_FAILURE(tmpStatus)) {
2659                 *status = tmpStatus;
2660                 return 0;
2661             }
2662 
2663             reslen =
2664                 uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
2665 
2666             if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
2667                 break;
2668             }
2669 
2670             resultCapacity = reslen;
2671             tmpStatus = U_ZERO_ERROR;
2672         }
2673 
2674         if (U_FAILURE(tmpStatus)) {
2675             *status = U_ILLEGAL_ARGUMENT_ERROR;
2676             return 0;
2677         }
2678 
2679         canonical.append(buffer, reslen, tmpStatus);
2680         if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
2681             tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
2682         }
2683 
2684         if (U_FAILURE(tmpStatus)) {
2685             *status = tmpStatus;
2686             return 0;
2687         }
2688     }
2689 
2690     reslen = 0;
2691 
2692     /* For handling special case - private use only tag */
2693     pKeywordStart = locale_getKeywordsStart(canonical.data());
2694     if (pKeywordStart == canonical.data()) {
2695         UEnumeration *kwdEnum;
2696         int kwdCnt = 0;
2697         UBool done = FALSE;
2698 
2699         kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
2700         if (kwdEnum != NULL) {
2701             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2702             if (kwdCnt == 1) {
2703                 const char *key;
2704                 int32_t len = 0;
2705 
2706                 key = uenum_next(kwdEnum, &len, &tmpStatus);
2707                 if (len == 1 && *key == PRIVATEUSE) {
2708                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2709                     buf[0] = PRIVATEUSE;
2710                     buf[1] = SEP;
2711                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2712                     if (U_SUCCESS(tmpStatus)) {
2713                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
2714                             /* return private use only tag */
2715                             reslen = len + 2;
2716                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2717                             u_terminateChars(langtag, langtagCapacity, reslen, status);
2718                             done = TRUE;
2719                         } else if (strict) {
2720                             *status = U_ILLEGAL_ARGUMENT_ERROR;
2721                             done = TRUE;
2722                         }
2723                         /* if not strict mode, then "und" will be returned */
2724                     } else {
2725                         *status = U_ILLEGAL_ARGUMENT_ERROR;
2726                         done = TRUE;
2727                     }
2728                 }
2729             }
2730             uenum_close(kwdEnum);
2731             if (done) {
2732                 return reslen;
2733             }
2734         }
2735     }
2736 
2737     reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
2738     reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
2739     reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
2740     reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2741     reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2742     reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2743 
2744     return reslen;
2745 }
2746 
2747 
2748 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2749 uloc_forLanguageTag(const char* langtag,
2750                     char* localeID,
2751                     int32_t localeIDCapacity,
2752                     int32_t* parsedLength,
2753                     UErrorCode* status) {
2754     return ulocimp_forLanguageTag(
2755             langtag,
2756             -1,
2757             localeID,
2758             localeIDCapacity,
2759             parsedLength,
2760             status);
2761 }
2762 
2763 
2764 U_CAPI int32_t U_EXPORT2
ulocimp_forLanguageTag(const char * langtag,int32_t tagLen,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2765 ulocimp_forLanguageTag(const char* langtag,
2766                        int32_t tagLen,
2767                        char* localeID,
2768                        int32_t localeIDCapacity,
2769                        int32_t* parsedLength,
2770                        UErrorCode* status) {
2771     ULanguageTag *lt;
2772     int32_t reslen = 0;
2773     const char *subtag, *p;
2774     int32_t len;
2775     int32_t i, n;
2776     UBool noRegion = TRUE;
2777 
2778     lt = ultag_parse(langtag, tagLen, parsedLength, status);
2779     if (U_FAILURE(*status)) {
2780         return 0;
2781     }
2782 
2783     /* language */
2784     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2785     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2786         len = (int32_t)uprv_strlen(subtag);
2787         if (len > 0) {
2788             if (reslen < localeIDCapacity) {
2789                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2790             }
2791             reslen += len;
2792         }
2793     }
2794 
2795     /* script */
2796     subtag = ultag_getScript(lt);
2797     len = (int32_t)uprv_strlen(subtag);
2798     if (len > 0) {
2799         if (reslen < localeIDCapacity) {
2800             *(localeID + reslen) = LOCALE_SEP;
2801         }
2802         reslen++;
2803 
2804         /* write out the script in title case */
2805         p = subtag;
2806         while (*p) {
2807             if (reslen < localeIDCapacity) {
2808                 if (p == subtag) {
2809                     *(localeID + reslen) = uprv_toupper(*p);
2810                 } else {
2811                     *(localeID + reslen) = *p;
2812                 }
2813             }
2814             reslen++;
2815             p++;
2816         }
2817     }
2818 
2819     /* region */
2820     subtag = ultag_getRegion(lt);
2821     len = (int32_t)uprv_strlen(subtag);
2822     if (len > 0) {
2823         if (reslen < localeIDCapacity) {
2824             *(localeID + reslen) = LOCALE_SEP;
2825         }
2826         reslen++;
2827         /* write out the retion in upper case */
2828         p = subtag;
2829         while (*p) {
2830             if (reslen < localeIDCapacity) {
2831                 *(localeID + reslen) = uprv_toupper(*p);
2832             }
2833             reslen++;
2834             p++;
2835         }
2836         noRegion = FALSE;
2837     }
2838 
2839     /* variants */
2840     n = ultag_getVariantsSize(lt);
2841     if (n > 0) {
2842         if (noRegion) {
2843             if (reslen < localeIDCapacity) {
2844                 *(localeID + reslen) = LOCALE_SEP;
2845             }
2846             reslen++;
2847         }
2848 
2849         for (i = 0; i < n; i++) {
2850             subtag = ultag_getVariant(lt, i);
2851             if (reslen < localeIDCapacity) {
2852                 *(localeID + reslen) = LOCALE_SEP;
2853             }
2854             reslen++;
2855             /* write out the variant in upper case */
2856             p = subtag;
2857             while (*p) {
2858                 if (reslen < localeIDCapacity) {
2859                     *(localeID + reslen) = uprv_toupper(*p);
2860                 }
2861                 reslen++;
2862                 p++;
2863             }
2864         }
2865     }
2866 
2867     /* keywords */
2868     n = ultag_getExtensionsSize(lt);
2869     subtag = ultag_getPrivateUse(lt);
2870     if (n > 0 || uprv_strlen(subtag) > 0) {
2871         if (reslen == 0 && n > 0) {
2872             /* need a language */
2873             if (reslen < localeIDCapacity) {
2874                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2875             }
2876             reslen += LANG_UND_LEN;
2877         }
2878         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2879         reslen += len;
2880     }
2881 
2882     ultag_close(lt);
2883     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2884 }
2885 
2886 
2887