• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2009-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
14 #include "ustr_imp.h"
15 #include "cmemory.h"
16 #include "cstring.h"
17 #include "putilimp.h"
18 #include "uinvchar.h"
19 #include "ulocimp.h"
20 #include "uassert.h"
21 
22 
/*
 * struct holding a single variant
 *
 * Node of a singly linked, NULL-terminated list of variant subtags.
 * In _appendVariantsToLanguageTag() the `variant` pointer refers into a
 * caller-side buffer and only the node itself is freed; other users of
 * this struct are not visible here -- confirm ownership before freeing
 * `variant`.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* variant subtag text (NUL-terminated) */
    struct VariantListEntry *next;      /* next node, or NULL at the tail */
} VariantListEntry;
28 
/*
 * struct holding a single attribute value
 *
 * Node of a singly linked, NULL-terminated list. _addAttributeToList()
 * keeps such a list sorted by `attribute` and free of duplicates.
 */
typedef struct AttributeListEntry {
    const char              *attribute; /* attribute text (NUL-terminated) */
    struct AttributeListEntry *next;    /* next node, or NULL at the tail */
} AttributeListEntry;
34 
/*
 * struct holding a single extension
 *
 * Node of a singly linked, NULL-terminated list of key/value pairs.
 * _addExtensionToList() keeps such a list ordered by `key` and rejects
 * a second entry with an equal key.
 */
typedef struct ExtensionListEntry {
    const char                  *key;   /* extension key (NUL-terminated) */
    const char                  *value; /* value associated with `key` */
    struct ExtensionListEntry   *next;  /* next node, or NULL at the tail */
} ExtensionListEntry;
41 
/* Number of extlang slots available in ULanguageTag. */
#define MAXEXTLANG 3
/*
 * Parsed representation of a language tag.
 *
 * _initializeULanguageTag() sets the string fields language, script,
 * region, privateuse and grandfathered to the shared empty string, and
 * sets buf, every extlang slot, variants and extensions to NULL.
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;              /* primary language subtag */
    const char          *extlang[MAXEXTLANG];   /* extended language subtags; unused slots are NULL */
    const char          *script;                /* script subtag */
    const char          *region;                /* region subtag */
    VariantListEntry    *variants;              /* head of the variant list, or NULL */
    ExtensionListEntry  *extensions;            /* head of the extension list, or NULL */
    const char          *privateuse;            /* private use part */
    const char          *grandfathered;         /* grandfathered tag -- presumably the original tag text; confirm in ultag_parse() */
} ULanguageTag;
54 
/* NOTE(review): MINLEN is not used in the visible part of the file; presumably a minimum tag length -- confirm. */
#define MINLEN 2
/* Separator between subtags of a language tag. */
#define SEP '-'
/* Singleton that introduces the private use part ("x"). */
#define PRIVATEUSE 'x'
/* Singleton of the 'u' extension; multi-character keys are ordered as if they were this singleton. */
#define LDMLEXT 'u'

/* Separator accepted between variants of a locale ID (in addition to SEP). */
#define LOCALE_SEP '_'
/* NOTE(review): the next three are not used in the visible part of the file; by name they
   delimit the keyword section of a locale ID ("@key=value;key=value") -- confirm. */
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification helpers. ISNUMERIC evaluates its argument twice,
   so do not pass an expression with side effects. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
67 
/* Shared empty string used as the initial value of ULanguageTag string fields. */
static const char EMPTY[] = "";
/* Language subtag emitted when a locale has no (valid) language code. */
static const char LANG_UND[] = "und";
/* NOTE(review): PRIVATEUSE_KEY, _POSIX, POSIX_KEY, PRIVUSE_VARIANT_PREFIX and LOCALE_TYPE_YES
   are not used in the visible part of the file; their exact roles should be confirmed at
   their use sites. */
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
/* Lower-cased variant that _appendVariantsToLanguageTag() reports via hadPosix instead of emitting. */
static const char POSIX_VALUE[] = "posix";
/* Locale keyword under which Unicode locale attributes are stored. */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without the terminating NUL. */
#define LANG_UND_LEN 3
79 
/*
 * Flat table of string pairs: entry [2*k] is a grandfathered tag and entry
 * [2*k + 1] is its preferred replacement. The table ends with a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
110 
/*
 * Flat table of string pairs: entry [2*k] is a deprecated language code and
 * entry [2*k + 1] is the code that replaces it. There is no terminator; the
 * table is walked with UPRV_LENGTHOF() in steps of two. Each element holds at
 * most three characters plus the NUL.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
117 
/*
* -------------------------------------------------
*
* These ultag_ functions may be exposed as APIs later
*
* Forward declarations only. All of them are file-static, so their
* definitions have to appear later in this translation unit. The
* declarations inside "#if 0" are compiled out.
*
* -------------------------------------------------
*/

/* NOTE(review): presumably parses `tag` (tagLen chars) into a new ULanguageTag and reports
   the consumed length through parsedLen -- confirm against the definition. */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

/* NOTE(review): presumably releases a ULanguageTag obtained from ultag_parse() -- confirm. */
static void
ultag_close(ULanguageTag* langtag);

/* Read-only accessors for the fields of a ULanguageTag. */
static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
174 
175 /*
176 * -------------------------------------------------
177 *
178 * Language subtag syntax validation functions
179 *
180 * -------------------------------------------------
181 */
182 
183 static UBool
_isAlphaString(const char * s,int32_t len)184 _isAlphaString(const char* s, int32_t len) {
185     int32_t i;
186     for (i = 0; i < len; i++) {
187         if (!ISALPHA(*(s + i))) {
188             return FALSE;
189         }
190     }
191     return TRUE;
192 }
193 
194 static UBool
_isNumericString(const char * s,int32_t len)195 _isNumericString(const char* s, int32_t len) {
196     int32_t i;
197     for (i = 0; i < len; i++) {
198         if (!ISNUMERIC(*(s + i))) {
199             return FALSE;
200         }
201     }
202     return TRUE;
203 }
204 
205 static UBool
_isAlphaNumericString(const char * s,int32_t len)206 _isAlphaNumericString(const char* s, int32_t len) {
207     int32_t i;
208     for (i = 0; i < len; i++) {
209         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
210             return FALSE;
211         }
212     }
213     return TRUE;
214 }
215 
216 static UBool
_isLanguageSubtag(const char * s,int32_t len)217 _isLanguageSubtag(const char* s, int32_t len) {
218     /*
219      * language      = 2*3ALPHA            ; shortest ISO 639 code
220      *                 ["-" extlang]       ; sometimes followed by
221      *                                     ;   extended language subtags
222      *               / 4ALPHA              ; or reserved for future use
223      *               / 5*8ALPHA            ; or registered language subtag
224      */
225     if (len < 0) {
226         len = (int32_t)uprv_strlen(s);
227     }
228     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
229         return TRUE;
230     }
231     return FALSE;
232 }
233 
234 static UBool
_isExtlangSubtag(const char * s,int32_t len)235 _isExtlangSubtag(const char* s, int32_t len) {
236     /*
237      * extlang       = 3ALPHA              ; selected ISO 639 codes
238      *                 *2("-" 3ALPHA)      ; permanently reserved
239      */
240     if (len < 0) {
241         len = (int32_t)uprv_strlen(s);
242     }
243     if (len == 3 && _isAlphaString(s, len)) {
244         return TRUE;
245     }
246     return FALSE;
247 }
248 
249 static UBool
_isScriptSubtag(const char * s,int32_t len)250 _isScriptSubtag(const char* s, int32_t len) {
251     /*
252      * script        = 4ALPHA              ; ISO 15924 code
253      */
254     if (len < 0) {
255         len = (int32_t)uprv_strlen(s);
256     }
257     if (len == 4 && _isAlphaString(s, len)) {
258         return TRUE;
259     }
260     return FALSE;
261 }
262 
263 static UBool
_isRegionSubtag(const char * s,int32_t len)264 _isRegionSubtag(const char* s, int32_t len) {
265     /*
266      * region        = 2ALPHA              ; ISO 3166-1 code
267      *               / 3DIGIT              ; UN M.49 code
268      */
269     if (len < 0) {
270         len = (int32_t)uprv_strlen(s);
271     }
272     if (len == 2 && _isAlphaString(s, len)) {
273         return TRUE;
274     }
275     if (len == 3 && _isNumericString(s, len)) {
276         return TRUE;
277     }
278     return FALSE;
279 }
280 
281 static UBool
_isVariantSubtag(const char * s,int32_t len)282 _isVariantSubtag(const char* s, int32_t len) {
283     /*
284      * variant       = 5*8alphanum         ; registered variants
285      *               / (DIGIT 3alphanum)
286      */
287     if (len < 0) {
288         len = (int32_t)uprv_strlen(s);
289     }
290     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
291         return TRUE;
292     }
293     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
294         return TRUE;
295     }
296     return FALSE;
297 }
298 
299 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)300 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
301     /*
302      * variant       = 1*8alphanum         ; registered variants
303      *               / (DIGIT 3alphanum)
304      */
305     if (len < 0) {
306         len = (int32_t)uprv_strlen(s);
307     }
308     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
309         return TRUE;
310     }
311     return FALSE;
312 }
313 
314 static UBool
_isExtensionSingleton(const char * s,int32_t len)315 _isExtensionSingleton(const char* s, int32_t len) {
316     /*
317      * extension     = singleton 1*("-" (2*8alphanum))
318      */
319     if (len < 0) {
320         len = (int32_t)uprv_strlen(s);
321     }
322     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
323         return TRUE;
324     }
325     return FALSE;
326 }
327 
328 static UBool
_isExtensionSubtag(const char * s,int32_t len)329 _isExtensionSubtag(const char* s, int32_t len) {
330     /*
331      * extension     = singleton 1*("-" (2*8alphanum))
332      */
333     if (len < 0) {
334         len = (int32_t)uprv_strlen(s);
335     }
336     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
337         return TRUE;
338     }
339     return FALSE;
340 }
341 
342 static UBool
_isExtensionSubtags(const char * s,int32_t len)343 _isExtensionSubtags(const char* s, int32_t len) {
344     const char *p = s;
345     const char *pSubtag = NULL;
346 
347     if (len < 0) {
348         len = (int32_t)uprv_strlen(s);
349     }
350 
351     while ((p - s) < len) {
352         if (*p == SEP) {
353             if (pSubtag == NULL) {
354                 return FALSE;
355             }
356             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
357                 return FALSE;
358             }
359             pSubtag = NULL;
360         } else if (pSubtag == NULL) {
361             pSubtag = p;
362         }
363         p++;
364     }
365     if (pSubtag == NULL) {
366         return FALSE;
367     }
368     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
369 }
370 
371 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)372 _isPrivateuseValueSubtag(const char* s, int32_t len) {
373     /*
374      * privateuse    = "x" 1*("-" (1*8alphanum))
375      */
376     if (len < 0) {
377         len = (int32_t)uprv_strlen(s);
378     }
379     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
380         return TRUE;
381     }
382     return FALSE;
383 }
384 
385 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)386 _isPrivateuseValueSubtags(const char* s, int32_t len) {
387     const char *p = s;
388     const char *pSubtag = NULL;
389 
390     if (len < 0) {
391         len = (int32_t)uprv_strlen(s);
392     }
393 
394     while ((p - s) < len) {
395         if (*p == SEP) {
396             if (pSubtag == NULL) {
397                 return FALSE;
398             }
399             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
400                 return FALSE;
401             }
402             pSubtag = NULL;
403         } else if (pSubtag == NULL) {
404             pSubtag = p;
405         }
406         p++;
407     }
408     if (pSubtag == NULL) {
409         return FALSE;
410     }
411     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
412 }
413 
414 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)415 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
416     if (len < 0) {
417         len = (int32_t)uprv_strlen(s);
418     }
419     if (len == 2 && _isAlphaNumericString(s, len)) {
420         return TRUE;
421     }
422     return FALSE;
423 }
424 
425 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)426 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
427     const char* p;
428     int32_t subtagLen = 0;
429 
430     if (len < 0) {
431         len = (int32_t)uprv_strlen(s);
432     }
433 
434     for (p = s; len > 0; p++, len--) {
435         if (*p == SEP) {
436             if (subtagLen < 3) {
437                 return FALSE;
438             }
439             subtagLen = 0;
440         } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
441             subtagLen++;
442             if (subtagLen > 8) {
443                 return FALSE;
444             }
445         } else {
446             return FALSE;
447         }
448     }
449 
450     return (subtagLen >= 3);
451 }
452 /*
453 * -------------------------------------------------
454 *
455 * Helper functions
456 *
457 * -------------------------------------------------
458 */
459 
460 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)461 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
462     UBool bAdded = TRUE;
463 
464     if (*first == NULL) {
465         var->next = NULL;
466         *first = var;
467     } else {
468         VariantListEntry *prev, *cur;
469         int32_t cmp;
470 
471         /* variants order should be preserved */
472         prev = NULL;
473         cur = *first;
474         while (TRUE) {
475             if (cur == NULL) {
476                 prev->next = var;
477                 var->next = NULL;
478                 break;
479             }
480 
481             /* Checking for duplicate variant */
482             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
483             if (cmp == 0) {
484                 /* duplicated variant */
485                 bAdded = FALSE;
486                 break;
487             }
488             prev = cur;
489             cur = cur->next;
490         }
491     }
492 
493     return bAdded;
494 }
495 
496 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)497 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
498     UBool bAdded = TRUE;
499 
500     if (*first == NULL) {
501         attr->next = NULL;
502         *first = attr;
503     } else {
504         AttributeListEntry *prev, *cur;
505         int32_t cmp;
506 
507         /* reorder variants in alphabetical order */
508         prev = NULL;
509         cur = *first;
510         while (TRUE) {
511             if (cur == NULL) {
512                 prev->next = attr;
513                 attr->next = NULL;
514                 break;
515             }
516             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
517             if (cmp < 0) {
518                 if (prev == NULL) {
519                     *first = attr;
520                 } else {
521                     prev->next = attr;
522                 }
523                 attr->next = cur;
524                 break;
525             }
526             if (cmp == 0) {
527                 /* duplicated variant */
528                 bAdded = FALSE;
529                 break;
530             }
531             prev = cur;
532             cur = cur->next;
533         }
534     }
535 
536     return bAdded;
537 }
538 
539 
540 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)541 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
542     UBool bAdded = TRUE;
543 
544     if (*first == NULL) {
545         ext->next = NULL;
546         *first = ext;
547     } else {
548         ExtensionListEntry *prev, *cur;
549         int32_t cmp;
550 
551         /* reorder variants in alphabetical order */
552         prev = NULL;
553         cur = *first;
554         while (TRUE) {
555             if (cur == NULL) {
556                 prev->next = ext;
557                 ext->next = NULL;
558                 break;
559             }
560             if (localeToBCP) {
561                 /* special handling for locale to bcp conversion */
562                 int32_t len, curlen;
563 
564                 len = (int32_t)uprv_strlen(ext->key);
565                 curlen = (int32_t)uprv_strlen(cur->key);
566 
567                 if (len == 1 && curlen == 1) {
568                     if (*(ext->key) == *(cur->key)) {
569                         cmp = 0;
570                     } else if (*(ext->key) == PRIVATEUSE) {
571                         cmp = 1;
572                     } else if (*(cur->key) == PRIVATEUSE) {
573                         cmp = -1;
574                     } else {
575                         cmp = *(ext->key) - *(cur->key);
576                     }
577                 } else if (len == 1) {
578                     cmp = *(ext->key) - LDMLEXT;
579                 } else if (curlen == 1) {
580                     cmp = LDMLEXT - *(cur->key);
581                 } else {
582                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583                     /* Both are u extension keys - we need special handling for 'attribute' */
584                     if (cmp != 0) {
585                         if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
586                             cmp = 1;
587                         } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
588                             cmp = -1;
589                         }
590                     }
591                 }
592             } else {
593                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
594             }
595             if (cmp < 0) {
596                 if (prev == NULL) {
597                     *first = ext;
598                 } else {
599                     prev->next = ext;
600                 }
601                 ext->next = cur;
602                 break;
603             }
604             if (cmp == 0) {
605                 /* duplicated extension key */
606                 bAdded = FALSE;
607                 break;
608             }
609             prev = cur;
610             cur = cur->next;
611         }
612     }
613 
614     return bAdded;
615 }
616 
617 static void
_initializeULanguageTag(ULanguageTag * langtag)618 _initializeULanguageTag(ULanguageTag* langtag) {
619     int32_t i;
620 
621     langtag->buf = NULL;
622 
623     langtag->language = EMPTY;
624     for (i = 0; i < MAXEXTLANG; i++) {
625         langtag->extlang[i] = NULL;
626     }
627 
628     langtag->script = EMPTY;
629     langtag->region = EMPTY;
630 
631     langtag->variants = NULL;
632     langtag->extensions = NULL;
633 
634     langtag->grandfathered = EMPTY;
635     langtag->privateuse = EMPTY;
636 }
637 
638 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)639 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
640     char buf[ULOC_LANG_CAPACITY];
641     UErrorCode tmpStatus = U_ZERO_ERROR;
642     int32_t len, i;
643     int32_t reslen = 0;
644 
645     if (U_FAILURE(*status)) {
646         return 0;
647     }
648 
649     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
650     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
651         if (strict) {
652             *status = U_ILLEGAL_ARGUMENT_ERROR;
653             return 0;
654         }
655         len = 0;
656     }
657 
658     /* Note: returned language code is in lower case letters */
659 
660     if (len == 0) {
661         if (reslen < capacity) {
662             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
663         }
664         reslen += LANG_UND_LEN;
665     } else if (!_isLanguageSubtag(buf, len)) {
666             /* invalid language code */
667         if (strict) {
668             *status = U_ILLEGAL_ARGUMENT_ERROR;
669             return 0;
670         }
671         if (reslen < capacity) {
672             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
673         }
674         reslen += LANG_UND_LEN;
675     } else {
676         /* resolve deprecated */
677         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
678             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
679                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
680                 len = (int32_t)uprv_strlen(buf);
681                 break;
682             }
683         }
684         if (reslen < capacity) {
685             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
686         }
687         reslen += len;
688     }
689     u_terminateChars(appendAt, capacity, reslen, status);
690     return reslen;
691 }
692 
693 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)694 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
695     char buf[ULOC_SCRIPT_CAPACITY];
696     UErrorCode tmpStatus = U_ZERO_ERROR;
697     int32_t len;
698     int32_t reslen = 0;
699 
700     if (U_FAILURE(*status)) {
701         return 0;
702     }
703 
704     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
705     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
706         if (strict) {
707             *status = U_ILLEGAL_ARGUMENT_ERROR;
708         }
709         return 0;
710     }
711 
712     if (len > 0) {
713         if (!_isScriptSubtag(buf, len)) {
714             /* invalid script code */
715             if (strict) {
716                 *status = U_ILLEGAL_ARGUMENT_ERROR;
717             }
718             return 0;
719         } else {
720             if (reslen < capacity) {
721                 *(appendAt + reslen) = SEP;
722             }
723             reslen++;
724 
725             if (reslen < capacity) {
726                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
727             }
728             reslen += len;
729         }
730     }
731     u_terminateChars(appendAt, capacity, reslen, status);
732     return reslen;
733 }
734 
735 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)736 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
737     char buf[ULOC_COUNTRY_CAPACITY];
738     UErrorCode tmpStatus = U_ZERO_ERROR;
739     int32_t len;
740     int32_t reslen = 0;
741 
742     if (U_FAILURE(*status)) {
743         return 0;
744     }
745 
746     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
747     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
748         if (strict) {
749             *status = U_ILLEGAL_ARGUMENT_ERROR;
750         }
751         return 0;
752     }
753 
754     if (len > 0) {
755         if (!_isRegionSubtag(buf, len)) {
756             /* invalid region code */
757             if (strict) {
758                 *status = U_ILLEGAL_ARGUMENT_ERROR;
759             }
760             return 0;
761         } else {
762             if (reslen < capacity) {
763                 *(appendAt + reslen) = SEP;
764             }
765             reslen++;
766 
767             if (reslen < capacity) {
768                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
769             }
770             reslen += len;
771         }
772     }
773     u_terminateChars(appendAt, capacity, reslen, status);
774     return reslen;
775 }
776 
777 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)778 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
779     char buf[ULOC_FULLNAME_CAPACITY];
780     UErrorCode tmpStatus = U_ZERO_ERROR;
781     int32_t len, i;
782     int32_t reslen = 0;
783 
784     if (U_FAILURE(*status)) {
785         return 0;
786     }
787 
788     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
789     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
790         if (strict) {
791             *status = U_ILLEGAL_ARGUMENT_ERROR;
792         }
793         return 0;
794     }
795 
796     if (len > 0) {
797         char *p, *pVar;
798         UBool bNext = TRUE;
799         VariantListEntry *var;
800         VariantListEntry *varFirst = NULL;
801 
802         pVar = NULL;
803         p = buf;
804         while (bNext) {
805             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
806                 if (*p == 0) {
807                     bNext = FALSE;
808                 } else {
809                     *p = 0; /* terminate */
810                 }
811                 if (pVar == NULL) {
812                     if (strict) {
813                         *status = U_ILLEGAL_ARGUMENT_ERROR;
814                         break;
815                     }
816                     /* ignore empty variant */
817                 } else {
818                     /* ICU uses upper case letters for variants, but
819                        the canonical format is lowercase in BCP47 */
820                     for (i = 0; *(pVar + i) != 0; i++) {
821                         *(pVar + i) = uprv_tolower(*(pVar + i));
822                     }
823 
824                     /* validate */
825                     if (_isVariantSubtag(pVar, -1)) {
826                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
827                             /* emit the variant to the list */
828                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
829                             if (var == NULL) {
830                                 *status = U_MEMORY_ALLOCATION_ERROR;
831                                 break;
832                             }
833                             var->variant = pVar;
834                             if (!_addVariantToList(&varFirst, var)) {
835                                 /* duplicated variant */
836                                 uprv_free(var);
837                                 if (strict) {
838                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
839                                     break;
840                                 }
841                             }
842                         } else {
843                             /* Special handling for POSIX variant, need to remember that we had it and then */
844                             /* treat it like an extension later. */
845                             *hadPosix = TRUE;
846                         }
847                     } else if (strict) {
848                         *status = U_ILLEGAL_ARGUMENT_ERROR;
849                         break;
850                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
851                         /* Handle private use subtags separately */
852                         break;
853                     }
854                 }
855                 /* reset variant starting position */
856                 pVar = NULL;
857             } else if (pVar == NULL) {
858                 pVar = p;
859             }
860             p++;
861         }
862 
863         if (U_SUCCESS(*status)) {
864             if (varFirst != NULL) {
865                 int32_t varLen;
866 
867                 /* write out validated/normalized variants to the target */
868                 var = varFirst;
869                 while (var != NULL) {
870                     if (reslen < capacity) {
871                         *(appendAt + reslen) = SEP;
872                     }
873                     reslen++;
874                     varLen = (int32_t)uprv_strlen(var->variant);
875                     if (reslen < capacity) {
876                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
877                     }
878                     reslen += varLen;
879                     var = var->next;
880                 }
881             }
882         }
883 
884         /* clean up */
885         var = varFirst;
886         while (var != NULL) {
887             VariantListEntry *tmpVar = var->next;
888             uprv_free(var);
889             var = tmpVar;
890         }
891 
892         if (U_FAILURE(*status)) {
893             return 0;
894         }
895     }
896 
897     u_terminateChars(appendAt, capacity, reslen, status);
898     return reslen;
899 }
900 
901 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)902 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
903     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
904     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
905     int32_t attrBufLength = 0;
906     UEnumeration *keywordEnum = NULL;
907     int32_t reslen = 0;
908 
909     keywordEnum = uloc_openKeywords(localeID, status);
910     if (U_FAILURE(*status) && !hadPosix) {
911         uenum_close(keywordEnum);
912         return 0;
913     }
914     if (keywordEnum != NULL || hadPosix) {
915         /* reorder extensions */
916         int32_t len;
917         const char *key;
918         ExtensionListEntry *firstExt = NULL;
919         ExtensionListEntry *ext;
920         AttributeListEntry *firstAttr = NULL;
921         AttributeListEntry *attr;
922         char *attrValue;
923         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
924         char *pExtBuf = extBuf;
925         int32_t extBufCapacity = sizeof(extBuf);
926         const char *bcpKey, *bcpValue;
927         UErrorCode tmpStatus = U_ZERO_ERROR;
928         int32_t keylen;
929         UBool isBcpUExt;
930 
931         while (TRUE) {
932             key = uenum_next(keywordEnum, NULL, status);
933             if (key == NULL) {
934                 break;
935             }
936             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
937             /* buf must be null-terminated */
938             if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
939                 if (strict) {
940                     *status = U_ILLEGAL_ARGUMENT_ERROR;
941                     break;
942                 }
943                 /* ignore this keyword */
944                 tmpStatus = U_ZERO_ERROR;
945                 continue;
946             }
947 
948             keylen = (int32_t)uprv_strlen(key);
949             isBcpUExt = (keylen > 1);
950 
951             /* special keyword used for representing Unicode locale attributes */
952             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
953                 if (len > 0) {
954                     int32_t i = 0;
955                     while (TRUE) {
956                         attrBufLength = 0;
957                         for (; i < len; i++) {
958                             if (buf[i] != '-') {
959                                 attrBuf[attrBufLength++] = buf[i];
960                             } else {
961                                 i++;
962                                 break;
963                             }
964                         }
965                         if (attrBufLength > 0) {
966                             attrBuf[attrBufLength] = 0;
967 
968                         } else if (i >= len){
969                             break;
970                         }
971 
972                         /* create AttributeListEntry */
973                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
974                         if (attr == NULL) {
975                             *status = U_MEMORY_ALLOCATION_ERROR;
976                             break;
977                         }
978                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
979                         if (attrValue == NULL) {
980                             *status = U_MEMORY_ALLOCATION_ERROR;
981                             break;
982                         }
983                         uprv_strcpy(attrValue, attrBuf);
984                         attr->attribute = attrValue;
985 
986                         if (!_addAttributeToList(&firstAttr, attr)) {
987                             uprv_free(attr);
988                             uprv_free(attrValue);
989                             if (strict) {
990                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
991                                 break;
992                             }
993                         }
994                     }
995                     /* for a place holder ExtensionListEntry */
996                     bcpKey = LOCALE_ATTRIBUTE_KEY;
997                     bcpValue = NULL;
998                 }
999             } else if (isBcpUExt) {
1000                 bcpKey = uloc_toUnicodeLocaleKey(key);
1001                 if (bcpKey == NULL) {
1002                     if (strict) {
1003                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1004                         break;
1005                     }
1006                     continue;
1007                 }
1008 
1009                 /* we've checked buf is null-terminated above */
1010                 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1011                 if (bcpValue == NULL) {
1012                     if (strict) {
1013                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1014                         break;
1015                     }
1016                     continue;
1017                 }
1018                 if (bcpValue == buf) {
1019                     /*
1020                     When uloc_toUnicodeLocaleType(key, buf) returns the
1021                     input value as is, the value is well-formed, but has
1022                     no known mapping. This implementation normalizes the
1023                     the value to lower case
1024                     */
1025                     int32_t bcpValueLen = uprv_strlen(bcpValue);
1026                     if (bcpValueLen < extBufCapacity) {
1027                         uprv_strcpy(pExtBuf, bcpValue);
1028                         T_CString_toLowerCase(pExtBuf);
1029 
1030                         bcpValue = pExtBuf;
1031 
1032                         pExtBuf += (bcpValueLen + 1);
1033                         extBufCapacity -= (bcpValueLen + 1);
1034                     } else {
1035                         if (strict) {
1036                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1037                             break;
1038                         }
1039                         continue;
1040                     }
1041                 }
1042             } else {
1043                 if (*key == PRIVATEUSE) {
1044                     if (!_isPrivateuseValueSubtags(buf, len)) {
1045                         if (strict) {
1046                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1047                             break;
1048                         }
1049                         continue;
1050                     }
1051                 } else {
1052                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1053                         if (strict) {
1054                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1055                             break;
1056                         }
1057                         continue;
1058                     }
1059                 }
1060                 bcpKey = key;
1061                 if ((len + 1) < extBufCapacity) {
1062                     uprv_memcpy(pExtBuf, buf, len);
1063                     bcpValue = pExtBuf;
1064 
1065                     pExtBuf += len;
1066 
1067                     *pExtBuf = 0;
1068                     pExtBuf++;
1069 
1070                     extBufCapacity -= (len + 1);
1071                 } else {
1072                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1073                     break;
1074                 }
1075             }
1076 
1077             /* create ExtensionListEntry */
1078             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1079             if (ext == NULL) {
1080                 *status = U_MEMORY_ALLOCATION_ERROR;
1081                 break;
1082             }
1083             ext->key = bcpKey;
1084             ext->value = bcpValue;
1085 
1086             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1087                 uprv_free(ext);
1088                 if (strict) {
1089                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1090                     break;
1091                 }
1092             }
1093         }
1094 
1095         /* Special handling for POSIX variant - add the keywords for POSIX */
1096         if (hadPosix) {
1097             /* create ExtensionListEntry for POSIX */
1098             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1099             if (ext == NULL) {
1100                 *status = U_MEMORY_ALLOCATION_ERROR;
1101                 goto cleanup;
1102             }
1103             ext->key = POSIX_KEY;
1104             ext->value = POSIX_VALUE;
1105 
1106             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1107                 uprv_free(ext);
1108             }
1109         }
1110 
1111         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1112             UBool startLDMLExtension = FALSE;
1113             for (ext = firstExt; ext; ext = ext->next) {
1114                 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1115                     /* first LDML u singlton extension */
1116                    if (reslen < capacity) {
1117                        *(appendAt + reslen) = SEP;
1118                    }
1119                    reslen++;
1120                    if (reslen < capacity) {
1121                        *(appendAt + reslen) = LDMLEXT;
1122                    }
1123                    reslen++;
1124 
1125                    startLDMLExtension = TRUE;
1126                 }
1127 
1128                 /* write out the sorted BCP47 attributes, extensions and private use */
1129                 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1130                     /* write the value for the attributes */
1131                     for (attr = firstAttr; attr; attr = attr->next) {
1132                         if (reslen < capacity) {
1133                             *(appendAt + reslen) = SEP;
1134                         }
1135                         reslen++;
1136                         len = (int32_t)uprv_strlen(attr->attribute);
1137                         if (reslen < capacity) {
1138                             uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1139                         }
1140                         reslen += len;
1141                     }
1142                 } else {
1143                     if (reslen < capacity) {
1144                         *(appendAt + reslen) = SEP;
1145                     }
1146                     reslen++;
1147                     len = (int32_t)uprv_strlen(ext->key);
1148                     if (reslen < capacity) {
1149                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1150                     }
1151                     reslen += len;
1152                     if (reslen < capacity) {
1153                         *(appendAt + reslen) = SEP;
1154                     }
1155                     reslen++;
1156                     len = (int32_t)uprv_strlen(ext->value);
1157                     if (reslen < capacity) {
1158                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1159                     }
1160                     reslen += len;
1161                 }
1162             }
1163         }
1164 cleanup:
1165         /* clean up */
1166         ext = firstExt;
1167         while (ext != NULL) {
1168             ExtensionListEntry *tmpExt = ext->next;
1169             uprv_free(ext);
1170             ext = tmpExt;
1171         }
1172 
1173         attr = firstAttr;
1174         while (attr != NULL) {
1175             AttributeListEntry *tmpAttr = attr->next;
1176             char *pValue = (char *)attr->attribute;
1177             uprv_free(pValue);
1178             uprv_free(attr);
1179             attr = tmpAttr;
1180         }
1181 
1182         uenum_close(keywordEnum);
1183 
1184         if (U_FAILURE(*status)) {
1185             return 0;
1186         }
1187     }
1188 
1189     return u_terminateChars(appendAt, capacity, reslen, status);
1190 }
1191 
1192 /**
1193  * Append keywords parsed from LDML extension value
1194  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1195  * Note: char* buf is used for storing keywords
1196  */
1197 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1198 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1199     const char *pTag;   /* beginning of current subtag */
1200     const char *pKwds;  /* beginning of key-type pairs */
1201     UBool variantExists = *posixVariant;
1202 
1203     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1204     ExtensionListEntry *kwd, *nextKwd;
1205 
1206     AttributeListEntry *attrFirst = NULL;   /* first attribute */
1207     AttributeListEntry *attr, *nextAttr;
1208 
1209     int32_t len;
1210     int32_t bufIdx = 0;
1211 
1212     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1213     int32_t attrBufIdx = 0;
1214 
1215     /* Reset the posixVariant value */
1216     *posixVariant = FALSE;
1217 
1218     pTag = ldmlext;
1219     pKwds = NULL;
1220 
1221     /* Iterate through u extension attributes */
1222     while (*pTag) {
1223         /* locate next separator char */
1224         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1225 
1226         if (ultag_isUnicodeLocaleKey(pTag, len)) {
1227             pKwds = pTag;
1228             break;
1229         }
1230 
1231         /* add this attribute to the list */
1232         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1233         if (attr == NULL) {
1234             *status = U_MEMORY_ALLOCATION_ERROR;
1235             goto cleanup;
1236         }
1237 
1238         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1239             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1240             attrBuf[attrBufIdx + len] = 0;
1241             attr->attribute = &attrBuf[attrBufIdx];
1242             attrBufIdx += (len + 1);
1243         } else {
1244             *status = U_ILLEGAL_ARGUMENT_ERROR;
1245             goto cleanup;
1246         }
1247 
1248         if (!_addAttributeToList(&attrFirst, attr)) {
1249             *status = U_ILLEGAL_ARGUMENT_ERROR;
1250             uprv_free(attr);
1251             goto cleanup;
1252         }
1253 
1254         /* next tag */
1255         pTag += len;
1256         if (*pTag) {
1257             /* next to the separator */
1258             pTag++;
1259         }
1260     }
1261 
1262     if (attrFirst) {
1263         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1264 
1265         if (attrBufIdx > bufSize) {
1266             /* attrBufIdx == <total length of attribute subtag> + 1 */
1267             *status = U_ILLEGAL_ARGUMENT_ERROR;
1268             goto cleanup;
1269         }
1270 
1271         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1272         if (kwd == NULL) {
1273             *status = U_MEMORY_ALLOCATION_ERROR;
1274             goto cleanup;
1275         }
1276 
1277         kwd->key = LOCALE_ATTRIBUTE_KEY;
1278         kwd->value = buf;
1279 
1280         /* attribute subtags sorted in alphabetical order as type */
1281         attr = attrFirst;
1282         while (attr != NULL) {
1283             nextAttr = attr->next;
1284 
1285             /* buffer size check is done above */
1286             if (attr != attrFirst) {
1287                 *(buf + bufIdx) = SEP;
1288                 bufIdx++;
1289             }
1290 
1291             len = uprv_strlen(attr->attribute);
1292             uprv_memcpy(buf + bufIdx, attr->attribute, len);
1293             bufIdx += len;
1294 
1295             attr = nextAttr;
1296         }
1297         *(buf + bufIdx) = 0;
1298         bufIdx++;
1299 
1300         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1301             *status = U_ILLEGAL_ARGUMENT_ERROR;
1302             uprv_free(kwd);
1303             goto cleanup;
1304         }
1305 
1306         /* once keyword entry is created, delete the attribute list */
1307         attr = attrFirst;
1308         while (attr != NULL) {
1309             nextAttr = attr->next;
1310             uprv_free(attr);
1311             attr = nextAttr;
1312         }
1313         attrFirst = NULL;
1314     }
1315 
1316     if (pKwds) {
1317         const char *pBcpKey = NULL;     /* u extenstion key subtag */
1318         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1319         int32_t bcpKeyLen = 0;
1320         int32_t bcpTypeLen = 0;
1321         UBool isDone = FALSE;
1322 
1323         pTag = pKwds;
1324         /* BCP47 representation of LDML key/type pairs */
1325         while (!isDone) {
1326             const char *pNextBcpKey = NULL;
1327             int32_t nextBcpKeyLen = 0;
1328             UBool emitKeyword = FALSE;
1329 
1330             if (*pTag) {
1331                 /* locate next separator char */
1332                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1333 
1334                 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1335                     if (pBcpKey) {
1336                         emitKeyword = TRUE;
1337                         pNextBcpKey = pTag;
1338                         nextBcpKeyLen = len;
1339                     } else {
1340                         pBcpKey = pTag;
1341                         bcpKeyLen = len;
1342                     }
1343                 } else {
1344                     U_ASSERT(pBcpKey != NULL);
1345                     /* within LDML type subtags */
1346                     if (pBcpType) {
1347                         bcpTypeLen += (len + 1);
1348                     } else {
1349                         pBcpType = pTag;
1350                         bcpTypeLen = len;
1351                     }
1352                 }
1353 
1354                 /* next tag */
1355                 pTag += len;
1356                 if (*pTag) {
1357                     /* next to the separator */
1358                     pTag++;
1359                 }
1360             } else {
1361                 /* processing last one */
1362                 emitKeyword = TRUE;
1363                 isDone = TRUE;
1364             }
1365 
1366             if (emitKeyword) {
1367                 const char *pKey = NULL;    /* LDML key */
1368                 const char *pType = NULL;   /* LDML type */
1369 
1370                 char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
1371 
1372                 U_ASSERT(pBcpKey != NULL);
1373 
1374                 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1375                     /* the BCP key is invalid */
1376                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1377                     goto cleanup;
1378                 }
1379 
1380                 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1381                 bcpKeyBuf[bcpKeyLen] = 0;
1382 
1383                 /* u extension key to LDML key */
1384                 pKey = uloc_toLegacyKey(bcpKeyBuf);
1385                 if (pKey == NULL) {
1386                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1387                     goto cleanup;
1388                 }
1389                 if (pKey == bcpKeyBuf) {
1390                     /*
1391                     The key returned by toLegacyKey points to the input buffer.
1392                     We normalize the result key to lower case.
1393                     */
1394                     T_CString_toLowerCase(bcpKeyBuf);
1395                     if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1396                         uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1397                         pKey = buf + bufIdx;
1398                         bufIdx += bcpKeyLen;
1399                         *(buf + bufIdx) = 0;
1400                         bufIdx++;
1401                     } else {
1402                         *status = U_BUFFER_OVERFLOW_ERROR;
1403                         goto cleanup;
1404                     }
1405                 }
1406 
1407                 if (pBcpType) {
1408                     char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
1409                     if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1410                         /* the BCP type is too long */
1411                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1412                         goto cleanup;
1413                     }
1414 
1415                     uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1416                     bcpTypeBuf[bcpTypeLen] = 0;
1417 
1418                     /* BCP type to locale type */
1419                     pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1420                     if (pType == NULL) {
1421                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1422                         goto cleanup;
1423                     }
1424                     if (pType == bcpTypeBuf) {
1425                         /*
1426                         The type returned by toLegacyType points to the input buffer.
1427                         We normalize the result type to lower case.
1428                         */
1429                         /* normalize to lower case */
1430                         T_CString_toLowerCase(bcpTypeBuf);
1431                         if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1432                             uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1433                             pType = buf + bufIdx;
1434                             bufIdx += bcpTypeLen;
1435                             *(buf + bufIdx) = 0;
1436                             bufIdx++;
1437                         } else {
1438                             *status = U_BUFFER_OVERFLOW_ERROR;
1439                             goto cleanup;
1440                         }
1441                     }
1442                 } else {
1443                     /* typeless - default type value is "yes" */
1444                     pType = LOCALE_TYPE_YES;
1445                 }
1446 
1447                 /* Special handling for u-va-posix, since we want to treat this as a variant,
1448                    not as a keyword */
1449                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1450                     *posixVariant = TRUE;
1451                 } else {
1452                     /* create an ExtensionListEntry for this keyword */
1453                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1454                     if (kwd == NULL) {
1455                         *status = U_MEMORY_ALLOCATION_ERROR;
1456                         goto cleanup;
1457                     }
1458 
1459                     kwd->key = pKey;
1460                     kwd->value = pType;
1461 
1462                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1463                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1464                         uprv_free(kwd);
1465                         goto cleanup;
1466                     }
1467                 }
1468 
1469                 pBcpKey = pNextBcpKey;
1470                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1471                 pBcpType = NULL;
1472                 bcpTypeLen = 0;
1473             }
1474         }
1475     }
1476 
1477     kwd = kwdFirst;
1478     while (kwd != NULL) {
1479         nextKwd = kwd->next;
1480         _addExtensionToList(appendTo, kwd, FALSE);
1481         kwd = nextKwd;
1482     }
1483 
1484     return;
1485 
1486 cleanup:
1487     attr = attrFirst;
1488     while (attr != NULL) {
1489         nextAttr = attr->next;
1490         uprv_free(attr);
1491         attr = nextAttr;
1492     }
1493 
1494     kwd = kwdFirst;
1495     while (kwd != NULL) {
1496         nextKwd = kwd->next;
1497         uprv_free(kwd);
1498         kwd = nextKwd;
1499     }
1500 }
1501 
1502 
1503 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1504 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1505     int32_t reslen = 0;
1506     int32_t i, n;
1507     int32_t len;
1508     ExtensionListEntry *kwdFirst = NULL;
1509     ExtensionListEntry *kwd;
1510     const char *key, *type;
1511     char *kwdBuf = NULL;
1512     int32_t kwdBufLength = capacity;
1513     UBool posixVariant = FALSE;
1514 
1515     if (U_FAILURE(*status)) {
1516         return 0;
1517     }
1518 
1519     kwdBuf = (char*)uprv_malloc(kwdBufLength);
1520     if (kwdBuf == NULL) {
1521         *status = U_MEMORY_ALLOCATION_ERROR;
1522         return 0;
1523     }
1524 
1525     /* Determine if variants already exists */
1526     if (ultag_getVariantsSize(langtag)) {
1527         posixVariant = TRUE;
1528     }
1529 
1530     n = ultag_getExtensionsSize(langtag);
1531 
1532     /* resolve locale keywords and reordering keys */
1533     for (i = 0; i < n; i++) {
1534         key = ultag_getExtensionKey(langtag, i);
1535         type = ultag_getExtensionValue(langtag, i);
1536         if (*key == LDMLEXT) {
1537             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1538             if (U_FAILURE(*status)) {
1539                 break;
1540             }
1541         } else {
1542             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1543             if (kwd == NULL) {
1544                 *status = U_MEMORY_ALLOCATION_ERROR;
1545                 break;
1546             }
1547             kwd->key = key;
1548             kwd->value = type;
1549             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1550                 uprv_free(kwd);
1551                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1552                 break;
1553             }
1554         }
1555     }
1556 
1557     if (U_SUCCESS(*status)) {
1558         type = ultag_getPrivateUse(langtag);
1559         if ((int32_t)uprv_strlen(type) > 0) {
1560             /* add private use as a keyword */
1561             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1562             if (kwd == NULL) {
1563                 *status = U_MEMORY_ALLOCATION_ERROR;
1564             } else {
1565                 kwd->key = PRIVATEUSE_KEY;
1566                 kwd->value = type;
1567                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1568                     uprv_free(kwd);
1569                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1570                 }
1571             }
1572         }
1573     }
1574 
1575     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1576 
1577     if (U_SUCCESS(*status) && posixVariant) {
1578         len = (int32_t) uprv_strlen(_POSIX);
1579         if (reslen < capacity) {
1580             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1581         }
1582         reslen += len;
1583     }
1584 
1585     if (U_SUCCESS(*status) && kwdFirst != NULL) {
1586         /* write out the sorted keywords */
1587         UBool firstValue = TRUE;
1588         kwd = kwdFirst;
1589         do {
1590             if (reslen < capacity) {
1591                 if (firstValue) {
1592                     /* '@' */
1593                     *(appendAt + reslen) = LOCALE_EXT_SEP;
1594                     firstValue = FALSE;
1595                 } else {
1596                     /* ';' */
1597                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1598                 }
1599             }
1600             reslen++;
1601 
1602             /* key */
1603             len = (int32_t)uprv_strlen(kwd->key);
1604             if (reslen < capacity) {
1605                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1606             }
1607             reslen += len;
1608 
1609             /* '=' */
1610             if (reslen < capacity) {
1611                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1612             }
1613             reslen++;
1614 
1615             /* type */
1616             len = (int32_t)uprv_strlen(kwd->value);
1617             if (reslen < capacity) {
1618                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1619             }
1620             reslen += len;
1621 
1622             kwd = kwd->next;
1623         } while (kwd);
1624     }
1625 
1626     /* clean up */
1627     kwd = kwdFirst;
1628     while (kwd != NULL) {
1629         ExtensionListEntry *tmpKwd = kwd->next;
1630         uprv_free(kwd);
1631         kwd = tmpKwd;
1632     }
1633 
1634     uprv_free(kwdBuf);
1635 
1636     if (U_FAILURE(*status)) {
1637         return 0;
1638     }
1639 
1640     return u_terminateChars(appendAt, capacity, reslen, status);
1641 }
1642 
1643 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1644 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1645     char buf[ULOC_FULLNAME_CAPACITY];
1646     char tmpAppend[ULOC_FULLNAME_CAPACITY];
1647     UErrorCode tmpStatus = U_ZERO_ERROR;
1648     int32_t len, i;
1649     int32_t reslen = 0;
1650 
1651     if (U_FAILURE(*status)) {
1652         return 0;
1653     }
1654 
1655     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1656     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1657         if (strict) {
1658             *status = U_ILLEGAL_ARGUMENT_ERROR;
1659         }
1660         return 0;
1661     }
1662 
1663     if (len > 0) {
1664         char *p, *pPriv;
1665         UBool bNext = TRUE;
1666         UBool firstValue = TRUE;
1667         UBool writeValue;
1668 
1669         pPriv = NULL;
1670         p = buf;
1671         while (bNext) {
1672             writeValue = FALSE;
1673             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1674                 if (*p == 0) {
1675                     bNext = FALSE;
1676                 } else {
1677                     *p = 0; /* terminate */
1678                 }
1679                 if (pPriv != NULL) {
1680                     /* Private use in the canonical format is lowercase in BCP47 */
1681                     for (i = 0; *(pPriv + i) != 0; i++) {
1682                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
1683                     }
1684 
1685                     /* validate */
1686                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
1687                         if (firstValue) {
1688                             if (!_isVariantSubtag(pPriv, -1)) {
1689                                 writeValue = TRUE;
1690                             }
1691                         } else {
1692                             writeValue = TRUE;
1693                         }
1694                     } else if (strict) {
1695                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1696                         break;
1697                     } else {
1698                         break;
1699                     }
1700 
1701                     if (writeValue) {
1702                         if (reslen < capacity) {
1703                             tmpAppend[reslen++] = SEP;
1704                         }
1705 
1706                         if (firstValue) {
1707                             if (reslen < capacity) {
1708                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1709                             }
1710 
1711                             if (reslen < capacity) {
1712                                 tmpAppend[reslen++] = SEP;
1713                             }
1714 
1715                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1716                             if (reslen < capacity) {
1717                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1718                             }
1719                             reslen += len;
1720 
1721                             if (reslen < capacity) {
1722                                 tmpAppend[reslen++] = SEP;
1723                             }
1724 
1725                             firstValue = FALSE;
1726                         }
1727 
1728                         len = (int32_t)uprv_strlen(pPriv);
1729                         if (reslen < capacity) {
1730                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1731                         }
1732                         reslen += len;
1733                     }
1734                 }
1735                 /* reset private use starting position */
1736                 pPriv = NULL;
1737             } else if (pPriv == NULL) {
1738                 pPriv = p;
1739             }
1740             p++;
1741         }
1742 
1743         if (U_FAILURE(*status)) {
1744             return 0;
1745         }
1746     }
1747 
1748     if (U_SUCCESS(*status)) {
1749         len = reslen;
1750         if (reslen < capacity) {
1751             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1752         }
1753     }
1754 
1755     u_terminateChars(appendAt, capacity, reslen, status);
1756 
1757     return reslen;
1758 }
1759 
1760 /*
1761 * -------------------------------------------------
1762 *
1763 * ultag_ functions
1764 *
1765 * -------------------------------------------------
1766 */
1767 
1768 /* Bit flags used by the parser */
1769 #define LANG 0x0001
1770 #define EXTL 0x0002
1771 #define SCRT 0x0004
1772 #define REGN 0x0008
1773 #define VART 0x0010
1774 #define EXTS 0x0020
1775 #define EXTV 0x0040
1776 #define PRIV 0x0080
1777 
1778 /**
1779  * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
1780  * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
1781  * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
1782  */
1783 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
1784 #pragma optimize( "", off )
1785 #endif
1786 
1787 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)1788 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1789     ULanguageTag *t;
1790     char *tagBuf;
1791     int16_t next;
1792     char *pSubtag, *pNext, *pLastGoodPosition;
1793     int32_t subtagLen;
1794     int32_t extlangIdx;
1795     ExtensionListEntry *pExtension;
1796     char *pExtValueSubtag, *pExtValueSubtagEnd;
1797     int32_t i;
1798     UBool privateuseVar = FALSE;
1799     int32_t grandfatheredLen = 0;
1800 
1801     if (parsedLen != NULL) {
1802         *parsedLen = 0;
1803     }
1804 
1805     if (U_FAILURE(*status)) {
1806         return NULL;
1807     }
1808 
1809     if (tagLen < 0) {
1810         tagLen = (int32_t)uprv_strlen(tag);
1811     }
1812 
1813     /* copy the entire string */
1814     tagBuf = (char*)uprv_malloc(tagLen + 1);
1815     if (tagBuf == NULL) {
1816         *status = U_MEMORY_ALLOCATION_ERROR;
1817         return NULL;
1818     }
1819     uprv_memcpy(tagBuf, tag, tagLen);
1820     *(tagBuf + tagLen) = 0;
1821 
1822     /* create a ULanguageTag */
1823     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1824     if (t == NULL) {
1825         uprv_free(tagBuf);
1826         *status = U_MEMORY_ALLOCATION_ERROR;
1827         return NULL;
1828     }
1829     _initializeULanguageTag(t);
1830     t->buf = tagBuf;
1831 
1832     if (tagLen < MINLEN) {
1833         /* the input tag is too short - return empty ULanguageTag */
1834         return t;
1835     }
1836 
1837     /* check if the tag is grandfathered */
1838     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1839         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1840             int32_t newTagLength;
1841 
1842             grandfatheredLen = tagLen;  /* back up for output parsedLen */
1843             newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1844             if (tagLen < newTagLength) {
1845                 uprv_free(tagBuf);
1846                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1847                 if (tagBuf == NULL) {
1848                     *status = U_MEMORY_ALLOCATION_ERROR;
1849                     ultag_close(t);
1850                     return NULL;
1851                 }
1852                 t->buf = tagBuf;
1853                 tagLen = newTagLength;
1854             }
1855             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1856             break;
1857         }
1858     }
1859 
1860     /*
1861      * langtag      =   language
1862      *                  ["-" script]
1863      *                  ["-" region]
1864      *                  *("-" variant)
1865      *                  *("-" extension)
1866      *                  ["-" privateuse]
1867      */
1868 
1869     next = LANG | PRIV;
1870     pNext = pLastGoodPosition = tagBuf;
1871     extlangIdx = 0;
1872     pExtension = NULL;
1873     pExtValueSubtag = NULL;
1874     pExtValueSubtagEnd = NULL;
1875 
1876     while (pNext) {
1877         char *pSep;
1878 
1879         pSubtag = pNext;
1880 
1881         /* locate next separator char */
1882         pSep = pSubtag;
1883         while (*pSep) {
1884             if (*pSep == SEP) {
1885                 break;
1886             }
1887             pSep++;
1888         }
1889         if (*pSep == 0) {
1890             /* last subtag */
1891             pNext = NULL;
1892         } else {
1893             pNext = pSep + 1;
1894         }
1895         subtagLen = (int32_t)(pSep - pSubtag);
1896 
1897         if (next & LANG) {
1898             if (_isLanguageSubtag(pSubtag, subtagLen)) {
1899                 *pSep = 0;  /* terminate */
1900                 t->language = T_CString_toLowerCase(pSubtag);
1901 
1902                 pLastGoodPosition = pSep;
1903                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1904                 continue;
1905             }
1906         }
1907         if (next & EXTL) {
1908             if (_isExtlangSubtag(pSubtag, subtagLen)) {
1909                 *pSep = 0;
1910                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1911 
1912                 pLastGoodPosition = pSep;
1913                 if (extlangIdx < 3) {
1914                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1915                 } else {
1916                     next = SCRT | REGN | VART | EXTS | PRIV;
1917                 }
1918                 continue;
1919             }
1920         }
1921         if (next & SCRT) {
1922             if (_isScriptSubtag(pSubtag, subtagLen)) {
1923                 char *p = pSubtag;
1924 
1925                 *pSep = 0;
1926 
1927                 /* to title case */
1928                 *p = uprv_toupper(*p);
1929                 p++;
1930                 for (; *p; p++) {
1931                     *p = uprv_tolower(*p);
1932                 }
1933 
1934                 t->script = pSubtag;
1935 
1936                 pLastGoodPosition = pSep;
1937                 next = REGN | VART | EXTS | PRIV;
1938                 continue;
1939             }
1940         }
1941         if (next & REGN) {
1942             if (_isRegionSubtag(pSubtag, subtagLen)) {
1943                 *pSep = 0;
1944                 t->region = T_CString_toUpperCase(pSubtag);
1945 
1946                 pLastGoodPosition = pSep;
1947                 next = VART | EXTS | PRIV;
1948                 continue;
1949             }
1950         }
1951         if (next & VART) {
1952             if (_isVariantSubtag(pSubtag, subtagLen) ||
1953                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1954                 VariantListEntry *var;
1955                 UBool isAdded;
1956 
1957                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1958                 if (var == NULL) {
1959                     *status = U_MEMORY_ALLOCATION_ERROR;
1960                     goto error;
1961                 }
1962                 *pSep = 0;
1963                 var->variant = T_CString_toUpperCase(pSubtag);
1964                 isAdded = _addVariantToList(&(t->variants), var);
1965                 if (!isAdded) {
1966                     /* duplicated variant entry */
1967                     uprv_free(var);
1968                     break;
1969                 }
1970                 pLastGoodPosition = pSep;
1971                 next = VART | EXTS | PRIV;
1972                 continue;
1973             }
1974         }
1975         if (next & EXTS) {
1976             if (_isExtensionSingleton(pSubtag, subtagLen)) {
1977                 if (pExtension != NULL) {
1978                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1979                         /* the previous extension is incomplete */
1980                         uprv_free(pExtension);
1981                         pExtension = NULL;
1982                         break;
1983                     }
1984 
1985                     /* terminate the previous extension value */
1986                     *pExtValueSubtagEnd = 0;
1987                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1988 
1989                     /* insert the extension to the list */
1990                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1991                         pLastGoodPosition = pExtValueSubtagEnd;
1992                     } else {
1993                         /* stop parsing here */
1994                         uprv_free(pExtension);
1995                         pExtension = NULL;
1996                         break;
1997                     }
1998                 }
1999 
2000                 /* create a new extension */
2001                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2002                 if (pExtension == NULL) {
2003                     *status = U_MEMORY_ALLOCATION_ERROR;
2004                     goto error;
2005                 }
2006                 *pSep = 0;
2007                 pExtension->key = T_CString_toLowerCase(pSubtag);
2008                 pExtension->value = NULL;   /* will be set later */
2009 
2010                 /*
2011                  * reset the start and the end location of extension value
2012                  * subtags for this extension
2013                  */
2014                 pExtValueSubtag = NULL;
2015                 pExtValueSubtagEnd = NULL;
2016 
2017                 next = EXTV;
2018                 continue;
2019             }
2020         }
2021         if (next & EXTV) {
2022             if (_isExtensionSubtag(pSubtag, subtagLen)) {
2023                 if (pExtValueSubtag == NULL) {
2024                     /* if the start postion of this extension's value is not yet,
2025                         this one is the first value subtag */
2026                     pExtValueSubtag = pSubtag;
2027                 }
2028 
2029                 /* Mark the end of this subtag */
2030                 pExtValueSubtagEnd = pSep;
2031                 next = EXTS | EXTV | PRIV;
2032 
2033                 continue;
2034             }
2035         }
2036         if (next & PRIV) {
2037             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2038                 char *pPrivuseVal;
2039 
2040                 if (pExtension != NULL) {
2041                     /* Process the last extension */
2042                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2043                         /* the previous extension is incomplete */
2044                         uprv_free(pExtension);
2045                         pExtension = NULL;
2046                         break;
2047                     } else {
2048                         /* terminate the previous extension value */
2049                         *pExtValueSubtagEnd = 0;
2050                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2051 
2052                         /* insert the extension to the list */
2053                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2054                             pLastGoodPosition = pExtValueSubtagEnd;
2055                             pExtension = NULL;
2056                         } else {
2057                         /* stop parsing here */
2058                             uprv_free(pExtension);
2059                             pExtension = NULL;
2060                             break;
2061                         }
2062                     }
2063                 }
2064 
2065                 /* The rest of part will be private use value subtags */
2066                 if (pNext == NULL) {
2067                     /* empty private use subtag */
2068                     break;
2069                 }
2070                 /* back up the private use value start position */
2071                 pPrivuseVal = pNext;
2072 
2073                 /* validate private use value subtags */
2074                 while (pNext) {
2075                     pSubtag = pNext;
2076                     pSep = pSubtag;
2077                     while (*pSep) {
2078                         if (*pSep == SEP) {
2079                             break;
2080                         }
2081                         pSep++;
2082                     }
2083                     if (*pSep == 0) {
2084                         /* last subtag */
2085                         pNext = NULL;
2086                     } else {
2087                         pNext = pSep + 1;
2088                     }
2089                     subtagLen = (int32_t)(pSep - pSubtag);
2090 
2091                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2092                         *pSep = 0;
2093                         next = VART;
2094                         privateuseVar = TRUE;
2095                         break;
2096                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2097                         pLastGoodPosition = pSep;
2098                     } else {
2099                         break;
2100                     }
2101                 }
2102 
2103                 if (next == VART) {
2104                     continue;
2105                 }
2106 
2107                 if (pLastGoodPosition - pPrivuseVal > 0) {
2108                     *pLastGoodPosition = 0;
2109                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2110                 }
2111                 /* No more subtags, exiting the parse loop */
2112                 break;
2113             }
2114             break;
2115         }
2116 
2117         /* If we fell through here, it means this subtag is illegal - quit parsing */
2118         break;
2119     }
2120 
2121     if (pExtension != NULL) {
2122         /* Process the last extension */
2123         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2124             /* the previous extension is incomplete */
2125             uprv_free(pExtension);
2126         } else {
2127             /* terminate the previous extension value */
2128             *pExtValueSubtagEnd = 0;
2129             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2130             /* insert the extension to the list */
2131             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2132                 pLastGoodPosition = pExtValueSubtagEnd;
2133             } else {
2134                 uprv_free(pExtension);
2135             }
2136         }
2137     }
2138 
2139     if (parsedLen != NULL) {
2140         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2141     }
2142 
2143     return t;
2144 
2145 error:
2146     ultag_close(t);
2147     return NULL;
2148 }
2149 
2150 /**
2151 * Ticket #12705 - Turn optimization back on.
2152 */
2153 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2154 #pragma optimize( "", on )
2155 #endif
2156 
2157 static void
ultag_close(ULanguageTag * langtag)2158 ultag_close(ULanguageTag* langtag) {
2159 
2160     if (langtag == NULL) {
2161         return;
2162     }
2163 
2164     uprv_free(langtag->buf);
2165 
2166     if (langtag->variants) {
2167         VariantListEntry *curVar = langtag->variants;
2168         while (curVar) {
2169             VariantListEntry *nextVar = curVar->next;
2170             uprv_free(curVar);
2171             curVar = nextVar;
2172         }
2173     }
2174 
2175     if (langtag->extensions) {
2176         ExtensionListEntry *curExt = langtag->extensions;
2177         while (curExt) {
2178             ExtensionListEntry *nextExt = curExt->next;
2179             uprv_free(curExt);
2180             curExt = nextExt;
2181         }
2182     }
2183 
2184     uprv_free(langtag);
2185 }
2186 
2187 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2188 ultag_getLanguage(const ULanguageTag* langtag) {
2189     return langtag->language;
2190 }
2191 
#if 0
/*
 * Currently compiled out.
 * Looks the tag's language up in DEPRECATEDLANGS, scanning every other entry
 * until the NULL terminator, and returns the entry following a match;
 * without a match the tag's own language is returned.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return langtag->language;
}
#endif
2204 
2205 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2206 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2207     if (idx >= 0 && idx < MAXEXTLANG) {
2208         return langtag->extlang[idx];
2209     }
2210     return NULL;
2211 }
2212 
2213 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2214 ultag_getExtlangSize(const ULanguageTag* langtag) {
2215     int32_t size = 0;
2216     int32_t i;
2217     for (i = 0; i < MAXEXTLANG; i++) {
2218         if (langtag->extlang[i]) {
2219             size++;
2220         }
2221     }
2222     return size;
2223 }
2224 
2225 static const char*
ultag_getScript(const ULanguageTag * langtag)2226 ultag_getScript(const ULanguageTag* langtag) {
2227     return langtag->script;
2228 }
2229 
2230 static const char*
ultag_getRegion(const ULanguageTag * langtag)2231 ultag_getRegion(const ULanguageTag* langtag) {
2232     return langtag->region;
2233 }
2234 
2235 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2236 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2237     const char *var = NULL;
2238     VariantListEntry *cur = langtag->variants;
2239     int32_t i = 0;
2240     while (cur) {
2241         if (i == idx) {
2242             var = cur->variant;
2243             break;
2244         }
2245         cur = cur->next;
2246         i++;
2247     }
2248     return var;
2249 }
2250 
2251 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2252 ultag_getVariantsSize(const ULanguageTag* langtag) {
2253     int32_t size = 0;
2254     VariantListEntry *cur = langtag->variants;
2255     while (TRUE) {
2256         if (cur == NULL) {
2257             break;
2258         }
2259         size++;
2260         cur = cur->next;
2261     }
2262     return size;
2263 }
2264 
2265 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2266 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2267     const char *key = NULL;
2268     ExtensionListEntry *cur = langtag->extensions;
2269     int32_t i = 0;
2270     while (cur) {
2271         if (i == idx) {
2272             key = cur->key;
2273             break;
2274         }
2275         cur = cur->next;
2276         i++;
2277     }
2278     return key;
2279 }
2280 
2281 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2282 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2283     const char *val = NULL;
2284     ExtensionListEntry *cur = langtag->extensions;
2285     int32_t i = 0;
2286     while (cur) {
2287         if (i == idx) {
2288             val = cur->value;
2289             break;
2290         }
2291         cur = cur->next;
2292         i++;
2293     }
2294     return val;
2295 }
2296 
2297 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2298 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2299     int32_t size = 0;
2300     ExtensionListEntry *cur = langtag->extensions;
2301     while (TRUE) {
2302         if (cur == NULL) {
2303             break;
2304         }
2305         size++;
2306         cur = cur->next;
2307     }
2308     return size;
2309 }
2310 
2311 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2312 ultag_getPrivateUse(const ULanguageTag* langtag) {
2313     return langtag->privateuse;
2314 }
2315 
#if 0
/* Currently compiled out. Returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2322 
2323 
2324 /*
2325 * -------------------------------------------------
2326 *
2327 * Locale/BCP47 conversion APIs, exposed as uloc_*
2328 *
2329 * -------------------------------------------------
2330 */
2331 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2332 uloc_toLanguageTag(const char* localeID,
2333                    char* langtag,
2334                    int32_t langtagCapacity,
2335                    UBool strict,
2336                    UErrorCode* status) {
2337     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2338     char canonical[256];
2339     int32_t reslen = 0;
2340     UErrorCode tmpStatus = U_ZERO_ERROR;
2341     UBool hadPosix = FALSE;
2342     const char* pKeywordStart;
2343 
2344     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2345     canonical[0] = 0;
2346     if (uprv_strlen(localeID) > 0) {
2347         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2348         if (tmpStatus != U_ZERO_ERROR) {
2349             *status = U_ILLEGAL_ARGUMENT_ERROR;
2350             return 0;
2351         }
2352     }
2353 
2354     /* For handling special case - private use only tag */
2355     pKeywordStart = locale_getKeywordsStart(canonical);
2356     if (pKeywordStart == canonical) {
2357         UEnumeration *kwdEnum;
2358         int kwdCnt = 0;
2359         UBool done = FALSE;
2360 
2361         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2362         if (kwdEnum != NULL) {
2363             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2364             if (kwdCnt == 1) {
2365                 const char *key;
2366                 int32_t len = 0;
2367 
2368                 key = uenum_next(kwdEnum, &len, &tmpStatus);
2369                 if (len == 1 && *key == PRIVATEUSE) {
2370                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2371                     buf[0] = PRIVATEUSE;
2372                     buf[1] = SEP;
2373                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2374                     if (U_SUCCESS(tmpStatus)) {
2375                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
2376                             /* return private use only tag */
2377                             reslen = len + 2;
2378                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2379                             u_terminateChars(langtag, langtagCapacity, reslen, status);
2380                             done = TRUE;
2381                         } else if (strict) {
2382                             *status = U_ILLEGAL_ARGUMENT_ERROR;
2383                             done = TRUE;
2384                         }
2385                         /* if not strict mode, then "und" will be returned */
2386                     } else {
2387                         *status = U_ILLEGAL_ARGUMENT_ERROR;
2388                         done = TRUE;
2389                     }
2390                 }
2391             }
2392             uenum_close(kwdEnum);
2393             if (done) {
2394                 return reslen;
2395             }
2396         }
2397     }
2398 
2399     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2400     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2401     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2402     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2403     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2404     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2405 
2406     return reslen;
2407 }
2408 
2409 
2410 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2411 uloc_forLanguageTag(const char* langtag,
2412                     char* localeID,
2413                     int32_t localeIDCapacity,
2414                     int32_t* parsedLength,
2415                     UErrorCode* status) {
2416     ULanguageTag *lt;
2417     int32_t reslen = 0;
2418     const char *subtag, *p;
2419     int32_t len;
2420     int32_t i, n;
2421     UBool noRegion = TRUE;
2422 
2423     lt = ultag_parse(langtag, -1, parsedLength, status);
2424     if (U_FAILURE(*status)) {
2425         return 0;
2426     }
2427 
2428     /* language */
2429     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2430     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2431         len = (int32_t)uprv_strlen(subtag);
2432         if (len > 0) {
2433             if (reslen < localeIDCapacity) {
2434                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2435             }
2436             reslen += len;
2437         }
2438     }
2439 
2440     /* script */
2441     subtag = ultag_getScript(lt);
2442     len = (int32_t)uprv_strlen(subtag);
2443     if (len > 0) {
2444         if (reslen < localeIDCapacity) {
2445             *(localeID + reslen) = LOCALE_SEP;
2446         }
2447         reslen++;
2448 
2449         /* write out the script in title case */
2450         p = subtag;
2451         while (*p) {
2452             if (reslen < localeIDCapacity) {
2453                 if (p == subtag) {
2454                     *(localeID + reslen) = uprv_toupper(*p);
2455                 } else {
2456                     *(localeID + reslen) = *p;
2457                 }
2458             }
2459             reslen++;
2460             p++;
2461         }
2462     }
2463 
2464     /* region */
2465     subtag = ultag_getRegion(lt);
2466     len = (int32_t)uprv_strlen(subtag);
2467     if (len > 0) {
2468         if (reslen < localeIDCapacity) {
2469             *(localeID + reslen) = LOCALE_SEP;
2470         }
2471         reslen++;
2472         /* write out the retion in upper case */
2473         p = subtag;
2474         while (*p) {
2475             if (reslen < localeIDCapacity) {
2476                 *(localeID + reslen) = uprv_toupper(*p);
2477             }
2478             reslen++;
2479             p++;
2480         }
2481         noRegion = FALSE;
2482     }
2483 
2484     /* variants */
2485     n = ultag_getVariantsSize(lt);
2486     if (n > 0) {
2487         if (noRegion) {
2488             if (reslen < localeIDCapacity) {
2489                 *(localeID + reslen) = LOCALE_SEP;
2490             }
2491             reslen++;
2492         }
2493 
2494         for (i = 0; i < n; i++) {
2495             subtag = ultag_getVariant(lt, i);
2496             if (reslen < localeIDCapacity) {
2497                 *(localeID + reslen) = LOCALE_SEP;
2498             }
2499             reslen++;
2500             /* write out the variant in upper case */
2501             p = subtag;
2502             while (*p) {
2503                 if (reslen < localeIDCapacity) {
2504                     *(localeID + reslen) = uprv_toupper(*p);
2505                 }
2506                 reslen++;
2507                 p++;
2508             }
2509         }
2510     }
2511 
2512     /* keywords */
2513     n = ultag_getExtensionsSize(lt);
2514     subtag = ultag_getPrivateUse(lt);
2515     if (n > 0 || uprv_strlen(subtag) > 0) {
2516         if (reslen == 0 && n > 0) {
2517             /* need a language */
2518             if (reslen < localeIDCapacity) {
2519                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2520             }
2521             reslen += LANG_UND_LEN;
2522         }
2523         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2524         reslen += len;
2525     }
2526 
2527     ultag_close(lt);
2528     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2529 }
2530 
2531 
2532