• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2009-2012, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18 #include "uassert.h"
19 
/*
 * Number of elements of a true array (never a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * operand wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
21 
/* One node of a singly linked list of variant subtags */
typedef struct VariantListEntry {
    const char              *variant;   /* text of this variant */
    struct VariantListEntry *next;      /* following node; NULL ends the list */
} VariantListEntry;
27 
/* One node of a singly linked list of attribute values */
typedef struct AttributeListEntry {
    const char                *attribute;   /* text of this attribute */
    struct AttributeListEntry *next;        /* following node; NULL ends the list */
} AttributeListEntry;
33 
/* One node of a singly linked list of extensions (key/value pairs) */
typedef struct ExtensionListEntry {
    const char                  *key;     /* extension key */
    const char                  *value;   /* value belonging to key */
    struct ExtensionListEntry   *next;    /* following node; NULL ends the list */
} ExtensionListEntry;
40 
41 #define MAXEXTLANG 3
42 typedef struct ULanguageTag {
43     char                *buf;   /* holding parsed subtags */
44     const char          *language;
45     const char          *extlang[MAXEXTLANG];
46     const char          *script;
47     const char          *region;
48     VariantListEntry    *variants;
49     ExtensionListEntry  *extensions;
50     const char          *privateuse;
51     const char          *grandfathered;
52 } ULanguageTag;
53 
/* Characters with a special role inside a BCP 47 language tag */
#define MINLEN 2
#define SEP '-'
#define PRIVATEUSE 'x'
#define LDMLEXT 'u'

/* Separators used inside a locale ID */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/*
 * Character classes. ISNUMERIC evaluates its argument twice, so the
 * argument must be free of side effects.
 */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
66 
/* Fixed strings shared by the conversion code in this file */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without its terminating NUL */
#define LANG_UND_LEN 3
78 
/*
 * Flat table of string pairs: the even index holds a grandfathered tag,
 * the odd index right after it holds the preferred replacement.
 * A NULL/NULL pair terminates the table.
 */
static const char* const GRANDFATHERED[] = {
    /* grandfathered */ /* preferred */
    "art-lojban",       "jbo",
    "cel-gaulish",      "xtg-x-cel-gaulish",
    "en-GB-oed",        "en-GB-x-oed",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-default",        "en-x-i-default",
    "i-enochian",       "und-x-i-enochian",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-mingo",          "see-x-i-mingo",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min",           "nan-x-zh-min",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",
    NULL,               NULL
};
109 
/*
 * Flat table of language code pairs: the even index holds a deprecated
 * code, the odd index right after it holds the code that replaces it.
 * There is no terminator; walk it with LENGTHOF in steps of two.
 */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated */ /* new */
    "iw",            "he",
    "ji",            "yi",
    "in",            "id"
};
116 
117 /*
118 * -------------------------------------------------
119 *
120 * These ultag_ functions may be exposed as APIs later
121 *
122 * -------------------------------------------------
123 */
124 
125 static ULanguageTag*
126 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
127 
128 static void
129 ultag_close(ULanguageTag* langtag);
130 
131 static const char*
132 ultag_getLanguage(const ULanguageTag* langtag);
133 
134 #if 0
135 static const char*
136 ultag_getJDKLanguage(const ULanguageTag* langtag);
137 #endif
138 
139 static const char*
140 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
141 
142 static int32_t
143 ultag_getExtlangSize(const ULanguageTag* langtag);
144 
145 static const char*
146 ultag_getScript(const ULanguageTag* langtag);
147 
148 static const char*
149 ultag_getRegion(const ULanguageTag* langtag);
150 
151 static const char*
152 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
153 
154 static int32_t
155 ultag_getVariantsSize(const ULanguageTag* langtag);
156 
157 static const char*
158 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
159 
160 static const char*
161 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
162 
163 static int32_t
164 ultag_getExtensionsSize(const ULanguageTag* langtag);
165 
166 static const char*
167 ultag_getPrivateUse(const ULanguageTag* langtag);
168 
169 #if 0
170 static const char*
171 ultag_getGrandfathered(const ULanguageTag* langtag);
172 #endif
173 
174 /*
175 * -------------------------------------------------
176 *
177 * Language subtag syntax validation functions
178 *
179 * -------------------------------------------------
180 */
181 
182 static UBool
_isAlphaString(const char * s,int32_t len)183 _isAlphaString(const char* s, int32_t len) {
184     int32_t i;
185     for (i = 0; i < len; i++) {
186         if (!ISALPHA(*(s + i))) {
187             return FALSE;
188         }
189     }
190     return TRUE;
191 }
192 
193 static UBool
_isNumericString(const char * s,int32_t len)194 _isNumericString(const char* s, int32_t len) {
195     int32_t i;
196     for (i = 0; i < len; i++) {
197         if (!ISNUMERIC(*(s + i))) {
198             return FALSE;
199         }
200     }
201     return TRUE;
202 }
203 
204 static UBool
_isAlphaNumericString(const char * s,int32_t len)205 _isAlphaNumericString(const char* s, int32_t len) {
206     int32_t i;
207     for (i = 0; i < len; i++) {
208         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
209             return FALSE;
210         }
211     }
212     return TRUE;
213 }
214 
215 static UBool
_isLanguageSubtag(const char * s,int32_t len)216 _isLanguageSubtag(const char* s, int32_t len) {
217     /*
218      * language      = 2*3ALPHA            ; shortest ISO 639 code
219      *                 ["-" extlang]       ; sometimes followed by
220      *                                     ;   extended language subtags
221      *               / 4ALPHA              ; or reserved for future use
222      *               / 5*8ALPHA            ; or registered language subtag
223      */
224     if (len < 0) {
225         len = (int32_t)uprv_strlen(s);
226     }
227     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
228         return TRUE;
229     }
230     return FALSE;
231 }
232 
233 static UBool
_isExtlangSubtag(const char * s,int32_t len)234 _isExtlangSubtag(const char* s, int32_t len) {
235     /*
236      * extlang       = 3ALPHA              ; selected ISO 639 codes
237      *                 *2("-" 3ALPHA)      ; permanently reserved
238      */
239     if (len < 0) {
240         len = (int32_t)uprv_strlen(s);
241     }
242     if (len == 3 && _isAlphaString(s, len)) {
243         return TRUE;
244     }
245     return FALSE;
246 }
247 
248 static UBool
_isScriptSubtag(const char * s,int32_t len)249 _isScriptSubtag(const char* s, int32_t len) {
250     /*
251      * script        = 4ALPHA              ; ISO 15924 code
252      */
253     if (len < 0) {
254         len = (int32_t)uprv_strlen(s);
255     }
256     if (len == 4 && _isAlphaString(s, len)) {
257         return TRUE;
258     }
259     return FALSE;
260 }
261 
262 static UBool
_isRegionSubtag(const char * s,int32_t len)263 _isRegionSubtag(const char* s, int32_t len) {
264     /*
265      * region        = 2ALPHA              ; ISO 3166-1 code
266      *               / 3DIGIT              ; UN M.49 code
267      */
268     if (len < 0) {
269         len = (int32_t)uprv_strlen(s);
270     }
271     if (len == 2 && _isAlphaString(s, len)) {
272         return TRUE;
273     }
274     if (len == 3 && _isNumericString(s, len)) {
275         return TRUE;
276     }
277     return FALSE;
278 }
279 
280 static UBool
_isVariantSubtag(const char * s,int32_t len)281 _isVariantSubtag(const char* s, int32_t len) {
282     /*
283      * variant       = 5*8alphanum         ; registered variants
284      *               / (DIGIT 3alphanum)
285      */
286     if (len < 0) {
287         len = (int32_t)uprv_strlen(s);
288     }
289     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
290         return TRUE;
291     }
292     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
293         return TRUE;
294     }
295     return FALSE;
296 }
297 
298 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)299 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
300     /*
301      * variant       = 1*8alphanum         ; registered variants
302      *               / (DIGIT 3alphanum)
303      */
304     if (len < 0) {
305         len = (int32_t)uprv_strlen(s);
306     }
307     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
308         return TRUE;
309     }
310     return FALSE;
311 }
312 
313 static UBool
_isExtensionSingleton(const char * s,int32_t len)314 _isExtensionSingleton(const char* s, int32_t len) {
315     /*
316      * extension     = singleton 1*("-" (2*8alphanum))
317      */
318     if (len < 0) {
319         len = (int32_t)uprv_strlen(s);
320     }
321     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
322         return TRUE;
323     }
324     return FALSE;
325 }
326 
327 static UBool
_isExtensionSubtag(const char * s,int32_t len)328 _isExtensionSubtag(const char* s, int32_t len) {
329     /*
330      * extension     = singleton 1*("-" (2*8alphanum))
331      */
332     if (len < 0) {
333         len = (int32_t)uprv_strlen(s);
334     }
335     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
336         return TRUE;
337     }
338     return FALSE;
339 }
340 
341 static UBool
_isExtensionSubtags(const char * s,int32_t len)342 _isExtensionSubtags(const char* s, int32_t len) {
343     const char *p = s;
344     const char *pSubtag = NULL;
345 
346     if (len < 0) {
347         len = (int32_t)uprv_strlen(s);
348     }
349 
350     while ((p - s) < len) {
351         if (*p == SEP) {
352             if (pSubtag == NULL) {
353                 return FALSE;
354             }
355             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
356                 return FALSE;
357             }
358             pSubtag = NULL;
359         } else if (pSubtag == NULL) {
360             pSubtag = p;
361         }
362         p++;
363     }
364     if (pSubtag == NULL) {
365         return FALSE;
366     }
367     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
368 }
369 
370 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)371 _isPrivateuseValueSubtag(const char* s, int32_t len) {
372     /*
373      * privateuse    = "x" 1*("-" (1*8alphanum))
374      */
375     if (len < 0) {
376         len = (int32_t)uprv_strlen(s);
377     }
378     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
379         return TRUE;
380     }
381     return FALSE;
382 }
383 
384 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)385 _isPrivateuseValueSubtags(const char* s, int32_t len) {
386     const char *p = s;
387     const char *pSubtag = NULL;
388 
389     if (len < 0) {
390         len = (int32_t)uprv_strlen(s);
391     }
392 
393     while ((p - s) < len) {
394         if (*p == SEP) {
395             if (pSubtag == NULL) {
396                 return FALSE;
397             }
398             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
399                 return FALSE;
400             }
401             pSubtag = NULL;
402         } else if (pSubtag == NULL) {
403             pSubtag = p;
404         }
405         p++;
406     }
407     if (pSubtag == NULL) {
408         return FALSE;
409     }
410     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
411 }
412 
413 static UBool
_isLDMLKey(const char * s,int32_t len)414 _isLDMLKey(const char* s, int32_t len) {
415     if (len < 0) {
416         len = (int32_t)uprv_strlen(s);
417     }
418     if (len == 2 && _isAlphaNumericString(s, len)) {
419         return TRUE;
420     }
421     return FALSE;
422 }
423 
424 static UBool
_isLDMLType(const char * s,int32_t len)425 _isLDMLType(const char* s, int32_t len) {
426     if (len < 0) {
427         len = (int32_t)uprv_strlen(s);
428     }
429     if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
430         return TRUE;
431     }
432     return FALSE;
433 }
434 
435 /*
436 * -------------------------------------------------
437 *
438 * Helper functions
439 *
440 * -------------------------------------------------
441 */
442 
443 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)444 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
445     UBool bAdded = TRUE;
446 
447     if (*first == NULL) {
448         var->next = NULL;
449         *first = var;
450     } else {
451         VariantListEntry *prev, *cur;
452         int32_t cmp;
453 
454         /* variants order should be preserved */
455         prev = NULL;
456         cur = *first;
457         while (TRUE) {
458             if (cur == NULL) {
459                 prev->next = var;
460                 var->next = NULL;
461                 break;
462             }
463 
464             /* Checking for duplicate variant */
465             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
466             if (cmp == 0) {
467                 /* duplicated variant */
468                 bAdded = FALSE;
469                 break;
470             }
471             prev = cur;
472             cur = cur->next;
473         }
474     }
475 
476     return bAdded;
477 }
478 
479 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)480 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
481     UBool bAdded = TRUE;
482 
483     if (*first == NULL) {
484         attr->next = NULL;
485         *first = attr;
486     } else {
487         AttributeListEntry *prev, *cur;
488         int32_t cmp;
489 
490         /* reorder variants in alphabetical order */
491         prev = NULL;
492         cur = *first;
493         while (TRUE) {
494             if (cur == NULL) {
495                 prev->next = attr;
496                 attr->next = NULL;
497                 break;
498             }
499             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
500             if (cmp < 0) {
501                 if (prev == NULL) {
502                     *first = attr;
503                 } else {
504                     prev->next = attr;
505                 }
506                 attr->next = cur;
507                 break;
508             }
509             if (cmp == 0) {
510                 /* duplicated variant */
511                 bAdded = FALSE;
512                 break;
513             }
514             prev = cur;
515             cur = cur->next;
516         }
517     }
518 
519     return bAdded;
520 }
521 
522 
523 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)524 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
525     UBool bAdded = TRUE;
526 
527     if (*first == NULL) {
528         ext->next = NULL;
529         *first = ext;
530     } else {
531         ExtensionListEntry *prev, *cur;
532         int32_t cmp;
533 
534         /* reorder variants in alphabetical order */
535         prev = NULL;
536         cur = *first;
537         while (TRUE) {
538             if (cur == NULL) {
539                 prev->next = ext;
540                 ext->next = NULL;
541                 break;
542             }
543             if (localeToBCP) {
544                 /* special handling for locale to bcp conversion */
545                 int32_t len, curlen;
546 
547                 len = (int32_t)uprv_strlen(ext->key);
548                 curlen = (int32_t)uprv_strlen(cur->key);
549 
550                 if (len == 1 && curlen == 1) {
551                     if (*(ext->key) == *(cur->key)) {
552                         cmp = 0;
553                     } else if (*(ext->key) == PRIVATEUSE) {
554                         cmp = 1;
555                     } else if (*(cur->key) == PRIVATEUSE) {
556                         cmp = -1;
557                     } else {
558                         cmp = *(ext->key) - *(cur->key);
559                     }
560                 } else if (len == 1) {
561                     cmp = *(ext->key) - LDMLEXT;
562                 } else if (curlen == 1) {
563                     cmp = LDMLEXT - *(cur->key);
564                 } else {
565                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
566                 }
567             } else {
568                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
569             }
570             if (cmp < 0) {
571                 if (prev == NULL) {
572                     *first = ext;
573                 } else {
574                     prev->next = ext;
575                 }
576                 ext->next = cur;
577                 break;
578             }
579             if (cmp == 0) {
580                 /* duplicated extension key */
581                 bAdded = FALSE;
582                 break;
583             }
584             prev = cur;
585             cur = cur->next;
586         }
587     }
588 
589     return bAdded;
590 }
591 
592 static void
_initializeULanguageTag(ULanguageTag * langtag)593 _initializeULanguageTag(ULanguageTag* langtag) {
594     int32_t i;
595 
596     langtag->buf = NULL;
597 
598     langtag->language = EMPTY;
599     for (i = 0; i < MAXEXTLANG; i++) {
600         langtag->extlang[i] = NULL;
601     }
602 
603     langtag->script = EMPTY;
604     langtag->region = EMPTY;
605 
606     langtag->variants = NULL;
607     langtag->extensions = NULL;
608 
609     langtag->grandfathered = EMPTY;
610     langtag->privateuse = EMPTY;
611 }
612 
/* Name of the resource bundle and of the tables read from it */
#define KEYTYPEDATA     "keyTypeData"
#define KEYMAP          "keyMap"
#define TYPEMAP         "typeMap"
#define TYPEALIAS       "typeAlias"

/* Sizes of the local conversion buffers */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22
#define MAX_LDML_TYPE_LEN       32
620 
621 static int32_t
_ldmlKeyToBCP47(const char * key,int32_t keyLen,char * bcpKey,int32_t bcpKeyCapacity,UErrorCode * status)622 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
623                 char* bcpKey, int32_t bcpKeyCapacity,
624                 UErrorCode *status) {
625     UResourceBundle *rb;
626     char keyBuf[MAX_LDML_KEY_LEN];
627     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
628     int32_t resultLen = 0;
629     int32_t i;
630     UErrorCode tmpStatus = U_ZERO_ERROR;
631     const UChar *uBcpKey;
632     int32_t bcpKeyLen;
633 
634     if (keyLen < 0) {
635         keyLen = (int32_t)uprv_strlen(key);
636     }
637 
638     if (keyLen >= sizeof(keyBuf)) {
639         /* no known valid LDML key exceeding 21 */
640         *status = U_ILLEGAL_ARGUMENT_ERROR;
641         return 0;
642     }
643 
644     uprv_memcpy(keyBuf, key, keyLen);
645     keyBuf[keyLen] = 0;
646 
647     /* to lower case */
648     for (i = 0; i < keyLen; i++) {
649         keyBuf[i] = uprv_tolower(keyBuf[i]);
650     }
651 
652     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
653     ures_getByKey(rb, KEYMAP, rb, status);
654 
655     if (U_FAILURE(*status)) {
656         ures_close(rb);
657         return 0;
658     }
659 
660     uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
661     if (U_SUCCESS(tmpStatus)) {
662         u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
663         bcpKeyBuf[bcpKeyLen] = 0;
664         resultLen = bcpKeyLen;
665     } else {
666         if (_isLDMLKey(key, keyLen)) {
667             uprv_memcpy(bcpKeyBuf, key, keyLen);
668             bcpKeyBuf[keyLen] = 0;
669             resultLen = keyLen;
670         } else {
671             /* mapping not availabe */
672             *status = U_ILLEGAL_ARGUMENT_ERROR;
673         }
674     }
675     ures_close(rb);
676 
677     if (U_FAILURE(*status)) {
678         return 0;
679     }
680 
681     uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
682     return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
683 }
684 
685 static int32_t
_bcp47ToLDMLKey(const char * bcpKey,int32_t bcpKeyLen,char * key,int32_t keyCapacity,UErrorCode * status)686 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
687                 char* key, int32_t keyCapacity,
688                 UErrorCode *status) {
689     UResourceBundle *rb;
690     char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
691     int32_t resultLen = 0;
692     int32_t i;
693     const char *resKey = NULL;
694     UResourceBundle *mapData;
695 
696     if (bcpKeyLen < 0) {
697         bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
698     }
699 
700     if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
701         *status = U_ILLEGAL_ARGUMENT_ERROR;
702         return 0;
703     }
704 
705     uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
706     bcpKeyBuf[bcpKeyLen] = 0;
707 
708     /* to lower case */
709     for (i = 0; i < bcpKeyLen; i++) {
710         bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
711     }
712 
713     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
714     ures_getByKey(rb, KEYMAP, rb, status);
715     if (U_FAILURE(*status)) {
716         ures_close(rb);
717         return 0;
718     }
719 
720     mapData = ures_getNextResource(rb, NULL, status);
721     while (U_SUCCESS(*status)) {
722         const UChar *uBcpKey;
723         char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
724         int32_t tmpBcpKeyLen;
725 
726         uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
727         if (U_FAILURE(*status)) {
728             break;
729         }
730         u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
731         tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
732         if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
733             /* found a matching BCP47 key */
734             resKey = ures_getKey(mapData);
735             resultLen = (int32_t)uprv_strlen(resKey);
736             break;
737         }
738         if (!ures_hasNext(rb)) {
739             break;
740         }
741         ures_getNextResource(rb, mapData, status);
742     }
743     ures_close(mapData);
744     ures_close(rb);
745 
746     if (U_FAILURE(*status)) {
747         return 0;
748     }
749 
750     if (resKey == NULL) {
751         resKey = bcpKeyBuf;
752         resultLen = bcpKeyLen;
753     }
754 
755     uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
756     return u_terminateChars(key, keyCapacity, resultLen, status);
757 }
758 
759 static int32_t
_ldmlTypeToBCP47(const char * key,int32_t keyLen,const char * type,int32_t typeLen,char * bcpType,int32_t bcpTypeCapacity,UErrorCode * status)760 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
761                  const char* type, int32_t typeLen,
762                  char* bcpType, int32_t bcpTypeCapacity,
763                  UErrorCode *status) {
764     UResourceBundle *rb, *keyTypeData, *typeMapForKey;
765     char keyBuf[MAX_LDML_KEY_LEN];
766     char typeBuf[MAX_LDML_TYPE_LEN];
767     char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
768     int32_t resultLen = 0;
769     int32_t i;
770     UErrorCode tmpStatus = U_ZERO_ERROR;
771     const UChar *uBcpType, *uCanonicalType;
772     int32_t bcpTypeLen, canonicalTypeLen;
773     UBool isTimezone = FALSE;
774 
775     if (keyLen < 0) {
776         keyLen = (int32_t)uprv_strlen(key);
777     }
778     if (keyLen >= sizeof(keyBuf)) {
779         /* no known valid LDML key exceeding 21 */
780         *status = U_ILLEGAL_ARGUMENT_ERROR;
781         return 0;
782     }
783     uprv_memcpy(keyBuf, key, keyLen);
784     keyBuf[keyLen] = 0;
785 
786     /* to lower case */
787     for (i = 0; i < keyLen; i++) {
788         keyBuf[i] = uprv_tolower(keyBuf[i]);
789     }
790     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
791         isTimezone = TRUE;
792     }
793 
794     if (typeLen < 0) {
795         typeLen = (int32_t)uprv_strlen(type);
796     }
797     if (typeLen >= sizeof(typeBuf)) {
798         *status = U_ILLEGAL_ARGUMENT_ERROR;
799         return 0;
800     }
801 
802     if (isTimezone) {
803         /* replace '/' with ':' */
804         for (i = 0; i < typeLen; i++) {
805             if (*(type + i) == '/') {
806                 typeBuf[i] = ':';
807             } else {
808                 typeBuf[i] = *(type + i);
809             }
810         }
811         typeBuf[typeLen] = 0;
812         type = &typeBuf[0];
813     }
814 
815     keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
816     rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
817     if (U_FAILURE(*status)) {
818         ures_close(rb);
819         ures_close(keyTypeData);
820         return 0;
821     }
822 
823     typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
824     uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
825     if (U_SUCCESS(tmpStatus)) {
826         u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
827         resultLen = bcpTypeLen;
828     } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
829         /* is this type alias? */
830         tmpStatus = U_ZERO_ERROR;
831         ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
832         ures_getByKey(rb, keyBuf, rb, &tmpStatus);
833         uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
834         if (U_SUCCESS(tmpStatus)) {
835             u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
836             if (isTimezone) {
837                 /* replace '/' with ':' */
838                 for (i = 0; i < canonicalTypeLen; i++) {
839                     if (typeBuf[i] == '/') {
840                         typeBuf[i] = ':';
841                     }
842                 }
843             }
844             typeBuf[canonicalTypeLen] = 0;
845 
846             /* look up the canonical type */
847             uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
848             if (U_SUCCESS(tmpStatus)) {
849                 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
850                 resultLen = bcpTypeLen;
851             }
852         }
853         if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
854             if (_isLDMLType(type, typeLen)) {
855                 uprv_memcpy(bcpTypeBuf, type, typeLen);
856                 resultLen = typeLen;
857             } else {
858                 /* mapping not availabe */
859                 *status = U_ILLEGAL_ARGUMENT_ERROR;
860             }
861         }
862     } else {
863         *status = tmpStatus;
864     }
865     ures_close(rb);
866     ures_close(typeMapForKey);
867     ures_close(keyTypeData);
868 
869     if (U_FAILURE(*status)) {
870         return 0;
871     }
872 
873     uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
874     return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
875 }
876 
877 static int32_t
_bcp47ToLDMLType(const char * key,int32_t keyLen,const char * bcpType,int32_t bcpTypeLen,char * type,int32_t typeCapacity,UErrorCode * status)878 _bcp47ToLDMLType(const char* key, int32_t keyLen,
879                  const char* bcpType, int32_t bcpTypeLen,
880                  char* type, int32_t typeCapacity,
881                  UErrorCode *status) {
882     UResourceBundle *rb;
883     char keyBuf[MAX_LDML_KEY_LEN];
884     char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
885     int32_t resultLen = 0;
886     int32_t i, typeSize;
887     const char *resType = NULL;
888     UResourceBundle *mapData;
889     UErrorCode tmpStatus = U_ZERO_ERROR;
890     int32_t copyLen;
891 
892     if (keyLen < 0) {
893         keyLen = (int32_t)uprv_strlen(key);
894     }
895 
896     if (keyLen >= sizeof(keyBuf)) {
897         /* no known valid LDML key exceeding 21 */
898         *status = U_ILLEGAL_ARGUMENT_ERROR;
899         return 0;
900     }
901     uprv_memcpy(keyBuf, key, keyLen);
902     keyBuf[keyLen] = 0;
903 
904     /* to lower case */
905     for (i = 0; i < keyLen; i++) {
906         keyBuf[i] = uprv_tolower(keyBuf[i]);
907     }
908 
909 
910     if (bcpTypeLen < 0) {
911         bcpTypeLen = (int32_t)uprv_strlen(bcpType);
912     }
913 
914     typeSize = 0;
915     for (i = 0; i < bcpTypeLen; i++) {
916         if (bcpType[i] == SEP) {
917             if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
918                 *status = U_ILLEGAL_ARGUMENT_ERROR;
919                 return 0;
920             }
921             typeSize = 0;
922         } else {
923             typeSize++;
924         }
925     }
926 
927     uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
928     bcpTypeBuf[bcpTypeLen] = 0;
929 
930     /* to lower case */
931     for (i = 0; i < bcpTypeLen; i++) {
932         bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
933     }
934 
935     rb = ures_openDirect(NULL, KEYTYPEDATA, status);
936     ures_getByKey(rb, TYPEMAP, rb, status);
937     if (U_FAILURE(*status)) {
938         ures_close(rb);
939         return 0;
940     }
941 
942     ures_getByKey(rb, keyBuf, rb, &tmpStatus);
943     mapData = ures_getNextResource(rb, NULL, &tmpStatus);
944     while (U_SUCCESS(tmpStatus)) {
945         const UChar *uBcpType;
946         char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
947         int32_t tmpBcpTypeLen;
948 
949         uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
950         if (U_FAILURE(tmpStatus)) {
951             break;
952         }
953         u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
954         tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
955         if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
956             /* found a matching BCP47 type */
957             resType = ures_getKey(mapData);
958             resultLen = (int32_t)uprv_strlen(resType);
959             break;
960         }
961         if (!ures_hasNext(rb)) {
962             break;
963         }
964         ures_getNextResource(rb, mapData, &tmpStatus);
965     }
966     ures_close(mapData);
967     ures_close(rb);
968 
969     if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
970         *status = tmpStatus;
971         return 0;
972     }
973 
974     if (resType == NULL) {
975         resType = bcpTypeBuf;
976         resultLen = bcpTypeLen;
977     }
978 
979     copyLen = uprv_min(resultLen, typeCapacity);
980     uprv_memcpy(type, resType, copyLen);
981 
982     if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
983         for (i = 0; i < copyLen; i++) {
984             if (*(type + i) == ':') {
985                 *(type + i) = '/';
986             }
987         }
988     }
989 
990     return u_terminateChars(type, typeCapacity, resultLen, status);
991 }
992 
993 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)994 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
995     char buf[ULOC_LANG_CAPACITY];
996     UErrorCode tmpStatus = U_ZERO_ERROR;
997     int32_t len, i;
998     int32_t reslen = 0;
999 
1000     if (U_FAILURE(*status)) {
1001         return 0;
1002     }
1003 
1004     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
1005     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1006         if (strict) {
1007             *status = U_ILLEGAL_ARGUMENT_ERROR;
1008             return 0;
1009         }
1010         len = 0;
1011     }
1012 
1013     /* Note: returned language code is in lower case letters */
1014 
1015     if (len == 0) {
1016         if (reslen < capacity) {
1017             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1018         }
1019         reslen += LANG_UND_LEN;
1020     } else if (!_isLanguageSubtag(buf, len)) {
1021             /* invalid language code */
1022         if (strict) {
1023             *status = U_ILLEGAL_ARGUMENT_ERROR;
1024             return 0;
1025         }
1026         if (reslen < capacity) {
1027             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
1028         }
1029         reslen += LANG_UND_LEN;
1030     } else {
1031         /* resolve deprecated */
1032         for (i = 0; i < LENGTHOF(DEPRECATEDLANGS); i += 2) {
1033             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
1034                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
1035                 len = (int32_t)uprv_strlen(buf);
1036                 break;
1037             }
1038         }
1039         if (reslen < capacity) {
1040             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1041         }
1042         reslen += len;
1043     }
1044     u_terminateChars(appendAt, capacity, reslen, status);
1045     return reslen;
1046 }
1047 
1048 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)1049 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1050     char buf[ULOC_SCRIPT_CAPACITY];
1051     UErrorCode tmpStatus = U_ZERO_ERROR;
1052     int32_t len;
1053     int32_t reslen = 0;
1054 
1055     if (U_FAILURE(*status)) {
1056         return 0;
1057     }
1058 
1059     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
1060     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1061         if (strict) {
1062             *status = U_ILLEGAL_ARGUMENT_ERROR;
1063         }
1064         return 0;
1065     }
1066 
1067     if (len > 0) {
1068         if (!_isScriptSubtag(buf, len)) {
1069             /* invalid script code */
1070             if (strict) {
1071                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1072             }
1073             return 0;
1074         } else {
1075             if (reslen < capacity) {
1076                 *(appendAt + reslen) = SEP;
1077             }
1078             reslen++;
1079 
1080             if (reslen < capacity) {
1081                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1082             }
1083             reslen += len;
1084         }
1085     }
1086     u_terminateChars(appendAt, capacity, reslen, status);
1087     return reslen;
1088 }
1089 
1090 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)1091 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
1092     char buf[ULOC_COUNTRY_CAPACITY];
1093     UErrorCode tmpStatus = U_ZERO_ERROR;
1094     int32_t len;
1095     int32_t reslen = 0;
1096 
1097     if (U_FAILURE(*status)) {
1098         return 0;
1099     }
1100 
1101     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1102     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1103         if (strict) {
1104             *status = U_ILLEGAL_ARGUMENT_ERROR;
1105         }
1106         return 0;
1107     }
1108 
1109     if (len > 0) {
1110         if (!_isRegionSubtag(buf, len)) {
1111             /* invalid region code */
1112             if (strict) {
1113                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1114             }
1115             return 0;
1116         } else {
1117             if (reslen < capacity) {
1118                 *(appendAt + reslen) = SEP;
1119             }
1120             reslen++;
1121 
1122             if (reslen < capacity) {
1123                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
1124             }
1125             reslen += len;
1126         }
1127     }
1128     u_terminateChars(appendAt, capacity, reslen, status);
1129     return reslen;
1130 }
1131 
1132 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)1133 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1134     char buf[ULOC_FULLNAME_CAPACITY];
1135     UErrorCode tmpStatus = U_ZERO_ERROR;
1136     int32_t len, i;
1137     int32_t reslen = 0;
1138 
1139     if (U_FAILURE(*status)) {
1140         return 0;
1141     }
1142 
1143     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1144     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1145         if (strict) {
1146             *status = U_ILLEGAL_ARGUMENT_ERROR;
1147         }
1148         return 0;
1149     }
1150 
1151     if (len > 0) {
1152         char *p, *pVar;
1153         UBool bNext = TRUE;
1154         VariantListEntry *var;
1155         VariantListEntry *varFirst = NULL;
1156 
1157         pVar = NULL;
1158         p = buf;
1159         while (bNext) {
1160             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1161                 if (*p == 0) {
1162                     bNext = FALSE;
1163                 } else {
1164                     *p = 0; /* terminate */
1165                 }
1166                 if (pVar == NULL) {
1167                     if (strict) {
1168                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1169                         break;
1170                     }
1171                     /* ignore empty variant */
1172                 } else {
1173                     /* ICU uses upper case letters for variants, but
1174                        the canonical format is lowercase in BCP47 */
1175                     for (i = 0; *(pVar + i) != 0; i++) {
1176                         *(pVar + i) = uprv_tolower(*(pVar + i));
1177                     }
1178 
1179                     /* validate */
1180                     if (_isVariantSubtag(pVar, -1)) {
1181                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
1182                             /* emit the variant to the list */
1183                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1184                             if (var == NULL) {
1185                                 *status = U_MEMORY_ALLOCATION_ERROR;
1186                                 break;
1187                             }
1188                             var->variant = pVar;
1189                             if (!_addVariantToList(&varFirst, var)) {
1190                                 /* duplicated variant */
1191                                 uprv_free(var);
1192                                 if (strict) {
1193                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1194                                     break;
1195                                 }
1196                             }
1197                         } else {
1198                             /* Special handling for POSIX variant, need to remember that we had it and then */
1199                             /* treat it like an extension later. */
1200                             *hadPosix = TRUE;
1201                         }
1202                     } else if (strict) {
1203                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1204                         break;
1205                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
1206                         /* Handle private use subtags separately */
1207                         break;
1208                     }
1209                 }
1210                 /* reset variant starting position */
1211                 pVar = NULL;
1212             } else if (pVar == NULL) {
1213                 pVar = p;
1214             }
1215             p++;
1216         }
1217 
1218         if (U_SUCCESS(*status)) {
1219             if (varFirst != NULL) {
1220                 int32_t varLen;
1221 
1222                 /* write out validated/normalized variants to the target */
1223                 var = varFirst;
1224                 while (var != NULL) {
1225                     if (reslen < capacity) {
1226                         *(appendAt + reslen) = SEP;
1227                     }
1228                     reslen++;
1229                     varLen = (int32_t)uprv_strlen(var->variant);
1230                     if (reslen < capacity) {
1231                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
1232                     }
1233                     reslen += varLen;
1234                     var = var->next;
1235                 }
1236             }
1237         }
1238 
1239         /* clean up */
1240         var = varFirst;
1241         while (var != NULL) {
1242             VariantListEntry *tmpVar = var->next;
1243             uprv_free(var);
1244             var = tmpVar;
1245         }
1246 
1247         if (U_FAILURE(*status)) {
1248             return 0;
1249         }
1250     }
1251 
1252     u_terminateChars(appendAt, capacity, reslen, status);
1253     return reslen;
1254 }
1255 
1256 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1257 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1258     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1259     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
1260     int32_t attrBufLength = 0;
1261     UBool isAttribute = FALSE;
1262     UEnumeration *keywordEnum = NULL;
1263     int32_t reslen = 0;
1264 
1265     keywordEnum = uloc_openKeywords(localeID, status);
1266     if (U_FAILURE(*status) && !hadPosix) {
1267         uenum_close(keywordEnum);
1268         return 0;
1269     }
1270     if (keywordEnum != NULL || hadPosix) {
1271         /* reorder extensions */
1272         int32_t len;
1273         const char *key;
1274         ExtensionListEntry *firstExt = NULL;
1275         ExtensionListEntry *ext;
1276         AttributeListEntry *firstAttr = NULL;
1277         AttributeListEntry *attr;
1278         char *attrValue;
1279         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1280         char *pExtBuf = extBuf;
1281         int32_t extBufCapacity = sizeof(extBuf);
1282         const char *bcpKey, *bcpValue;
1283         UErrorCode tmpStatus = U_ZERO_ERROR;
1284         int32_t keylen;
1285         UBool isLDMLKeyword;
1286 
1287         while (TRUE) {
1288             isAttribute = FALSE;
1289             key = uenum_next(keywordEnum, NULL, status);
1290             if (key == NULL) {
1291                 break;
1292             }
1293             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
1294             if (U_FAILURE(tmpStatus)) {
1295                 if (strict) {
1296                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1297                     break;
1298                 }
1299                 /* ignore this keyword */
1300                 tmpStatus = U_ZERO_ERROR;
1301                 continue;
1302             }
1303 
1304             keylen = (int32_t)uprv_strlen(key);
1305             isLDMLKeyword = (keylen > 1);
1306 
1307             /* special keyword used for representing Unicode locale attributes */
1308             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
1309                 isAttribute = TRUE;
1310                 if (len > 0) {
1311                     int32_t i = 0;
1312                     while (TRUE) {
1313                         attrBufLength = 0;
1314                         for (; i < len; i++) {
1315                             if (buf[i] != '-') {
1316                                 attrBuf[attrBufLength++] = buf[i];
1317                             } else {
1318                                 i++;
1319                                 break;
1320                             }
1321                         }
1322                         if (attrBufLength > 0) {
1323                             attrBuf[attrBufLength] = 0;
1324 
1325                         } else if (i >= len){
1326                             break;
1327                         }
1328 
1329                         /* create AttributeListEntry */
1330                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1331                         if (attr == NULL) {
1332                             *status = U_MEMORY_ALLOCATION_ERROR;
1333                             break;
1334                         }
1335                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
1336                         if (attrValue == NULL) {
1337                             *status = U_MEMORY_ALLOCATION_ERROR;
1338                             break;
1339                         }
1340                         uprv_strcpy(attrValue, attrBuf);
1341                         attr->attribute = attrValue;
1342 
1343                         if (!_addAttributeToList(&firstAttr, attr)) {
1344                             uprv_free(attr);
1345                             uprv_free(attrValue);
1346                             if (strict) {
1347                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1348                                 break;
1349                             }
1350                         }
1351                     }
1352                 }
1353             } else if (isLDMLKeyword) {
1354                 int32_t modKeyLen;
1355 
1356                 /* transform key and value to bcp47 style */
1357                 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
1358                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1359                     if (strict) {
1360                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1361                         break;
1362                     }
1363                     tmpStatus = U_ZERO_ERROR;
1364                     continue;
1365                 }
1366 
1367                 bcpKey = pExtBuf;
1368                 pExtBuf += (modKeyLen + 1);
1369                 extBufCapacity -= (modKeyLen + 1);
1370 
1371                 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
1372                 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1373                     if (strict) {
1374                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1375                         break;
1376                     }
1377                     tmpStatus = U_ZERO_ERROR;
1378                     continue;
1379                 }
1380                 bcpValue = pExtBuf;
1381                 pExtBuf += (len + 1);
1382                 extBufCapacity -= (len + 1);
1383             } else {
1384                 if (*key == PRIVATEUSE) {
1385                     if (!_isPrivateuseValueSubtags(buf, len)) {
1386                         if (strict) {
1387                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1388                             break;
1389                         }
1390                         continue;
1391                     }
1392                 } else {
1393                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1394                         if (strict) {
1395                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1396                             break;
1397                         }
1398                         continue;
1399                     }
1400                 }
1401                 bcpKey = key;
1402                 if ((len + 1) < extBufCapacity) {
1403                     uprv_memcpy(pExtBuf, buf, len);
1404                     bcpValue = pExtBuf;
1405 
1406                     pExtBuf += len;
1407 
1408                     *pExtBuf = 0;
1409                     pExtBuf++;
1410 
1411                     extBufCapacity -= (len + 1);
1412                 } else {
1413                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1414                     break;
1415                 }
1416             }
1417 
1418             if (!isAttribute) {
1419                 /* create ExtensionListEntry */
1420                 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1421                 if (ext == NULL) {
1422                     *status = U_MEMORY_ALLOCATION_ERROR;
1423                     break;
1424                 }
1425                 ext->key = bcpKey;
1426                 ext->value = bcpValue;
1427 
1428                 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1429                     uprv_free(ext);
1430                     if (strict) {
1431                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1432                         break;
1433                     }
1434                 }
1435             }
1436         }
1437 
1438         /* Special handling for POSIX variant - add the keywords for POSIX */
1439         if (hadPosix) {
1440             /* create ExtensionListEntry for POSIX */
1441             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1442             if (ext == NULL) {
1443                 *status = U_MEMORY_ALLOCATION_ERROR;
1444                 goto cleanup;
1445             }
1446             ext->key = POSIX_KEY;
1447             ext->value = POSIX_VALUE;
1448 
1449             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1450                 uprv_free(ext);
1451             }
1452         }
1453 
1454         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1455             UBool startLDMLExtension = FALSE;
1456 
1457             attr = firstAttr;
1458             ext = firstExt;
1459             do {
1460                 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1461                    /* write LDML singleton extension */
1462                    if (reslen < capacity) {
1463                        *(appendAt + reslen) = SEP;
1464                    }
1465                    reslen++;
1466                    if (reslen < capacity) {
1467                        *(appendAt + reslen) = LDMLEXT;
1468                    }
1469                    reslen++;
1470 
1471                    startLDMLExtension = TRUE;
1472                 }
1473 
1474                 /* write out the sorted BCP47 attributes, extensions and private use */
1475                 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1476                     if (reslen < capacity) {
1477                         *(appendAt + reslen) = SEP;
1478                     }
1479                     reslen++;
1480                     len = (int32_t)uprv_strlen(ext->key);
1481                     if (reslen < capacity) {
1482                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1483                     }
1484                     reslen += len;
1485                     if (reslen < capacity) {
1486                         *(appendAt + reslen) = SEP;
1487                     }
1488                     reslen++;
1489                     len = (int32_t)uprv_strlen(ext->value);
1490                     if (reslen < capacity) {
1491                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1492                     }
1493                     reslen += len;
1494 
1495                     ext = ext->next;
1496                 } else if (attr) {
1497                     /* write the value for the attributes */
1498                     if (reslen < capacity) {
1499                         *(appendAt + reslen) = SEP;
1500                     }
1501                     reslen++;
1502                     len = (int32_t)uprv_strlen(attr->attribute);
1503                     if (reslen < capacity) {
1504                         uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1505                     }
1506                     reslen += len;
1507 
1508                     attr = attr->next;
1509                 }
1510             } while (attr != NULL || ext != NULL);
1511         }
1512 cleanup:
1513         /* clean up */
1514         ext = firstExt;
1515         while (ext != NULL) {
1516             ExtensionListEntry *tmpExt = ext->next;
1517             uprv_free(ext);
1518             ext = tmpExt;
1519         }
1520 
1521         attr = firstAttr;
1522         while (attr != NULL) {
1523             AttributeListEntry *tmpAttr = attr->next;
1524             char *pValue = (char *)attr->attribute;
1525             uprv_free(pValue);
1526             uprv_free(attr);
1527             attr = tmpAttr;
1528         }
1529 
1530         uenum_close(keywordEnum);
1531 
1532         if (U_FAILURE(*status)) {
1533             return 0;
1534         }
1535     }
1536 
1537     return u_terminateChars(appendAt, capacity, reslen, status);
1538 }
1539 
1540 /**
1541  * Append keywords parsed from LDML extension value
1542  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1543  * Note: char* buf is used for storing keywords
1544  */
1545 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1546 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1547     const char *pTag;   /* beginning of current subtag */
1548     const char *pKwds;  /* beginning of key-type pairs */
1549     UBool variantExists = *posixVariant;
1550 
1551     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1552     ExtensionListEntry *kwd, *nextKwd;
1553 
1554     AttributeListEntry *attrFirst = NULL;   /* first attribute */
1555     AttributeListEntry *attr, *nextAttr;
1556 
1557     int32_t len;
1558     int32_t bufIdx = 0;
1559 
1560     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561     int32_t attrBufIdx = 0;
1562 
1563     /* Reset the posixVariant value */
1564     *posixVariant = FALSE;
1565 
1566     pTag = ldmlext;
1567     pKwds = NULL;
1568 
1569     /* Iterate through u extension attributes */
1570     while (*pTag) {
1571         /* locate next separator char */
1572         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1573 
1574         if (_isLDMLKey(pTag, len)) {
1575             pKwds = pTag;
1576             break;
1577         }
1578 
1579         /* add this attribute to the list */
1580         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1581         if (attr == NULL) {
1582             *status = U_MEMORY_ALLOCATION_ERROR;
1583             goto cleanup;
1584         }
1585 
1586         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1587             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1588             attrBuf[attrBufIdx + len] = 0;
1589             attr->attribute = &attrBuf[attrBufIdx];
1590             attrBufIdx += (len + 1);
1591         } else {
1592             *status = U_ILLEGAL_ARGUMENT_ERROR;
1593             goto cleanup;
1594         }
1595 
1596         if (!_addAttributeToList(&attrFirst, attr)) {
1597             *status = U_ILLEGAL_ARGUMENT_ERROR;
1598             uprv_free(attr);
1599             goto cleanup;
1600         }
1601 
1602         /* next tag */
1603         pTag += len;
1604         if (*pTag) {
1605             /* next to the separator */
1606             pTag++;
1607         }
1608     }
1609 
1610     if (attrFirst) {
1611         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1612 
1613         if (attrBufIdx > bufSize) {
1614             /* attrBufIdx == <total length of attribute subtag> + 1 */
1615             *status = U_ILLEGAL_ARGUMENT_ERROR;
1616             goto cleanup;
1617         }
1618 
1619         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1620         if (kwd == NULL) {
1621             *status = U_MEMORY_ALLOCATION_ERROR;
1622             goto cleanup;
1623         }
1624 
1625         kwd->key = LOCALE_ATTRIBUTE_KEY;
1626         kwd->value = buf;
1627 
1628         /* attribute subtags sorted in alphabetical order as type */
1629         attr = attrFirst;
1630         while (attr != NULL) {
1631             nextAttr = attr->next;
1632 
1633             /* buffer size check is done above */
1634             if (attr != attrFirst) {
1635                 *(buf + bufIdx) = SEP;
1636                 bufIdx++;
1637             }
1638 
1639             len = uprv_strlen(attr->attribute);
1640             uprv_memcpy(buf + bufIdx, attr->attribute, len);
1641             bufIdx += len;
1642 
1643             attr = nextAttr;
1644         }
1645         *(buf + bufIdx) = 0;
1646         bufIdx++;
1647 
1648         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1649             *status = U_ILLEGAL_ARGUMENT_ERROR;
1650             uprv_free(kwd);
1651             goto cleanup;
1652         }
1653 
1654         /* once keyword entry is created, delete the attribute list */
1655         attr = attrFirst;
1656         while (attr != NULL) {
1657             nextAttr = attr->next;
1658             uprv_free(attr);
1659             attr = nextAttr;
1660         }
1661         attrFirst = NULL;
1662     }
1663 
1664     if (pKwds) {
1665         const char *pBcpKey = NULL;     /* u extenstion key subtag */
1666         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1667         int32_t bcpKeyLen = 0;
1668         int32_t bcpTypeLen = 0;
1669         UBool isDone = FALSE;
1670 
1671         pTag = pKwds;
1672         /* BCP47 representation of LDML key/type pairs */
1673         while (!isDone) {
1674             const char *pNextBcpKey = NULL;
1675             int32_t nextBcpKeyLen;
1676             UBool emitKeyword = FALSE;
1677 
1678             if (*pTag) {
1679                 /* locate next separator char */
1680                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1681 
1682                 if (_isLDMLKey(pTag, len)) {
1683                     if (pBcpKey) {
1684                         emitKeyword = TRUE;
1685                         pNextBcpKey = pTag;
1686                         nextBcpKeyLen = len;
1687                     } else {
1688                         pBcpKey = pTag;
1689                         bcpKeyLen = len;
1690                     }
1691                 } else {
1692                     U_ASSERT(pBcpKey != NULL);
1693                     /* within LDML type subtags */
1694                     if (pBcpType) {
1695                         bcpTypeLen += (len + 1);
1696                     } else {
1697                         pBcpType = pTag;
1698                         bcpTypeLen = len;
1699                     }
1700                 }
1701 
1702                 /* next tag */
1703                 pTag += len;
1704                 if (*pTag) {
1705                     /* next to the separator */
1706                     pTag++;
1707                 }
1708             } else {
1709                 /* processing last one */
1710                 emitKeyword = TRUE;
1711                 isDone = TRUE;
1712             }
1713 
1714             if (emitKeyword) {
1715                 const char *pKey = NULL;    /* LDML key */
1716                 const char *pType = NULL;   /* LDML type */
1717 
1718                 U_ASSERT(pBcpKey != NULL);
1719 
1720                 /* u extension key to LDML key */
1721                 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1722                 if (U_FAILURE(*status)) {
1723                     goto cleanup;
1724                 }
1725                 pKey = buf + bufIdx;
1726                 bufIdx += len;
1727                 *(buf + bufIdx) = 0;
1728                 bufIdx++;
1729 
1730                 if (pBcpType) {
1731                     /* BCP type to locale type */
1732                     len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1733                     if (U_FAILURE(*status)) {
1734                         goto cleanup;
1735                     }
1736                     pType = buf + bufIdx;
1737                     bufIdx += len;
1738                     *(buf + bufIdx) = 0;
1739                     bufIdx++;
1740                 } else {
1741                     /* typeless - default type value is "yes" */
1742                     pType = LOCALE_TYPE_YES;
1743                 }
1744 
1745                 /* Special handling for u-va-posix, since we want to treat this as a variant,
1746                    not as a keyword */
1747                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1748                     *posixVariant = TRUE;
1749                 } else {
1750                     /* create an ExtensionListEntry for this keyword */
1751                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1752                     if (kwd == NULL) {
1753                         *status = U_MEMORY_ALLOCATION_ERROR;
1754                         goto cleanup;
1755                     }
1756 
1757                     kwd->key = pKey;
1758                     kwd->value = pType;
1759 
1760                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1761                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1762                         uprv_free(kwd);
1763                         goto cleanup;
1764                     }
1765                 }
1766 
1767                 pBcpKey = pNextBcpKey;
1768                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1769                 pBcpType = NULL;
1770                 bcpTypeLen = 0;
1771             }
1772         }
1773     }
1774 
1775     kwd = kwdFirst;
1776     while (kwd != NULL) {
1777         nextKwd = kwd->next;
1778         _addExtensionToList(appendTo, kwd, FALSE);
1779         kwd = nextKwd;
1780     }
1781 
1782     return;
1783 
1784 cleanup:
1785     attr = attrFirst;
1786     while (attr != NULL) {
1787         nextAttr = attr->next;
1788         uprv_free(attr);
1789         attr = nextAttr;
1790     }
1791 
1792     kwd = kwdFirst;
1793     while (kwd != NULL) {
1794         nextKwd = kwd->next;
1795         uprv_free(kwd);
1796         kwd = nextKwd;
1797     }
1798 }
1799 
1800 
1801 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1802 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1803     int32_t reslen = 0;
1804     int32_t i, n;
1805     int32_t len;
1806     ExtensionListEntry *kwdFirst = NULL;
1807     ExtensionListEntry *kwd;
1808     const char *key, *type;
1809     char *kwdBuf = NULL;
1810     int32_t kwdBufLength = capacity;
1811     UBool posixVariant = FALSE;
1812 
1813     if (U_FAILURE(*status)) {
1814         return 0;
1815     }
1816 
1817     kwdBuf = (char*)uprv_malloc(kwdBufLength);
1818     if (kwdBuf == NULL) {
1819         *status = U_MEMORY_ALLOCATION_ERROR;
1820         return 0;
1821     }
1822 
1823     /* Determine if variants already exists */
1824     if (ultag_getVariantsSize(langtag)) {
1825         posixVariant = TRUE;
1826     }
1827 
1828     n = ultag_getExtensionsSize(langtag);
1829 
1830     /* resolve locale keywords and reordering keys */
1831     for (i = 0; i < n; i++) {
1832         key = ultag_getExtensionKey(langtag, i);
1833         type = ultag_getExtensionValue(langtag, i);
1834         if (*key == LDMLEXT) {
1835             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1836             if (U_FAILURE(*status)) {
1837                 break;
1838             }
1839         } else {
1840             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1841             if (kwd == NULL) {
1842                 *status = U_MEMORY_ALLOCATION_ERROR;
1843                 break;
1844             }
1845             kwd->key = key;
1846             kwd->value = type;
1847             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1848                 uprv_free(kwd);
1849                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1850                 break;
1851             }
1852         }
1853     }
1854 
1855     if (U_SUCCESS(*status)) {
1856         type = ultag_getPrivateUse(langtag);
1857         if ((int32_t)uprv_strlen(type) > 0) {
1858             /* add private use as a keyword */
1859             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1860             if (kwd == NULL) {
1861                 *status = U_MEMORY_ALLOCATION_ERROR;
1862             } else {
1863                 kwd->key = PRIVATEUSE_KEY;
1864                 kwd->value = type;
1865                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1866                     uprv_free(kwd);
1867                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1868                 }
1869             }
1870         }
1871     }
1872 
1873     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1874 
1875     if (U_SUCCESS(*status) && posixVariant) {
1876         len = (int32_t) uprv_strlen(_POSIX);
1877         if (reslen < capacity) {
1878             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1879         }
1880         reslen += len;
1881     }
1882 
1883     if (U_SUCCESS(*status) && kwdFirst != NULL) {
1884         /* write out the sorted keywords */
1885         UBool firstValue = TRUE;
1886         kwd = kwdFirst;
1887         do {
1888             if (reslen < capacity) {
1889                 if (firstValue) {
1890                     /* '@' */
1891                     *(appendAt + reslen) = LOCALE_EXT_SEP;
1892                     firstValue = FALSE;
1893                 } else {
1894                     /* ';' */
1895                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1896                 }
1897             }
1898             reslen++;
1899 
1900             /* key */
1901             len = (int32_t)uprv_strlen(kwd->key);
1902             if (reslen < capacity) {
1903                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1904             }
1905             reslen += len;
1906 
1907             /* '=' */
1908             if (reslen < capacity) {
1909                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1910             }
1911             reslen++;
1912 
1913             /* type */
1914             len = (int32_t)uprv_strlen(kwd->value);
1915             if (reslen < capacity) {
1916                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1917             }
1918             reslen += len;
1919 
1920             kwd = kwd->next;
1921         } while (kwd);
1922     }
1923 
1924     /* clean up */
1925     kwd = kwdFirst;
1926     while (kwd != NULL) {
1927         ExtensionListEntry *tmpKwd = kwd->next;
1928         uprv_free(kwd);
1929         kwd = tmpKwd;
1930     }
1931 
1932     uprv_free(kwdBuf);
1933 
1934     if (U_FAILURE(*status)) {
1935         return 0;
1936     }
1937 
1938     return u_terminateChars(appendAt, capacity, reslen, status);
1939 }
1940 
1941 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1942 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1943     char buf[ULOC_FULLNAME_CAPACITY];
1944     char tmpAppend[ULOC_FULLNAME_CAPACITY];
1945     UErrorCode tmpStatus = U_ZERO_ERROR;
1946     int32_t len, i;
1947     int32_t reslen = 0;
1948 
1949     if (U_FAILURE(*status)) {
1950         return 0;
1951     }
1952 
1953     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1954     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1955         if (strict) {
1956             *status = U_ILLEGAL_ARGUMENT_ERROR;
1957         }
1958         return 0;
1959     }
1960 
1961     if (len > 0) {
1962         char *p, *pPriv;
1963         UBool bNext = TRUE;
1964         UBool firstValue = TRUE;
1965         UBool writeValue;
1966 
1967         pPriv = NULL;
1968         p = buf;
1969         while (bNext) {
1970             writeValue = FALSE;
1971             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1972                 if (*p == 0) {
1973                     bNext = FALSE;
1974                 } else {
1975                     *p = 0; /* terminate */
1976                 }
1977                 if (pPriv != NULL) {
1978                     /* Private use in the canonical format is lowercase in BCP47 */
1979                     for (i = 0; *(pPriv + i) != 0; i++) {
1980                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
1981                     }
1982 
1983                     /* validate */
1984                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
1985                         if (firstValue) {
1986                             if (!_isVariantSubtag(pPriv, -1)) {
1987                                 writeValue = TRUE;
1988                             }
1989                         } else {
1990                             writeValue = TRUE;
1991                         }
1992                     } else if (strict) {
1993                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1994                         break;
1995                     } else {
1996                         break;
1997                     }
1998 
1999                     if (writeValue) {
2000                         if (reslen < capacity) {
2001                             tmpAppend[reslen++] = SEP;
2002                         }
2003 
2004                         if (firstValue) {
2005                             if (reslen < capacity) {
2006                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
2007                             }
2008 
2009                             if (reslen < capacity) {
2010                                 tmpAppend[reslen++] = SEP;
2011                             }
2012 
2013                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
2014                             if (reslen < capacity) {
2015                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
2016                             }
2017                             reslen += len;
2018 
2019                             if (reslen < capacity) {
2020                                 tmpAppend[reslen++] = SEP;
2021                             }
2022 
2023                             firstValue = FALSE;
2024                         }
2025 
2026                         len = (int32_t)uprv_strlen(pPriv);
2027                         if (reslen < capacity) {
2028                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
2029                         }
2030                         reslen += len;
2031                     }
2032                 }
2033                 /* reset private use starting position */
2034                 pPriv = NULL;
2035             } else if (pPriv == NULL) {
2036                 pPriv = p;
2037             }
2038             p++;
2039         }
2040 
2041         if (U_FAILURE(*status)) {
2042             return 0;
2043         }
2044     }
2045 
2046     if (U_SUCCESS(*status)) {
2047         len = reslen;
2048         if (reslen < capacity) {
2049             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
2050         }
2051     }
2052 
2053     u_terminateChars(appendAt, capacity, reslen, status);
2054 
2055     return reslen;
2056 }
2057 
2058 /*
2059 * -------------------------------------------------
2060 *
2061 * ultag_ functions
2062 *
2063 * -------------------------------------------------
2064 */
2065 
2066 /* Bit flags used by the parser */
2067 #define LANG 0x0001
2068 #define EXTL 0x0002
2069 #define SCRT 0x0004
2070 #define REGN 0x0008
2071 #define VART 0x0010
2072 #define EXTS 0x0020
2073 #define EXTV 0x0040
2074 #define PRIV 0x0080
2075 
2076 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)2077 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
2078     ULanguageTag *t;
2079     char *tagBuf;
2080     int16_t next;
2081     char *pSubtag, *pNext, *pLastGoodPosition;
2082     int32_t subtagLen;
2083     int32_t extlangIdx;
2084     ExtensionListEntry *pExtension;
2085     AttributeListEntry *pAttribute;
2086     char *pExtValueSubtag, *pExtValueSubtagEnd;
2087     int32_t i;
2088     UBool isLDMLExtension, privateuseVar = FALSE;
2089     int32_t grandfatheredLen = 0;
2090 
2091     if (parsedLen != NULL) {
2092         *parsedLen = 0;
2093     }
2094 
2095     if (U_FAILURE(*status)) {
2096         return NULL;
2097     }
2098 
2099     if (tagLen < 0) {
2100         tagLen = (int32_t)uprv_strlen(tag);
2101     }
2102 
2103     /* copy the entire string */
2104     tagBuf = (char*)uprv_malloc(tagLen + 1);
2105     if (tagBuf == NULL) {
2106         *status = U_MEMORY_ALLOCATION_ERROR;
2107         return NULL;
2108     }
2109     uprv_memcpy(tagBuf, tag, tagLen);
2110     *(tagBuf + tagLen) = 0;
2111 
2112     /* create a ULanguageTag */
2113     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
2114     if (t == NULL) {
2115         uprv_free(tagBuf);
2116         *status = U_MEMORY_ALLOCATION_ERROR;
2117         return NULL;
2118     }
2119     _initializeULanguageTag(t);
2120     t->buf = tagBuf;
2121 
2122     if (tagLen < MINLEN) {
2123         /* the input tag is too short - return empty ULanguageTag */
2124         return t;
2125     }
2126 
2127     /* check if the tag is grandfathered */
2128     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
2129         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
2130             int32_t newTagLength;
2131 
2132             grandfatheredLen = tagLen;  /* back up for output parsedLen */
2133             newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
2134             if (tagLen < newTagLength) {
2135                 uprv_free(tagBuf);
2136                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
2137                 if (tagBuf == NULL) {
2138                     *status = U_MEMORY_ALLOCATION_ERROR;
2139                     return NULL;
2140                 }
2141                 t->buf = tagBuf;
2142                 tagLen = newTagLength;
2143             }
2144             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
2145             break;
2146         }
2147     }
2148 
2149     /*
2150      * langtag      =   language
2151      *                  ["-" script]
2152      *                  ["-" region]
2153      *                  *("-" variant)
2154      *                  *("-" extension)
2155      *                  ["-" privateuse]
2156      */
2157 
2158     next = LANG | PRIV;
2159     pNext = pLastGoodPosition = tagBuf;
2160     extlangIdx = 0;
2161     pExtension = NULL;
2162     pExtValueSubtag = NULL;
2163     pExtValueSubtagEnd = NULL;
2164     pAttribute = NULL;
2165     isLDMLExtension = FALSE;
2166 
2167     while (pNext) {
2168         char *pSep;
2169 
2170         pSubtag = pNext;
2171 
2172         /* locate next separator char */
2173         pSep = pSubtag;
2174         while (*pSep) {
2175             if (*pSep == SEP) {
2176                 break;
2177             }
2178             pSep++;
2179         }
2180         if (*pSep == 0) {
2181             /* last subtag */
2182             pNext = NULL;
2183         } else {
2184             pNext = pSep + 1;
2185         }
2186         subtagLen = (int32_t)(pSep - pSubtag);
2187 
2188         if (next & LANG) {
2189             if (_isLanguageSubtag(pSubtag, subtagLen)) {
2190                 *pSep = 0;  /* terminate */
2191                 t->language = T_CString_toLowerCase(pSubtag);
2192 
2193                 pLastGoodPosition = pSep;
2194                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2195                 continue;
2196             }
2197         }
2198         if (next & EXTL) {
2199             if (_isExtlangSubtag(pSubtag, subtagLen)) {
2200                 *pSep = 0;
2201                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
2202 
2203                 pLastGoodPosition = pSep;
2204                 if (extlangIdx < 3) {
2205                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
2206                 } else {
2207                     next = SCRT | REGN | VART | EXTS | PRIV;
2208                 }
2209                 continue;
2210             }
2211         }
2212         if (next & SCRT) {
2213             if (_isScriptSubtag(pSubtag, subtagLen)) {
2214                 char *p = pSubtag;
2215 
2216                 *pSep = 0;
2217 
2218                 /* to title case */
2219                 *p = uprv_toupper(*p);
2220                 p++;
2221                 for (; *p; p++) {
2222                     *p = uprv_tolower(*p);
2223                 }
2224 
2225                 t->script = pSubtag;
2226 
2227                 pLastGoodPosition = pSep;
2228                 next = REGN | VART | EXTS | PRIV;
2229                 continue;
2230             }
2231         }
2232         if (next & REGN) {
2233             if (_isRegionSubtag(pSubtag, subtagLen)) {
2234                 *pSep = 0;
2235                 t->region = T_CString_toUpperCase(pSubtag);
2236 
2237                 pLastGoodPosition = pSep;
2238                 next = VART | EXTS | PRIV;
2239                 continue;
2240             }
2241         }
2242         if (next & VART) {
2243             if (_isVariantSubtag(pSubtag, subtagLen) ||
2244                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
2245                 VariantListEntry *var;
2246                 UBool isAdded;
2247 
2248                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
2249                 if (var == NULL) {
2250                     *status = U_MEMORY_ALLOCATION_ERROR;
2251                     goto error;
2252                 }
2253                 *pSep = 0;
2254                 var->variant = T_CString_toUpperCase(pSubtag);
2255                 isAdded = _addVariantToList(&(t->variants), var);
2256                 if (!isAdded) {
2257                     /* duplicated variant entry */
2258                     uprv_free(var);
2259                     break;
2260                 }
2261                 pLastGoodPosition = pSep;
2262                 next = VART | EXTS | PRIV;
2263                 continue;
2264             }
2265         }
2266         if (next & EXTS) {
2267             if (_isExtensionSingleton(pSubtag, subtagLen)) {
2268                 if (pExtension != NULL) {
2269                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2270                         /* the previous extension is incomplete */
2271                         uprv_free(pExtension);
2272                         pExtension = NULL;
2273                         break;
2274                     }
2275 
2276                     /* terminate the previous extension value */
2277                     *pExtValueSubtagEnd = 0;
2278                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2279 
2280                     /* insert the extension to the list */
2281                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2282                         pLastGoodPosition = pExtValueSubtagEnd;
2283                     } else {
2284                         /* stop parsing here */
2285                         uprv_free(pExtension);
2286                         pExtension = NULL;
2287                         break;
2288                     }
2289                 }
2290 
2291                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
2292 
2293                 /* create a new extension */
2294                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2295                 if (pExtension == NULL) {
2296                     *status = U_MEMORY_ALLOCATION_ERROR;
2297                     goto error;
2298                 }
2299                 *pSep = 0;
2300                 pExtension->key = T_CString_toLowerCase(pSubtag);
2301                 pExtension->value = NULL;   /* will be set later */
2302 
2303                 /*
2304                  * reset the start and the end location of extension value
2305                  * subtags for this extension
2306                  */
2307                 pExtValueSubtag = NULL;
2308                 pExtValueSubtagEnd = NULL;
2309 
2310                 next = EXTV;
2311                 continue;
2312             }
2313         }
2314         if (next & EXTV) {
2315             if (_isExtensionSubtag(pSubtag, subtagLen)) {
2316                 if (pExtValueSubtag == NULL) {
2317                     /* if the start postion of this extension's value is not yet,
2318                         this one is the first value subtag */
2319                     pExtValueSubtag = pSubtag;
2320                 }
2321 
2322                 /* Mark the end of this subtag */
2323                 pExtValueSubtagEnd = pSep;
2324                 next = EXTS | EXTV | PRIV;
2325 
2326                 continue;
2327             }
2328         }
2329         if (next & PRIV) {
2330             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2331                 char *pPrivuseVal;
2332 
2333                 if (pExtension != NULL) {
2334                     /* Process the last extension */
2335                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2336                         /* the previous extension is incomplete */
2337                         uprv_free(pExtension);
2338                         pExtension = NULL;
2339                         break;
2340                     } else {
2341                         /* terminate the previous extension value */
2342                         *pExtValueSubtagEnd = 0;
2343                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2344 
2345                         /* insert the extension to the list */
2346                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2347                             pLastGoodPosition = pExtValueSubtagEnd;
2348                             pExtension = NULL;
2349                         } else {
2350                         /* stop parsing here */
2351                             uprv_free(pExtension);
2352                             pExtension = NULL;
2353                             break;
2354                         }
2355                     }
2356                 }
2357 
2358                 /* The rest of part will be private use value subtags */
2359                 if (pNext == NULL) {
2360                     /* empty private use subtag */
2361                     break;
2362                 }
2363                 /* back up the private use value start position */
2364                 pPrivuseVal = pNext;
2365 
2366                 /* validate private use value subtags */
2367                 while (pNext) {
2368                     pSubtag = pNext;
2369                     pSep = pSubtag;
2370                     while (*pSep) {
2371                         if (*pSep == SEP) {
2372                             break;
2373                         }
2374                         pSep++;
2375                     }
2376                     if (*pSep == 0) {
2377                         /* last subtag */
2378                         pNext = NULL;
2379                     } else {
2380                         pNext = pSep + 1;
2381                     }
2382                     subtagLen = (int32_t)(pSep - pSubtag);
2383 
2384                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2385                         *pSep = 0;
2386                         next = VART;
2387                         privateuseVar = TRUE;
2388                         break;
2389                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2390                         pLastGoodPosition = pSep;
2391                     } else {
2392                         break;
2393                     }
2394                 }
2395 
2396                 if (next == VART) {
2397                     continue;
2398                 }
2399 
2400                 if (pLastGoodPosition - pPrivuseVal > 0) {
2401                     *pLastGoodPosition = 0;
2402                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2403                 }
2404                 /* No more subtags, exiting the parse loop */
2405                 break;
2406             }
2407             break;
2408         }
2409 
2410         /* If we fell through here, it means this subtag is illegal - quit parsing */
2411         break;
2412     }
2413 
2414     if (pExtension != NULL) {
2415         /* Process the last extension */
2416         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2417             /* the previous extension is incomplete */
2418             uprv_free(pExtension);
2419         } else {
2420             /* terminate the previous extension value */
2421             *pExtValueSubtagEnd = 0;
2422             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2423             /* insert the extension to the list */
2424             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2425                 pLastGoodPosition = pExtValueSubtagEnd;
2426             } else {
2427                 uprv_free(pExtension);
2428             }
2429         }
2430     }
2431 
2432     if (parsedLen != NULL) {
2433         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2434     }
2435 
2436     return t;
2437 
2438 error:
2439     uprv_free(t);
2440     return NULL;
2441 }
2442 
2443 static void
ultag_close(ULanguageTag * langtag)2444 ultag_close(ULanguageTag* langtag) {
2445 
2446     if (langtag == NULL) {
2447         return;
2448     }
2449 
2450     uprv_free(langtag->buf);
2451 
2452     if (langtag->variants) {
2453         VariantListEntry *curVar = langtag->variants;
2454         while (curVar) {
2455             VariantListEntry *nextVar = curVar->next;
2456             uprv_free(curVar);
2457             curVar = nextVar;
2458         }
2459     }
2460 
2461     if (langtag->extensions) {
2462         ExtensionListEntry *curExt = langtag->extensions;
2463         while (curExt) {
2464             ExtensionListEntry *nextExt = curExt->next;
2465             uprv_free(curExt);
2466             curExt = nextExt;
2467         }
2468     }
2469 
2470     uprv_free(langtag);
2471 }
2472 
2473 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2474 ultag_getLanguage(const ULanguageTag* langtag) {
2475     return langtag->language;
2476 }
2477 
#if 0
/*
 * Disabled.  Looks the tag's language up in the DEPRECATEDLANGS pair table
 * (entries at even indices are compared, the entry after a match is
 * returned); falls back to the tag's own language when nothing matches.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return langtag->language;
}
#endif
2490 
2491 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2492 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2493     if (idx >= 0 && idx < MAXEXTLANG) {
2494         return langtag->extlang[idx];
2495     }
2496     return NULL;
2497 }
2498 
2499 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2500 ultag_getExtlangSize(const ULanguageTag* langtag) {
2501     int32_t size = 0;
2502     int32_t i;
2503     for (i = 0; i < MAXEXTLANG; i++) {
2504         if (langtag->extlang[i]) {
2505             size++;
2506         }
2507     }
2508     return size;
2509 }
2510 
2511 static const char*
ultag_getScript(const ULanguageTag * langtag)2512 ultag_getScript(const ULanguageTag* langtag) {
2513     return langtag->script;
2514 }
2515 
2516 static const char*
ultag_getRegion(const ULanguageTag * langtag)2517 ultag_getRegion(const ULanguageTag* langtag) {
2518     return langtag->region;
2519 }
2520 
2521 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2522 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2523     const char *var = NULL;
2524     VariantListEntry *cur = langtag->variants;
2525     int32_t i = 0;
2526     while (cur) {
2527         if (i == idx) {
2528             var = cur->variant;
2529             break;
2530         }
2531         cur = cur->next;
2532         i++;
2533     }
2534     return var;
2535 }
2536 
2537 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2538 ultag_getVariantsSize(const ULanguageTag* langtag) {
2539     int32_t size = 0;
2540     VariantListEntry *cur = langtag->variants;
2541     while (TRUE) {
2542         if (cur == NULL) {
2543             break;
2544         }
2545         size++;
2546         cur = cur->next;
2547     }
2548     return size;
2549 }
2550 
2551 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2552 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2553     const char *key = NULL;
2554     ExtensionListEntry *cur = langtag->extensions;
2555     int32_t i = 0;
2556     while (cur) {
2557         if (i == idx) {
2558             key = cur->key;
2559             break;
2560         }
2561         cur = cur->next;
2562         i++;
2563     }
2564     return key;
2565 }
2566 
2567 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2568 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2569     const char *val = NULL;
2570     ExtensionListEntry *cur = langtag->extensions;
2571     int32_t i = 0;
2572     while (cur) {
2573         if (i == idx) {
2574             val = cur->value;
2575             break;
2576         }
2577         cur = cur->next;
2578         i++;
2579     }
2580     return val;
2581 }
2582 
2583 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2584 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2585     int32_t size = 0;
2586     ExtensionListEntry *cur = langtag->extensions;
2587     while (TRUE) {
2588         if (cur == NULL) {
2589             break;
2590         }
2591         size++;
2592         cur = cur->next;
2593     }
2594     return size;
2595 }
2596 
2597 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2598 ultag_getPrivateUse(const ULanguageTag* langtag) {
2599     return langtag->privateuse;
2600 }
2601 
#if 0
/* Disabled.  Returns the grandfathered pointer stored in the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2608 
2609 
2610 /*
2611 * -------------------------------------------------
2612 *
2613 * Locale/BCP47 conversion APIs, exposed as uloc_*
2614 *
2615 * -------------------------------------------------
2616 */
2617 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2618 uloc_toLanguageTag(const char* localeID,
2619                    char* langtag,
2620                    int32_t langtagCapacity,
2621                    UBool strict,
2622                    UErrorCode* status) {
2623     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2624     char canonical[256];
2625     int32_t reslen = 0;
2626     UErrorCode tmpStatus = U_ZERO_ERROR;
2627     UBool hadPosix = FALSE;
2628     const char* pKeywordStart;
2629 
2630     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2631     canonical[0] = 0;
2632     if (uprv_strlen(localeID) > 0) {
2633         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2634         if (tmpStatus != U_ZERO_ERROR) {
2635             *status = U_ILLEGAL_ARGUMENT_ERROR;
2636             return 0;
2637         }
2638     }
2639 
2640     /* For handling special case - private use only tag */
2641     pKeywordStart = locale_getKeywordsStart(canonical);
2642     if (pKeywordStart == canonical) {
2643         UEnumeration *kwdEnum;
2644         int kwdCnt = 0;
2645         UBool done = FALSE;
2646 
2647         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2648         if (kwdEnum != NULL) {
2649             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2650             if (kwdCnt == 1) {
2651                 const char *key;
2652                 int32_t len = 0;
2653 
2654                 key = uenum_next(kwdEnum, &len, &tmpStatus);
2655                 if (len == 1 && *key == PRIVATEUSE) {
2656                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2657                     buf[0] = PRIVATEUSE;
2658                     buf[1] = SEP;
2659                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2660                     if (U_SUCCESS(tmpStatus)) {
2661                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
2662                             /* return private use only tag */
2663                             reslen = len + 2;
2664                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2665                             u_terminateChars(langtag, langtagCapacity, reslen, status);
2666                             done = TRUE;
2667                         } else if (strict) {
2668                             *status = U_ILLEGAL_ARGUMENT_ERROR;
2669                             done = TRUE;
2670                         }
2671                         /* if not strict mode, then "und" will be returned */
2672                     } else {
2673                         *status = U_ILLEGAL_ARGUMENT_ERROR;
2674                         done = TRUE;
2675                     }
2676                 }
2677             }
2678             uenum_close(kwdEnum);
2679             if (done) {
2680                 return reslen;
2681             }
2682         }
2683     }
2684 
2685     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2686     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2687     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2688     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2689     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2690     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2691 
2692     return reslen;
2693 }
2694 
2695 
2696 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2697 uloc_forLanguageTag(const char* langtag,
2698                     char* localeID,
2699                     int32_t localeIDCapacity,
2700                     int32_t* parsedLength,
2701                     UErrorCode* status) {
2702     ULanguageTag *lt;
2703     int32_t reslen = 0;
2704     const char *subtag, *p;
2705     int32_t len;
2706     int32_t i, n;
2707     UBool noRegion = TRUE;
2708 
2709     lt = ultag_parse(langtag, -1, parsedLength, status);
2710     if (U_FAILURE(*status)) {
2711         return 0;
2712     }
2713 
2714     /* language */
2715     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2716     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2717         len = (int32_t)uprv_strlen(subtag);
2718         if (len > 0) {
2719             if (reslen < localeIDCapacity) {
2720                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2721             }
2722             reslen += len;
2723         }
2724     }
2725 
2726     /* script */
2727     subtag = ultag_getScript(lt);
2728     len = (int32_t)uprv_strlen(subtag);
2729     if (len > 0) {
2730         if (reslen < localeIDCapacity) {
2731             *(localeID + reslen) = LOCALE_SEP;
2732         }
2733         reslen++;
2734 
2735         /* write out the script in title case */
2736         p = subtag;
2737         while (*p) {
2738             if (reslen < localeIDCapacity) {
2739                 if (p == subtag) {
2740                     *(localeID + reslen) = uprv_toupper(*p);
2741                 } else {
2742                     *(localeID + reslen) = *p;
2743                 }
2744             }
2745             reslen++;
2746             p++;
2747         }
2748     }
2749 
2750     /* region */
2751     subtag = ultag_getRegion(lt);
2752     len = (int32_t)uprv_strlen(subtag);
2753     if (len > 0) {
2754         if (reslen < localeIDCapacity) {
2755             *(localeID + reslen) = LOCALE_SEP;
2756         }
2757         reslen++;
2758         /* write out the retion in upper case */
2759         p = subtag;
2760         while (*p) {
2761             if (reslen < localeIDCapacity) {
2762                 *(localeID + reslen) = uprv_toupper(*p);
2763             }
2764             reslen++;
2765             p++;
2766         }
2767         noRegion = FALSE;
2768     }
2769 
2770     /* variants */
2771     n = ultag_getVariantsSize(lt);
2772     if (n > 0) {
2773         if (noRegion) {
2774             if (reslen < localeIDCapacity) {
2775                 *(localeID + reslen) = LOCALE_SEP;
2776             }
2777             reslen++;
2778         }
2779 
2780         for (i = 0; i < n; i++) {
2781             subtag = ultag_getVariant(lt, i);
2782             if (reslen < localeIDCapacity) {
2783                 *(localeID + reslen) = LOCALE_SEP;
2784             }
2785             reslen++;
2786             /* write out the variant in upper case */
2787             p = subtag;
2788             while (*p) {
2789                 if (reslen < localeIDCapacity) {
2790                     *(localeID + reslen) = uprv_toupper(*p);
2791                 }
2792                 reslen++;
2793                 p++;
2794             }
2795         }
2796     }
2797 
2798     /* keywords */
2799     n = ultag_getExtensionsSize(lt);
2800     subtag = ultag_getPrivateUse(lt);
2801     if (n > 0 || uprv_strlen(subtag) > 0) {
2802         if (reslen == 0 && n > 0) {
2803             /* need a language */
2804             if (reslen < localeIDCapacity) {
2805                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2806             }
2807             reslen += LANG_UND_LEN;
2808         }
2809         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2810         reslen += len;
2811     }
2812 
2813     ultag_close(lt);
2814     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2815 }
2816 
2817 
2818