1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2009-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11 #include "unicode/ures.h"
12 #include "unicode/putil.h"
13 #include "unicode/uloc.h"
14 #include "ustr_imp.h"
15 #include "cmemory.h"
16 #include "cstring.h"
17 #include "putilimp.h"
18 #include "uinvchar.h"
19 #include "ulocimp.h"
20 #include "uassert.h"
21
22
/*
 * Node of a singly linked list of variant subtags.
 * The list helpers in this file terminate the chain with a NULL `next`.
 */
typedef struct VariantListEntry {
    const char              *variant;  /* text of this variant subtag */
    struct VariantListEntry *next;     /* following node, NULL at the tail */
} VariantListEntry;
28
/*
 * Node of a singly linked list of attribute values.
 * The list helpers in this file terminate the chain with a NULL `next`.
 */
typedef struct AttributeListEntry {
    const char                *attribute;  /* text of this attribute value */
    struct AttributeListEntry *next;       /* following node, NULL at the tail */
} AttributeListEntry;
34
/*
 * Node of a singly linked list of extensions, each a key/value pair.
 * The list helpers in this file terminate the chain with a NULL `next`.
 */
typedef struct ExtensionListEntry {
    const char                *key;    /* extension key */
    const char                *value;  /* extension value paired with the key */
    struct ExtensionListEntry *next;   /* following node, NULL at the tail */
} ExtensionListEntry;
41
42 #define MAXEXTLANG 3
43 typedef struct ULanguageTag {
44 char *buf; /* holding parsed subtags */
45 const char *language;
46 const char *extlang[MAXEXTLANG];
47 const char *script;
48 const char *region;
49 VariantListEntry *variants;
50 ExtensionListEntry *extensions;
51 const char *privateuse;
52 const char *grandfathered;
53 } ULanguageTag;
54
#define MINLEN 2

/* Characters with a special role inside a BCP 47 language tag. */
#define SEP        '-'
#define PRIVATEUSE 'x'
#define LDMLEXT    'u'

/* Characters with a special role inside an ICU locale ID. */
#define LOCALE_SEP          '_'
#define LOCALE_EXT_SEP      '@'
#define LOCALE_KEYWORD_SEP  ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification; note that ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c)   uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* Constant strings shared by the conversion routines. */
static const char EMPTY[]                  = "";
static const char LANG_UND[]               = "und";
static const char PRIVATEUSE_KEY[]         = "x";
static const char _POSIX[]                 = "_POSIX";
static const char POSIX_KEY[]              = "va";
static const char POSIX_VALUE[]            = "posix";
static const char LOCALE_ATTRIBUTE_KEY[]   = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[]        = "yes";

/* Length of LANG_UND, not counting the terminating NUL. */
#define LANG_UND_LEN 3
79
/*
 * Grandfathered tags and their preferred replacements, stored as
 * consecutive pairs: even index = grandfathered tag, odd index = preferred
 * value. The table ends with a NULL, NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered    preferred */
    "art-lojban",    "jbo",
    "cel-gaulish",   "xtg-x-cel-gaulish",
    "en-GB-oed",     "en-GB-x-oed",
    "i-ami",         "ami",
    "i-bnn",         "bnn",
    "i-default",     "en-x-i-default",
    "i-enochian",    "und-x-i-enochian",
    "i-hak",         "hak",
    "i-klingon",     "tlh",
    "i-lux",         "lb",
    "i-mingo",       "see-x-i-mingo",
    "i-navajo",      "nv",
    "i-pwn",         "pwn",
    "i-tao",         "tao",
    "i-tay",         "tay",
    "i-tsu",         "tsu",
    "no-bok",        "nb",
    "no-nyn",        "nn",
    "sgn-be-fr",     "sfb",
    "sgn-be-nl",     "vgt",
    "sgn-ch-de",     "sgg",
    "zh-guoyu",      "cmn",
    "zh-hakka",      "hak",
    "zh-min",        "nan-x-zh-min",
    "zh-min-nan",    "nan",
    "zh-xiang",      "hsn",
    NULL,            NULL
};
110
/*
 * Deprecated language codes and their replacements, stored as consecutive
 * pairs: even index = deprecated code, odd index = current code.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
117
118 /*
119 * -------------------------------------------------
120 *
121 * These ultag_ functions may be exposed as APIs later
122 *
123 * -------------------------------------------------
124 */
125
126 static ULanguageTag*
127 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
128
129 static void
130 ultag_close(ULanguageTag* langtag);
131
132 static const char*
133 ultag_getLanguage(const ULanguageTag* langtag);
134
135 #if 0
136 static const char*
137 ultag_getJDKLanguage(const ULanguageTag* langtag);
138 #endif
139
140 static const char*
141 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
142
143 static int32_t
144 ultag_getExtlangSize(const ULanguageTag* langtag);
145
146 static const char*
147 ultag_getScript(const ULanguageTag* langtag);
148
149 static const char*
150 ultag_getRegion(const ULanguageTag* langtag);
151
152 static const char*
153 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
154
155 static int32_t
156 ultag_getVariantsSize(const ULanguageTag* langtag);
157
158 static const char*
159 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
160
161 static const char*
162 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
163
164 static int32_t
165 ultag_getExtensionsSize(const ULanguageTag* langtag);
166
167 static const char*
168 ultag_getPrivateUse(const ULanguageTag* langtag);
169
170 #if 0
171 static const char*
172 ultag_getGrandfathered(const ULanguageTag* langtag);
173 #endif
174
175 /*
176 * -------------------------------------------------
177 *
178 * Language subtag syntax validation functions
179 *
180 * -------------------------------------------------
181 */
182
183 static UBool
_isAlphaString(const char * s,int32_t len)184 _isAlphaString(const char* s, int32_t len) {
185 int32_t i;
186 for (i = 0; i < len; i++) {
187 if (!ISALPHA(*(s + i))) {
188 return FALSE;
189 }
190 }
191 return TRUE;
192 }
193
194 static UBool
_isNumericString(const char * s,int32_t len)195 _isNumericString(const char* s, int32_t len) {
196 int32_t i;
197 for (i = 0; i < len; i++) {
198 if (!ISNUMERIC(*(s + i))) {
199 return FALSE;
200 }
201 }
202 return TRUE;
203 }
204
205 static UBool
_isAlphaNumericString(const char * s,int32_t len)206 _isAlphaNumericString(const char* s, int32_t len) {
207 int32_t i;
208 for (i = 0; i < len; i++) {
209 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
210 return FALSE;
211 }
212 }
213 return TRUE;
214 }
215
216 static UBool
_isLanguageSubtag(const char * s,int32_t len)217 _isLanguageSubtag(const char* s, int32_t len) {
218 /*
219 * language = 2*3ALPHA ; shortest ISO 639 code
220 * ["-" extlang] ; sometimes followed by
221 * ; extended language subtags
222 * / 4ALPHA ; or reserved for future use
223 * / 5*8ALPHA ; or registered language subtag
224 */
225 if (len < 0) {
226 len = (int32_t)uprv_strlen(s);
227 }
228 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
229 return TRUE;
230 }
231 return FALSE;
232 }
233
234 static UBool
_isExtlangSubtag(const char * s,int32_t len)235 _isExtlangSubtag(const char* s, int32_t len) {
236 /*
237 * extlang = 3ALPHA ; selected ISO 639 codes
238 * *2("-" 3ALPHA) ; permanently reserved
239 */
240 if (len < 0) {
241 len = (int32_t)uprv_strlen(s);
242 }
243 if (len == 3 && _isAlphaString(s, len)) {
244 return TRUE;
245 }
246 return FALSE;
247 }
248
249 static UBool
_isScriptSubtag(const char * s,int32_t len)250 _isScriptSubtag(const char* s, int32_t len) {
251 /*
252 * script = 4ALPHA ; ISO 15924 code
253 */
254 if (len < 0) {
255 len = (int32_t)uprv_strlen(s);
256 }
257 if (len == 4 && _isAlphaString(s, len)) {
258 return TRUE;
259 }
260 return FALSE;
261 }
262
263 static UBool
_isRegionSubtag(const char * s,int32_t len)264 _isRegionSubtag(const char* s, int32_t len) {
265 /*
266 * region = 2ALPHA ; ISO 3166-1 code
267 * / 3DIGIT ; UN M.49 code
268 */
269 if (len < 0) {
270 len = (int32_t)uprv_strlen(s);
271 }
272 if (len == 2 && _isAlphaString(s, len)) {
273 return TRUE;
274 }
275 if (len == 3 && _isNumericString(s, len)) {
276 return TRUE;
277 }
278 return FALSE;
279 }
280
281 static UBool
_isVariantSubtag(const char * s,int32_t len)282 _isVariantSubtag(const char* s, int32_t len) {
283 /*
284 * variant = 5*8alphanum ; registered variants
285 * / (DIGIT 3alphanum)
286 */
287 if (len < 0) {
288 len = (int32_t)uprv_strlen(s);
289 }
290 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
291 return TRUE;
292 }
293 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
294 return TRUE;
295 }
296 return FALSE;
297 }
298
299 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)300 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
301 /*
302 * variant = 1*8alphanum ; registered variants
303 * / (DIGIT 3alphanum)
304 */
305 if (len < 0) {
306 len = (int32_t)uprv_strlen(s);
307 }
308 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
309 return TRUE;
310 }
311 return FALSE;
312 }
313
314 static UBool
_isExtensionSingleton(const char * s,int32_t len)315 _isExtensionSingleton(const char* s, int32_t len) {
316 /*
317 * extension = singleton 1*("-" (2*8alphanum))
318 */
319 if (len < 0) {
320 len = (int32_t)uprv_strlen(s);
321 }
322 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
323 return TRUE;
324 }
325 return FALSE;
326 }
327
328 static UBool
_isExtensionSubtag(const char * s,int32_t len)329 _isExtensionSubtag(const char* s, int32_t len) {
330 /*
331 * extension = singleton 1*("-" (2*8alphanum))
332 */
333 if (len < 0) {
334 len = (int32_t)uprv_strlen(s);
335 }
336 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
337 return TRUE;
338 }
339 return FALSE;
340 }
341
342 static UBool
_isExtensionSubtags(const char * s,int32_t len)343 _isExtensionSubtags(const char* s, int32_t len) {
344 const char *p = s;
345 const char *pSubtag = NULL;
346
347 if (len < 0) {
348 len = (int32_t)uprv_strlen(s);
349 }
350
351 while ((p - s) < len) {
352 if (*p == SEP) {
353 if (pSubtag == NULL) {
354 return FALSE;
355 }
356 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
357 return FALSE;
358 }
359 pSubtag = NULL;
360 } else if (pSubtag == NULL) {
361 pSubtag = p;
362 }
363 p++;
364 }
365 if (pSubtag == NULL) {
366 return FALSE;
367 }
368 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
369 }
370
371 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)372 _isPrivateuseValueSubtag(const char* s, int32_t len) {
373 /*
374 * privateuse = "x" 1*("-" (1*8alphanum))
375 */
376 if (len < 0) {
377 len = (int32_t)uprv_strlen(s);
378 }
379 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
380 return TRUE;
381 }
382 return FALSE;
383 }
384
385 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)386 _isPrivateuseValueSubtags(const char* s, int32_t len) {
387 const char *p = s;
388 const char *pSubtag = NULL;
389
390 if (len < 0) {
391 len = (int32_t)uprv_strlen(s);
392 }
393
394 while ((p - s) < len) {
395 if (*p == SEP) {
396 if (pSubtag == NULL) {
397 return FALSE;
398 }
399 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
400 return FALSE;
401 }
402 pSubtag = NULL;
403 } else if (pSubtag == NULL) {
404 pSubtag = p;
405 }
406 p++;
407 }
408 if (pSubtag == NULL) {
409 return FALSE;
410 }
411 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
412 }
413
414 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)415 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
416 if (len < 0) {
417 len = (int32_t)uprv_strlen(s);
418 }
419 if (len == 2 && _isAlphaNumericString(s, len)) {
420 return TRUE;
421 }
422 return FALSE;
423 }
424
425 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)426 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
427 const char* p;
428 int32_t subtagLen = 0;
429
430 if (len < 0) {
431 len = (int32_t)uprv_strlen(s);
432 }
433
434 for (p = s; len > 0; p++, len--) {
435 if (*p == SEP) {
436 if (subtagLen < 3) {
437 return FALSE;
438 }
439 subtagLen = 0;
440 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
441 subtagLen++;
442 if (subtagLen > 8) {
443 return FALSE;
444 }
445 } else {
446 return FALSE;
447 }
448 }
449
450 return (subtagLen >= 3);
451 }
452 /*
453 * -------------------------------------------------
454 *
455 * Helper functions
456 *
457 * -------------------------------------------------
458 */
459
460 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)461 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
462 UBool bAdded = TRUE;
463
464 if (*first == NULL) {
465 var->next = NULL;
466 *first = var;
467 } else {
468 VariantListEntry *prev, *cur;
469 int32_t cmp;
470
471 /* variants order should be preserved */
472 prev = NULL;
473 cur = *first;
474 while (TRUE) {
475 if (cur == NULL) {
476 prev->next = var;
477 var->next = NULL;
478 break;
479 }
480
481 /* Checking for duplicate variant */
482 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
483 if (cmp == 0) {
484 /* duplicated variant */
485 bAdded = FALSE;
486 break;
487 }
488 prev = cur;
489 cur = cur->next;
490 }
491 }
492
493 return bAdded;
494 }
495
496 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)497 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
498 UBool bAdded = TRUE;
499
500 if (*first == NULL) {
501 attr->next = NULL;
502 *first = attr;
503 } else {
504 AttributeListEntry *prev, *cur;
505 int32_t cmp;
506
507 /* reorder variants in alphabetical order */
508 prev = NULL;
509 cur = *first;
510 while (TRUE) {
511 if (cur == NULL) {
512 prev->next = attr;
513 attr->next = NULL;
514 break;
515 }
516 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
517 if (cmp < 0) {
518 if (prev == NULL) {
519 *first = attr;
520 } else {
521 prev->next = attr;
522 }
523 attr->next = cur;
524 break;
525 }
526 if (cmp == 0) {
527 /* duplicated variant */
528 bAdded = FALSE;
529 break;
530 }
531 prev = cur;
532 cur = cur->next;
533 }
534 }
535
536 return bAdded;
537 }
538
539
540 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)541 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
542 UBool bAdded = TRUE;
543
544 if (*first == NULL) {
545 ext->next = NULL;
546 *first = ext;
547 } else {
548 ExtensionListEntry *prev, *cur;
549 int32_t cmp;
550
551 /* reorder variants in alphabetical order */
552 prev = NULL;
553 cur = *first;
554 while (TRUE) {
555 if (cur == NULL) {
556 prev->next = ext;
557 ext->next = NULL;
558 break;
559 }
560 if (localeToBCP) {
561 /* special handling for locale to bcp conversion */
562 int32_t len, curlen;
563
564 len = (int32_t)uprv_strlen(ext->key);
565 curlen = (int32_t)uprv_strlen(cur->key);
566
567 if (len == 1 && curlen == 1) {
568 if (*(ext->key) == *(cur->key)) {
569 cmp = 0;
570 } else if (*(ext->key) == PRIVATEUSE) {
571 cmp = 1;
572 } else if (*(cur->key) == PRIVATEUSE) {
573 cmp = -1;
574 } else {
575 cmp = *(ext->key) - *(cur->key);
576 }
577 } else if (len == 1) {
578 cmp = *(ext->key) - LDMLEXT;
579 } else if (curlen == 1) {
580 cmp = LDMLEXT - *(cur->key);
581 } else {
582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583 /* Both are u extension keys - we need special handling for 'attribute' */
584 if (cmp != 0) {
585 if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
586 cmp = 1;
587 } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
588 cmp = -1;
589 }
590 }
591 }
592 } else {
593 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
594 }
595 if (cmp < 0) {
596 if (prev == NULL) {
597 *first = ext;
598 } else {
599 prev->next = ext;
600 }
601 ext->next = cur;
602 break;
603 }
604 if (cmp == 0) {
605 /* duplicated extension key */
606 bAdded = FALSE;
607 break;
608 }
609 prev = cur;
610 cur = cur->next;
611 }
612 }
613
614 return bAdded;
615 }
616
617 static void
_initializeULanguageTag(ULanguageTag * langtag)618 _initializeULanguageTag(ULanguageTag* langtag) {
619 int32_t i;
620
621 langtag->buf = NULL;
622
623 langtag->language = EMPTY;
624 for (i = 0; i < MAXEXTLANG; i++) {
625 langtag->extlang[i] = NULL;
626 }
627
628 langtag->script = EMPTY;
629 langtag->region = EMPTY;
630
631 langtag->variants = NULL;
632 langtag->extensions = NULL;
633
634 langtag->grandfathered = EMPTY;
635 langtag->privateuse = EMPTY;
636 }
637
638 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)639 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
640 char buf[ULOC_LANG_CAPACITY];
641 UErrorCode tmpStatus = U_ZERO_ERROR;
642 int32_t len, i;
643 int32_t reslen = 0;
644
645 if (U_FAILURE(*status)) {
646 return 0;
647 }
648
649 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
650 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
651 if (strict) {
652 *status = U_ILLEGAL_ARGUMENT_ERROR;
653 return 0;
654 }
655 len = 0;
656 }
657
658 /* Note: returned language code is in lower case letters */
659
660 if (len == 0) {
661 if (reslen < capacity) {
662 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
663 }
664 reslen += LANG_UND_LEN;
665 } else if (!_isLanguageSubtag(buf, len)) {
666 /* invalid language code */
667 if (strict) {
668 *status = U_ILLEGAL_ARGUMENT_ERROR;
669 return 0;
670 }
671 if (reslen < capacity) {
672 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
673 }
674 reslen += LANG_UND_LEN;
675 } else {
676 /* resolve deprecated */
677 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
678 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
679 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
680 len = (int32_t)uprv_strlen(buf);
681 break;
682 }
683 }
684 if (reslen < capacity) {
685 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
686 }
687 reslen += len;
688 }
689 u_terminateChars(appendAt, capacity, reslen, status);
690 return reslen;
691 }
692
693 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)694 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
695 char buf[ULOC_SCRIPT_CAPACITY];
696 UErrorCode tmpStatus = U_ZERO_ERROR;
697 int32_t len;
698 int32_t reslen = 0;
699
700 if (U_FAILURE(*status)) {
701 return 0;
702 }
703
704 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
705 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
706 if (strict) {
707 *status = U_ILLEGAL_ARGUMENT_ERROR;
708 }
709 return 0;
710 }
711
712 if (len > 0) {
713 if (!_isScriptSubtag(buf, len)) {
714 /* invalid script code */
715 if (strict) {
716 *status = U_ILLEGAL_ARGUMENT_ERROR;
717 }
718 return 0;
719 } else {
720 if (reslen < capacity) {
721 *(appendAt + reslen) = SEP;
722 }
723 reslen++;
724
725 if (reslen < capacity) {
726 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
727 }
728 reslen += len;
729 }
730 }
731 u_terminateChars(appendAt, capacity, reslen, status);
732 return reslen;
733 }
734
735 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)736 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
737 char buf[ULOC_COUNTRY_CAPACITY];
738 UErrorCode tmpStatus = U_ZERO_ERROR;
739 int32_t len;
740 int32_t reslen = 0;
741
742 if (U_FAILURE(*status)) {
743 return 0;
744 }
745
746 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
747 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
748 if (strict) {
749 *status = U_ILLEGAL_ARGUMENT_ERROR;
750 }
751 return 0;
752 }
753
754 if (len > 0) {
755 if (!_isRegionSubtag(buf, len)) {
756 /* invalid region code */
757 if (strict) {
758 *status = U_ILLEGAL_ARGUMENT_ERROR;
759 }
760 return 0;
761 } else {
762 if (reslen < capacity) {
763 *(appendAt + reslen) = SEP;
764 }
765 reslen++;
766
767 if (reslen < capacity) {
768 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
769 }
770 reslen += len;
771 }
772 }
773 u_terminateChars(appendAt, capacity, reslen, status);
774 return reslen;
775 }
776
777 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)778 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
779 char buf[ULOC_FULLNAME_CAPACITY];
780 UErrorCode tmpStatus = U_ZERO_ERROR;
781 int32_t len, i;
782 int32_t reslen = 0;
783
784 if (U_FAILURE(*status)) {
785 return 0;
786 }
787
788 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
789 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
790 if (strict) {
791 *status = U_ILLEGAL_ARGUMENT_ERROR;
792 }
793 return 0;
794 }
795
796 if (len > 0) {
797 char *p, *pVar;
798 UBool bNext = TRUE;
799 VariantListEntry *var;
800 VariantListEntry *varFirst = NULL;
801
802 pVar = NULL;
803 p = buf;
804 while (bNext) {
805 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
806 if (*p == 0) {
807 bNext = FALSE;
808 } else {
809 *p = 0; /* terminate */
810 }
811 if (pVar == NULL) {
812 if (strict) {
813 *status = U_ILLEGAL_ARGUMENT_ERROR;
814 break;
815 }
816 /* ignore empty variant */
817 } else {
818 /* ICU uses upper case letters for variants, but
819 the canonical format is lowercase in BCP47 */
820 for (i = 0; *(pVar + i) != 0; i++) {
821 *(pVar + i) = uprv_tolower(*(pVar + i));
822 }
823
824 /* validate */
825 if (_isVariantSubtag(pVar, -1)) {
826 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
827 /* emit the variant to the list */
828 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
829 if (var == NULL) {
830 *status = U_MEMORY_ALLOCATION_ERROR;
831 break;
832 }
833 var->variant = pVar;
834 if (!_addVariantToList(&varFirst, var)) {
835 /* duplicated variant */
836 uprv_free(var);
837 if (strict) {
838 *status = U_ILLEGAL_ARGUMENT_ERROR;
839 break;
840 }
841 }
842 } else {
843 /* Special handling for POSIX variant, need to remember that we had it and then */
844 /* treat it like an extension later. */
845 *hadPosix = TRUE;
846 }
847 } else if (strict) {
848 *status = U_ILLEGAL_ARGUMENT_ERROR;
849 break;
850 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
851 /* Handle private use subtags separately */
852 break;
853 }
854 }
855 /* reset variant starting position */
856 pVar = NULL;
857 } else if (pVar == NULL) {
858 pVar = p;
859 }
860 p++;
861 }
862
863 if (U_SUCCESS(*status)) {
864 if (varFirst != NULL) {
865 int32_t varLen;
866
867 /* write out validated/normalized variants to the target */
868 var = varFirst;
869 while (var != NULL) {
870 if (reslen < capacity) {
871 *(appendAt + reslen) = SEP;
872 }
873 reslen++;
874 varLen = (int32_t)uprv_strlen(var->variant);
875 if (reslen < capacity) {
876 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
877 }
878 reslen += varLen;
879 var = var->next;
880 }
881 }
882 }
883
884 /* clean up */
885 var = varFirst;
886 while (var != NULL) {
887 VariantListEntry *tmpVar = var->next;
888 uprv_free(var);
889 var = tmpVar;
890 }
891
892 if (U_FAILURE(*status)) {
893 return 0;
894 }
895 }
896
897 u_terminateChars(appendAt, capacity, reslen, status);
898 return reslen;
899 }
900
901 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)902 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
903 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
904 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
905 int32_t attrBufLength = 0;
906 UEnumeration *keywordEnum = NULL;
907 int32_t reslen = 0;
908
909 keywordEnum = uloc_openKeywords(localeID, status);
910 if (U_FAILURE(*status) && !hadPosix) {
911 uenum_close(keywordEnum);
912 return 0;
913 }
914 if (keywordEnum != NULL || hadPosix) {
915 /* reorder extensions */
916 int32_t len;
917 const char *key;
918 ExtensionListEntry *firstExt = NULL;
919 ExtensionListEntry *ext;
920 AttributeListEntry *firstAttr = NULL;
921 AttributeListEntry *attr;
922 char *attrValue;
923 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
924 char *pExtBuf = extBuf;
925 int32_t extBufCapacity = sizeof(extBuf);
926 const char *bcpKey, *bcpValue;
927 UErrorCode tmpStatus = U_ZERO_ERROR;
928 int32_t keylen;
929 UBool isBcpUExt;
930
931 while (TRUE) {
932 key = uenum_next(keywordEnum, NULL, status);
933 if (key == NULL) {
934 break;
935 }
936 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
937 /* buf must be null-terminated */
938 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
939 if (strict) {
940 *status = U_ILLEGAL_ARGUMENT_ERROR;
941 break;
942 }
943 /* ignore this keyword */
944 tmpStatus = U_ZERO_ERROR;
945 continue;
946 }
947
948 keylen = (int32_t)uprv_strlen(key);
949 isBcpUExt = (keylen > 1);
950
951 /* special keyword used for representing Unicode locale attributes */
952 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
953 if (len > 0) {
954 int32_t i = 0;
955 while (TRUE) {
956 attrBufLength = 0;
957 for (; i < len; i++) {
958 if (buf[i] != '-') {
959 attrBuf[attrBufLength++] = buf[i];
960 } else {
961 i++;
962 break;
963 }
964 }
965 if (attrBufLength > 0) {
966 attrBuf[attrBufLength] = 0;
967
968 } else if (i >= len){
969 break;
970 }
971
972 /* create AttributeListEntry */
973 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
974 if (attr == NULL) {
975 *status = U_MEMORY_ALLOCATION_ERROR;
976 break;
977 }
978 attrValue = (char*)uprv_malloc(attrBufLength + 1);
979 if (attrValue == NULL) {
980 *status = U_MEMORY_ALLOCATION_ERROR;
981 break;
982 }
983 uprv_strcpy(attrValue, attrBuf);
984 attr->attribute = attrValue;
985
986 if (!_addAttributeToList(&firstAttr, attr)) {
987 uprv_free(attr);
988 uprv_free(attrValue);
989 if (strict) {
990 *status = U_ILLEGAL_ARGUMENT_ERROR;
991 break;
992 }
993 }
994 }
995 /* for a place holder ExtensionListEntry */
996 bcpKey = LOCALE_ATTRIBUTE_KEY;
997 bcpValue = NULL;
998 }
999 } else if (isBcpUExt) {
1000 bcpKey = uloc_toUnicodeLocaleKey(key);
1001 if (bcpKey == NULL) {
1002 if (strict) {
1003 *status = U_ILLEGAL_ARGUMENT_ERROR;
1004 break;
1005 }
1006 continue;
1007 }
1008
1009 /* we've checked buf is null-terminated above */
1010 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1011 if (bcpValue == NULL) {
1012 if (strict) {
1013 *status = U_ILLEGAL_ARGUMENT_ERROR;
1014 break;
1015 }
1016 continue;
1017 }
1018 if (bcpValue == buf) {
1019 /*
1020 When uloc_toUnicodeLocaleType(key, buf) returns the
1021 input value as is, the value is well-formed, but has
1022 no known mapping. This implementation normalizes the
1023 the value to lower case
1024 */
1025 int32_t bcpValueLen = uprv_strlen(bcpValue);
1026 if (bcpValueLen < extBufCapacity) {
1027 uprv_strcpy(pExtBuf, bcpValue);
1028 T_CString_toLowerCase(pExtBuf);
1029
1030 bcpValue = pExtBuf;
1031
1032 pExtBuf += (bcpValueLen + 1);
1033 extBufCapacity -= (bcpValueLen + 1);
1034 } else {
1035 if (strict) {
1036 *status = U_ILLEGAL_ARGUMENT_ERROR;
1037 break;
1038 }
1039 continue;
1040 }
1041 }
1042 } else {
1043 if (*key == PRIVATEUSE) {
1044 if (!_isPrivateuseValueSubtags(buf, len)) {
1045 if (strict) {
1046 *status = U_ILLEGAL_ARGUMENT_ERROR;
1047 break;
1048 }
1049 continue;
1050 }
1051 } else {
1052 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1053 if (strict) {
1054 *status = U_ILLEGAL_ARGUMENT_ERROR;
1055 break;
1056 }
1057 continue;
1058 }
1059 }
1060 bcpKey = key;
1061 if ((len + 1) < extBufCapacity) {
1062 uprv_memcpy(pExtBuf, buf, len);
1063 bcpValue = pExtBuf;
1064
1065 pExtBuf += len;
1066
1067 *pExtBuf = 0;
1068 pExtBuf++;
1069
1070 extBufCapacity -= (len + 1);
1071 } else {
1072 *status = U_ILLEGAL_ARGUMENT_ERROR;
1073 break;
1074 }
1075 }
1076
1077 /* create ExtensionListEntry */
1078 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1079 if (ext == NULL) {
1080 *status = U_MEMORY_ALLOCATION_ERROR;
1081 break;
1082 }
1083 ext->key = bcpKey;
1084 ext->value = bcpValue;
1085
1086 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1087 uprv_free(ext);
1088 if (strict) {
1089 *status = U_ILLEGAL_ARGUMENT_ERROR;
1090 break;
1091 }
1092 }
1093 }
1094
1095 /* Special handling for POSIX variant - add the keywords for POSIX */
1096 if (hadPosix) {
1097 /* create ExtensionListEntry for POSIX */
1098 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1099 if (ext == NULL) {
1100 *status = U_MEMORY_ALLOCATION_ERROR;
1101 goto cleanup;
1102 }
1103 ext->key = POSIX_KEY;
1104 ext->value = POSIX_VALUE;
1105
1106 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1107 uprv_free(ext);
1108 }
1109 }
1110
1111 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1112 UBool startLDMLExtension = FALSE;
1113 for (ext = firstExt; ext; ext = ext->next) {
1114 if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
1115 /* first LDML u singlton extension */
1116 if (reslen < capacity) {
1117 *(appendAt + reslen) = SEP;
1118 }
1119 reslen++;
1120 if (reslen < capacity) {
1121 *(appendAt + reslen) = LDMLEXT;
1122 }
1123 reslen++;
1124
1125 startLDMLExtension = TRUE;
1126 }
1127
1128 /* write out the sorted BCP47 attributes, extensions and private use */
1129 if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
1130 /* write the value for the attributes */
1131 for (attr = firstAttr; attr; attr = attr->next) {
1132 if (reslen < capacity) {
1133 *(appendAt + reslen) = SEP;
1134 }
1135 reslen++;
1136 len = (int32_t)uprv_strlen(attr->attribute);
1137 if (reslen < capacity) {
1138 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1139 }
1140 reslen += len;
1141 }
1142 } else {
1143 if (reslen < capacity) {
1144 *(appendAt + reslen) = SEP;
1145 }
1146 reslen++;
1147 len = (int32_t)uprv_strlen(ext->key);
1148 if (reslen < capacity) {
1149 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1150 }
1151 reslen += len;
1152 if (reslen < capacity) {
1153 *(appendAt + reslen) = SEP;
1154 }
1155 reslen++;
1156 len = (int32_t)uprv_strlen(ext->value);
1157 if (reslen < capacity) {
1158 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1159 }
1160 reslen += len;
1161 }
1162 }
1163 }
1164 cleanup:
1165 /* clean up */
1166 ext = firstExt;
1167 while (ext != NULL) {
1168 ExtensionListEntry *tmpExt = ext->next;
1169 uprv_free(ext);
1170 ext = tmpExt;
1171 }
1172
1173 attr = firstAttr;
1174 while (attr != NULL) {
1175 AttributeListEntry *tmpAttr = attr->next;
1176 char *pValue = (char *)attr->attribute;
1177 uprv_free(pValue);
1178 uprv_free(attr);
1179 attr = tmpAttr;
1180 }
1181
1182 uenum_close(keywordEnum);
1183
1184 if (U_FAILURE(*status)) {
1185 return 0;
1186 }
1187 }
1188
1189 return u_terminateChars(appendAt, capacity, reslen, status);
1190 }
1191
1192 /**
1193 * Append keywords parsed from LDML extension value
1194 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1195 * Note: char* buf is used for storing keywords
1196 */
1197 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1198 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1199 const char *pTag; /* beginning of current subtag */
1200 const char *pKwds; /* beginning of key-type pairs */
1201 UBool variantExists = *posixVariant;
1202
1203 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1204 ExtensionListEntry *kwd, *nextKwd;
1205
1206 AttributeListEntry *attrFirst = NULL; /* first attribute */
1207 AttributeListEntry *attr, *nextAttr;
1208
1209 int32_t len;
1210 int32_t bufIdx = 0;
1211
1212 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1213 int32_t attrBufIdx = 0;
1214
1215 /* Reset the posixVariant value */
1216 *posixVariant = FALSE;
1217
1218 pTag = ldmlext;
1219 pKwds = NULL;
1220
1221 /* Iterate through u extension attributes */
1222 while (*pTag) {
1223 /* locate next separator char */
1224 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1225
1226 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1227 pKwds = pTag;
1228 break;
1229 }
1230
1231 /* add this attribute to the list */
1232 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1233 if (attr == NULL) {
1234 *status = U_MEMORY_ALLOCATION_ERROR;
1235 goto cleanup;
1236 }
1237
1238 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1239 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1240 attrBuf[attrBufIdx + len] = 0;
1241 attr->attribute = &attrBuf[attrBufIdx];
1242 attrBufIdx += (len + 1);
1243 } else {
1244 *status = U_ILLEGAL_ARGUMENT_ERROR;
1245 goto cleanup;
1246 }
1247
1248 if (!_addAttributeToList(&attrFirst, attr)) {
1249 *status = U_ILLEGAL_ARGUMENT_ERROR;
1250 uprv_free(attr);
1251 goto cleanup;
1252 }
1253
1254 /* next tag */
1255 pTag += len;
1256 if (*pTag) {
1257 /* next to the separator */
1258 pTag++;
1259 }
1260 }
1261
1262 if (attrFirst) {
1263 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1264
1265 if (attrBufIdx > bufSize) {
1266 /* attrBufIdx == <total length of attribute subtag> + 1 */
1267 *status = U_ILLEGAL_ARGUMENT_ERROR;
1268 goto cleanup;
1269 }
1270
1271 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1272 if (kwd == NULL) {
1273 *status = U_MEMORY_ALLOCATION_ERROR;
1274 goto cleanup;
1275 }
1276
1277 kwd->key = LOCALE_ATTRIBUTE_KEY;
1278 kwd->value = buf;
1279
1280 /* attribute subtags sorted in alphabetical order as type */
1281 attr = attrFirst;
1282 while (attr != NULL) {
1283 nextAttr = attr->next;
1284
1285 /* buffer size check is done above */
1286 if (attr != attrFirst) {
1287 *(buf + bufIdx) = SEP;
1288 bufIdx++;
1289 }
1290
1291 len = uprv_strlen(attr->attribute);
1292 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1293 bufIdx += len;
1294
1295 attr = nextAttr;
1296 }
1297 *(buf + bufIdx) = 0;
1298 bufIdx++;
1299
1300 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1301 *status = U_ILLEGAL_ARGUMENT_ERROR;
1302 uprv_free(kwd);
1303 goto cleanup;
1304 }
1305
1306 /* once keyword entry is created, delete the attribute list */
1307 attr = attrFirst;
1308 while (attr != NULL) {
1309 nextAttr = attr->next;
1310 uprv_free(attr);
1311 attr = nextAttr;
1312 }
1313 attrFirst = NULL;
1314 }
1315
1316 if (pKwds) {
1317 const char *pBcpKey = NULL; /* u extenstion key subtag */
1318 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1319 int32_t bcpKeyLen = 0;
1320 int32_t bcpTypeLen = 0;
1321 UBool isDone = FALSE;
1322
1323 pTag = pKwds;
1324 /* BCP47 representation of LDML key/type pairs */
1325 while (!isDone) {
1326 const char *pNextBcpKey = NULL;
1327 int32_t nextBcpKeyLen = 0;
1328 UBool emitKeyword = FALSE;
1329
1330 if (*pTag) {
1331 /* locate next separator char */
1332 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1333
1334 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1335 if (pBcpKey) {
1336 emitKeyword = TRUE;
1337 pNextBcpKey = pTag;
1338 nextBcpKeyLen = len;
1339 } else {
1340 pBcpKey = pTag;
1341 bcpKeyLen = len;
1342 }
1343 } else {
1344 U_ASSERT(pBcpKey != NULL);
1345 /* within LDML type subtags */
1346 if (pBcpType) {
1347 bcpTypeLen += (len + 1);
1348 } else {
1349 pBcpType = pTag;
1350 bcpTypeLen = len;
1351 }
1352 }
1353
1354 /* next tag */
1355 pTag += len;
1356 if (*pTag) {
1357 /* next to the separator */
1358 pTag++;
1359 }
1360 } else {
1361 /* processing last one */
1362 emitKeyword = TRUE;
1363 isDone = TRUE;
1364 }
1365
1366 if (emitKeyword) {
1367 const char *pKey = NULL; /* LDML key */
1368 const char *pType = NULL; /* LDML type */
1369
1370 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1371
1372 U_ASSERT(pBcpKey != NULL);
1373
1374 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1375 /* the BCP key is invalid */
1376 *status = U_ILLEGAL_ARGUMENT_ERROR;
1377 goto cleanup;
1378 }
1379
1380 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1381 bcpKeyBuf[bcpKeyLen] = 0;
1382
1383 /* u extension key to LDML key */
1384 pKey = uloc_toLegacyKey(bcpKeyBuf);
1385 if (pKey == NULL) {
1386 *status = U_ILLEGAL_ARGUMENT_ERROR;
1387 goto cleanup;
1388 }
1389 if (pKey == bcpKeyBuf) {
1390 /*
1391 The key returned by toLegacyKey points to the input buffer.
1392 We normalize the result key to lower case.
1393 */
1394 T_CString_toLowerCase(bcpKeyBuf);
1395 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1396 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1397 pKey = buf + bufIdx;
1398 bufIdx += bcpKeyLen;
1399 *(buf + bufIdx) = 0;
1400 bufIdx++;
1401 } else {
1402 *status = U_BUFFER_OVERFLOW_ERROR;
1403 goto cleanup;
1404 }
1405 }
1406
1407 if (pBcpType) {
1408 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
1409 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1410 /* the BCP type is too long */
1411 *status = U_ILLEGAL_ARGUMENT_ERROR;
1412 goto cleanup;
1413 }
1414
1415 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1416 bcpTypeBuf[bcpTypeLen] = 0;
1417
1418 /* BCP type to locale type */
1419 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1420 if (pType == NULL) {
1421 *status = U_ILLEGAL_ARGUMENT_ERROR;
1422 goto cleanup;
1423 }
1424 if (pType == bcpTypeBuf) {
1425 /*
1426 The type returned by toLegacyType points to the input buffer.
1427 We normalize the result type to lower case.
1428 */
1429 /* normalize to lower case */
1430 T_CString_toLowerCase(bcpTypeBuf);
1431 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1432 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1433 pType = buf + bufIdx;
1434 bufIdx += bcpTypeLen;
1435 *(buf + bufIdx) = 0;
1436 bufIdx++;
1437 } else {
1438 *status = U_BUFFER_OVERFLOW_ERROR;
1439 goto cleanup;
1440 }
1441 }
1442 } else {
1443 /* typeless - default type value is "yes" */
1444 pType = LOCALE_TYPE_YES;
1445 }
1446
1447 /* Special handling for u-va-posix, since we want to treat this as a variant,
1448 not as a keyword */
1449 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1450 *posixVariant = TRUE;
1451 } else {
1452 /* create an ExtensionListEntry for this keyword */
1453 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1454 if (kwd == NULL) {
1455 *status = U_MEMORY_ALLOCATION_ERROR;
1456 goto cleanup;
1457 }
1458
1459 kwd->key = pKey;
1460 kwd->value = pType;
1461
1462 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1463 *status = U_ILLEGAL_ARGUMENT_ERROR;
1464 uprv_free(kwd);
1465 goto cleanup;
1466 }
1467 }
1468
1469 pBcpKey = pNextBcpKey;
1470 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1471 pBcpType = NULL;
1472 bcpTypeLen = 0;
1473 }
1474 }
1475 }
1476
1477 kwd = kwdFirst;
1478 while (kwd != NULL) {
1479 nextKwd = kwd->next;
1480 _addExtensionToList(appendTo, kwd, FALSE);
1481 kwd = nextKwd;
1482 }
1483
1484 return;
1485
1486 cleanup:
1487 attr = attrFirst;
1488 while (attr != NULL) {
1489 nextAttr = attr->next;
1490 uprv_free(attr);
1491 attr = nextAttr;
1492 }
1493
1494 kwd = kwdFirst;
1495 while (kwd != NULL) {
1496 nextKwd = kwd->next;
1497 uprv_free(kwd);
1498 kwd = nextKwd;
1499 }
1500 }
1501
1502
1503 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1504 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1505 int32_t reslen = 0;
1506 int32_t i, n;
1507 int32_t len;
1508 ExtensionListEntry *kwdFirst = NULL;
1509 ExtensionListEntry *kwd;
1510 const char *key, *type;
1511 char *kwdBuf = NULL;
1512 int32_t kwdBufLength = capacity;
1513 UBool posixVariant = FALSE;
1514
1515 if (U_FAILURE(*status)) {
1516 return 0;
1517 }
1518
1519 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1520 if (kwdBuf == NULL) {
1521 *status = U_MEMORY_ALLOCATION_ERROR;
1522 return 0;
1523 }
1524
1525 /* Determine if variants already exists */
1526 if (ultag_getVariantsSize(langtag)) {
1527 posixVariant = TRUE;
1528 }
1529
1530 n = ultag_getExtensionsSize(langtag);
1531
1532 /* resolve locale keywords and reordering keys */
1533 for (i = 0; i < n; i++) {
1534 key = ultag_getExtensionKey(langtag, i);
1535 type = ultag_getExtensionValue(langtag, i);
1536 if (*key == LDMLEXT) {
1537 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1538 if (U_FAILURE(*status)) {
1539 break;
1540 }
1541 } else {
1542 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1543 if (kwd == NULL) {
1544 *status = U_MEMORY_ALLOCATION_ERROR;
1545 break;
1546 }
1547 kwd->key = key;
1548 kwd->value = type;
1549 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1550 uprv_free(kwd);
1551 *status = U_ILLEGAL_ARGUMENT_ERROR;
1552 break;
1553 }
1554 }
1555 }
1556
1557 if (U_SUCCESS(*status)) {
1558 type = ultag_getPrivateUse(langtag);
1559 if ((int32_t)uprv_strlen(type) > 0) {
1560 /* add private use as a keyword */
1561 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1562 if (kwd == NULL) {
1563 *status = U_MEMORY_ALLOCATION_ERROR;
1564 } else {
1565 kwd->key = PRIVATEUSE_KEY;
1566 kwd->value = type;
1567 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1568 uprv_free(kwd);
1569 *status = U_ILLEGAL_ARGUMENT_ERROR;
1570 }
1571 }
1572 }
1573 }
1574
1575 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1576
1577 if (U_SUCCESS(*status) && posixVariant) {
1578 len = (int32_t) uprv_strlen(_POSIX);
1579 if (reslen < capacity) {
1580 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1581 }
1582 reslen += len;
1583 }
1584
1585 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1586 /* write out the sorted keywords */
1587 UBool firstValue = TRUE;
1588 kwd = kwdFirst;
1589 do {
1590 if (reslen < capacity) {
1591 if (firstValue) {
1592 /* '@' */
1593 *(appendAt + reslen) = LOCALE_EXT_SEP;
1594 firstValue = FALSE;
1595 } else {
1596 /* ';' */
1597 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1598 }
1599 }
1600 reslen++;
1601
1602 /* key */
1603 len = (int32_t)uprv_strlen(kwd->key);
1604 if (reslen < capacity) {
1605 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1606 }
1607 reslen += len;
1608
1609 /* '=' */
1610 if (reslen < capacity) {
1611 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1612 }
1613 reslen++;
1614
1615 /* type */
1616 len = (int32_t)uprv_strlen(kwd->value);
1617 if (reslen < capacity) {
1618 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1619 }
1620 reslen += len;
1621
1622 kwd = kwd->next;
1623 } while (kwd);
1624 }
1625
1626 /* clean up */
1627 kwd = kwdFirst;
1628 while (kwd != NULL) {
1629 ExtensionListEntry *tmpKwd = kwd->next;
1630 uprv_free(kwd);
1631 kwd = tmpKwd;
1632 }
1633
1634 uprv_free(kwdBuf);
1635
1636 if (U_FAILURE(*status)) {
1637 return 0;
1638 }
1639
1640 return u_terminateChars(appendAt, capacity, reslen, status);
1641 }
1642
1643 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1644 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1645 char buf[ULOC_FULLNAME_CAPACITY];
1646 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1647 UErrorCode tmpStatus = U_ZERO_ERROR;
1648 int32_t len, i;
1649 int32_t reslen = 0;
1650
1651 if (U_FAILURE(*status)) {
1652 return 0;
1653 }
1654
1655 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1656 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1657 if (strict) {
1658 *status = U_ILLEGAL_ARGUMENT_ERROR;
1659 }
1660 return 0;
1661 }
1662
1663 if (len > 0) {
1664 char *p, *pPriv;
1665 UBool bNext = TRUE;
1666 UBool firstValue = TRUE;
1667 UBool writeValue;
1668
1669 pPriv = NULL;
1670 p = buf;
1671 while (bNext) {
1672 writeValue = FALSE;
1673 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1674 if (*p == 0) {
1675 bNext = FALSE;
1676 } else {
1677 *p = 0; /* terminate */
1678 }
1679 if (pPriv != NULL) {
1680 /* Private use in the canonical format is lowercase in BCP47 */
1681 for (i = 0; *(pPriv + i) != 0; i++) {
1682 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1683 }
1684
1685 /* validate */
1686 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1687 if (firstValue) {
1688 if (!_isVariantSubtag(pPriv, -1)) {
1689 writeValue = TRUE;
1690 }
1691 } else {
1692 writeValue = TRUE;
1693 }
1694 } else if (strict) {
1695 *status = U_ILLEGAL_ARGUMENT_ERROR;
1696 break;
1697 } else {
1698 break;
1699 }
1700
1701 if (writeValue) {
1702 if (reslen < capacity) {
1703 tmpAppend[reslen++] = SEP;
1704 }
1705
1706 if (firstValue) {
1707 if (reslen < capacity) {
1708 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1709 }
1710
1711 if (reslen < capacity) {
1712 tmpAppend[reslen++] = SEP;
1713 }
1714
1715 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1716 if (reslen < capacity) {
1717 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1718 }
1719 reslen += len;
1720
1721 if (reslen < capacity) {
1722 tmpAppend[reslen++] = SEP;
1723 }
1724
1725 firstValue = FALSE;
1726 }
1727
1728 len = (int32_t)uprv_strlen(pPriv);
1729 if (reslen < capacity) {
1730 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1731 }
1732 reslen += len;
1733 }
1734 }
1735 /* reset private use starting position */
1736 pPriv = NULL;
1737 } else if (pPriv == NULL) {
1738 pPriv = p;
1739 }
1740 p++;
1741 }
1742
1743 if (U_FAILURE(*status)) {
1744 return 0;
1745 }
1746 }
1747
1748 if (U_SUCCESS(*status)) {
1749 len = reslen;
1750 if (reslen < capacity) {
1751 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1752 }
1753 }
1754
1755 u_terminateChars(appendAt, capacity, reslen, status);
1756
1757 return reslen;
1758 }
1759
1760 /*
1761 * -------------------------------------------------
1762 *
1763 * ultag_ functions
1764 *
1765 * -------------------------------------------------
1766 */
1767
1768 /* Bit flags used by the parser */
1769 #define LANG 0x0001
1770 #define EXTL 0x0002
1771 #define SCRT 0x0004
1772 #define REGN 0x0008
1773 #define VART 0x0010
1774 #define EXTS 0x0020
1775 #define EXTV 0x0040
1776 #define PRIV 0x0080
1777
1778 /**
1779 * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
1780 * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
1781 * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
1782 */
1783 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
1784 #pragma optimize( "", off )
1785 #endif
1786
1787 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)1788 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1789 ULanguageTag *t;
1790 char *tagBuf;
1791 int16_t next;
1792 char *pSubtag, *pNext, *pLastGoodPosition;
1793 int32_t subtagLen;
1794 int32_t extlangIdx;
1795 ExtensionListEntry *pExtension;
1796 char *pExtValueSubtag, *pExtValueSubtagEnd;
1797 int32_t i;
1798 UBool privateuseVar = FALSE;
1799 int32_t grandfatheredLen = 0;
1800
1801 if (parsedLen != NULL) {
1802 *parsedLen = 0;
1803 }
1804
1805 if (U_FAILURE(*status)) {
1806 return NULL;
1807 }
1808
1809 if (tagLen < 0) {
1810 tagLen = (int32_t)uprv_strlen(tag);
1811 }
1812
1813 /* copy the entire string */
1814 tagBuf = (char*)uprv_malloc(tagLen + 1);
1815 if (tagBuf == NULL) {
1816 *status = U_MEMORY_ALLOCATION_ERROR;
1817 return NULL;
1818 }
1819 uprv_memcpy(tagBuf, tag, tagLen);
1820 *(tagBuf + tagLen) = 0;
1821
1822 /* create a ULanguageTag */
1823 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1824 if (t == NULL) {
1825 uprv_free(tagBuf);
1826 *status = U_MEMORY_ALLOCATION_ERROR;
1827 return NULL;
1828 }
1829 _initializeULanguageTag(t);
1830 t->buf = tagBuf;
1831
1832 if (tagLen < MINLEN) {
1833 /* the input tag is too short - return empty ULanguageTag */
1834 return t;
1835 }
1836
1837 /* check if the tag is grandfathered */
1838 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1839 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1840 int32_t newTagLength;
1841
1842 grandfatheredLen = tagLen; /* back up for output parsedLen */
1843 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1844 if (tagLen < newTagLength) {
1845 uprv_free(tagBuf);
1846 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1847 if (tagBuf == NULL) {
1848 *status = U_MEMORY_ALLOCATION_ERROR;
1849 ultag_close(t);
1850 return NULL;
1851 }
1852 t->buf = tagBuf;
1853 tagLen = newTagLength;
1854 }
1855 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1856 break;
1857 }
1858 }
1859
1860 /*
1861 * langtag = language
1862 * ["-" script]
1863 * ["-" region]
1864 * *("-" variant)
1865 * *("-" extension)
1866 * ["-" privateuse]
1867 */
1868
1869 next = LANG | PRIV;
1870 pNext = pLastGoodPosition = tagBuf;
1871 extlangIdx = 0;
1872 pExtension = NULL;
1873 pExtValueSubtag = NULL;
1874 pExtValueSubtagEnd = NULL;
1875
1876 while (pNext) {
1877 char *pSep;
1878
1879 pSubtag = pNext;
1880
1881 /* locate next separator char */
1882 pSep = pSubtag;
1883 while (*pSep) {
1884 if (*pSep == SEP) {
1885 break;
1886 }
1887 pSep++;
1888 }
1889 if (*pSep == 0) {
1890 /* last subtag */
1891 pNext = NULL;
1892 } else {
1893 pNext = pSep + 1;
1894 }
1895 subtagLen = (int32_t)(pSep - pSubtag);
1896
1897 if (next & LANG) {
1898 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1899 *pSep = 0; /* terminate */
1900 t->language = T_CString_toLowerCase(pSubtag);
1901
1902 pLastGoodPosition = pSep;
1903 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1904 continue;
1905 }
1906 }
1907 if (next & EXTL) {
1908 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1909 *pSep = 0;
1910 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1911
1912 pLastGoodPosition = pSep;
1913 if (extlangIdx < 3) {
1914 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1915 } else {
1916 next = SCRT | REGN | VART | EXTS | PRIV;
1917 }
1918 continue;
1919 }
1920 }
1921 if (next & SCRT) {
1922 if (_isScriptSubtag(pSubtag, subtagLen)) {
1923 char *p = pSubtag;
1924
1925 *pSep = 0;
1926
1927 /* to title case */
1928 *p = uprv_toupper(*p);
1929 p++;
1930 for (; *p; p++) {
1931 *p = uprv_tolower(*p);
1932 }
1933
1934 t->script = pSubtag;
1935
1936 pLastGoodPosition = pSep;
1937 next = REGN | VART | EXTS | PRIV;
1938 continue;
1939 }
1940 }
1941 if (next & REGN) {
1942 if (_isRegionSubtag(pSubtag, subtagLen)) {
1943 *pSep = 0;
1944 t->region = T_CString_toUpperCase(pSubtag);
1945
1946 pLastGoodPosition = pSep;
1947 next = VART | EXTS | PRIV;
1948 continue;
1949 }
1950 }
1951 if (next & VART) {
1952 if (_isVariantSubtag(pSubtag, subtagLen) ||
1953 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1954 VariantListEntry *var;
1955 UBool isAdded;
1956
1957 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1958 if (var == NULL) {
1959 *status = U_MEMORY_ALLOCATION_ERROR;
1960 goto error;
1961 }
1962 *pSep = 0;
1963 var->variant = T_CString_toUpperCase(pSubtag);
1964 isAdded = _addVariantToList(&(t->variants), var);
1965 if (!isAdded) {
1966 /* duplicated variant entry */
1967 uprv_free(var);
1968 break;
1969 }
1970 pLastGoodPosition = pSep;
1971 next = VART | EXTS | PRIV;
1972 continue;
1973 }
1974 }
1975 if (next & EXTS) {
1976 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1977 if (pExtension != NULL) {
1978 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1979 /* the previous extension is incomplete */
1980 uprv_free(pExtension);
1981 pExtension = NULL;
1982 break;
1983 }
1984
1985 /* terminate the previous extension value */
1986 *pExtValueSubtagEnd = 0;
1987 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1988
1989 /* insert the extension to the list */
1990 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1991 pLastGoodPosition = pExtValueSubtagEnd;
1992 } else {
1993 /* stop parsing here */
1994 uprv_free(pExtension);
1995 pExtension = NULL;
1996 break;
1997 }
1998 }
1999
2000 /* create a new extension */
2001 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
2002 if (pExtension == NULL) {
2003 *status = U_MEMORY_ALLOCATION_ERROR;
2004 goto error;
2005 }
2006 *pSep = 0;
2007 pExtension->key = T_CString_toLowerCase(pSubtag);
2008 pExtension->value = NULL; /* will be set later */
2009
2010 /*
2011 * reset the start and the end location of extension value
2012 * subtags for this extension
2013 */
2014 pExtValueSubtag = NULL;
2015 pExtValueSubtagEnd = NULL;
2016
2017 next = EXTV;
2018 continue;
2019 }
2020 }
2021 if (next & EXTV) {
2022 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2023 if (pExtValueSubtag == NULL) {
2024 /* if the start postion of this extension's value is not yet,
2025 this one is the first value subtag */
2026 pExtValueSubtag = pSubtag;
2027 }
2028
2029 /* Mark the end of this subtag */
2030 pExtValueSubtagEnd = pSep;
2031 next = EXTS | EXTV | PRIV;
2032
2033 continue;
2034 }
2035 }
2036 if (next & PRIV) {
2037 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2038 char *pPrivuseVal;
2039
2040 if (pExtension != NULL) {
2041 /* Process the last extension */
2042 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2043 /* the previous extension is incomplete */
2044 uprv_free(pExtension);
2045 pExtension = NULL;
2046 break;
2047 } else {
2048 /* terminate the previous extension value */
2049 *pExtValueSubtagEnd = 0;
2050 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2051
2052 /* insert the extension to the list */
2053 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2054 pLastGoodPosition = pExtValueSubtagEnd;
2055 pExtension = NULL;
2056 } else {
2057 /* stop parsing here */
2058 uprv_free(pExtension);
2059 pExtension = NULL;
2060 break;
2061 }
2062 }
2063 }
2064
2065 /* The rest of part will be private use value subtags */
2066 if (pNext == NULL) {
2067 /* empty private use subtag */
2068 break;
2069 }
2070 /* back up the private use value start position */
2071 pPrivuseVal = pNext;
2072
2073 /* validate private use value subtags */
2074 while (pNext) {
2075 pSubtag = pNext;
2076 pSep = pSubtag;
2077 while (*pSep) {
2078 if (*pSep == SEP) {
2079 break;
2080 }
2081 pSep++;
2082 }
2083 if (*pSep == 0) {
2084 /* last subtag */
2085 pNext = NULL;
2086 } else {
2087 pNext = pSep + 1;
2088 }
2089 subtagLen = (int32_t)(pSep - pSubtag);
2090
2091 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2092 *pSep = 0;
2093 next = VART;
2094 privateuseVar = TRUE;
2095 break;
2096 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2097 pLastGoodPosition = pSep;
2098 } else {
2099 break;
2100 }
2101 }
2102
2103 if (next == VART) {
2104 continue;
2105 }
2106
2107 if (pLastGoodPosition - pPrivuseVal > 0) {
2108 *pLastGoodPosition = 0;
2109 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2110 }
2111 /* No more subtags, exiting the parse loop */
2112 break;
2113 }
2114 break;
2115 }
2116
2117 /* If we fell through here, it means this subtag is illegal - quit parsing */
2118 break;
2119 }
2120
2121 if (pExtension != NULL) {
2122 /* Process the last extension */
2123 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2124 /* the previous extension is incomplete */
2125 uprv_free(pExtension);
2126 } else {
2127 /* terminate the previous extension value */
2128 *pExtValueSubtagEnd = 0;
2129 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2130 /* insert the extension to the list */
2131 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2132 pLastGoodPosition = pExtValueSubtagEnd;
2133 } else {
2134 uprv_free(pExtension);
2135 }
2136 }
2137 }
2138
2139 if (parsedLen != NULL) {
2140 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2141 }
2142
2143 return t;
2144
2145 error:
2146 ultag_close(t);
2147 return NULL;
2148 }
2149
2150 /**
2151 * Ticket #12705 - Turn optimization back on.
2152 */
2153 #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
2154 #pragma optimize( "", on )
2155 #endif
2156
2157 static void
ultag_close(ULanguageTag * langtag)2158 ultag_close(ULanguageTag* langtag) {
2159
2160 if (langtag == NULL) {
2161 return;
2162 }
2163
2164 uprv_free(langtag->buf);
2165
2166 if (langtag->variants) {
2167 VariantListEntry *curVar = langtag->variants;
2168 while (curVar) {
2169 VariantListEntry *nextVar = curVar->next;
2170 uprv_free(curVar);
2171 curVar = nextVar;
2172 }
2173 }
2174
2175 if (langtag->extensions) {
2176 ExtensionListEntry *curExt = langtag->extensions;
2177 while (curExt) {
2178 ExtensionListEntry *nextExt = curExt->next;
2179 uprv_free(curExt);
2180 curExt = nextExt;
2181 }
2182 }
2183
2184 uprv_free(langtag);
2185 }
2186
2187 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2188 ultag_getLanguage(const ULanguageTag* langtag) {
2189 return langtag->language;
2190 }
2191
#if 0
/*
 * Currently compiled out.
 * Returns the entry paired with the tag's language in the DEPRECATEDLANGS
 * table (pairs of strings, terminated by NULL), or the language itself when
 * the table has no entry for it.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pair = 0;

    while (DEPRECATEDLANGS[pair] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pair], langtag->language) == 0) {
            return DEPRECATEDLANGS[pair + 1];
        }
        pair += 2;
    }
    return langtag->language;
}
#endif
2204
2205 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2206 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2207 if (idx >= 0 && idx < MAXEXTLANG) {
2208 return langtag->extlang[idx];
2209 }
2210 return NULL;
2211 }
2212
2213 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2214 ultag_getExtlangSize(const ULanguageTag* langtag) {
2215 int32_t size = 0;
2216 int32_t i;
2217 for (i = 0; i < MAXEXTLANG; i++) {
2218 if (langtag->extlang[i]) {
2219 size++;
2220 }
2221 }
2222 return size;
2223 }
2224
2225 static const char*
ultag_getScript(const ULanguageTag * langtag)2226 ultag_getScript(const ULanguageTag* langtag) {
2227 return langtag->script;
2228 }
2229
2230 static const char*
ultag_getRegion(const ULanguageTag * langtag)2231 ultag_getRegion(const ULanguageTag* langtag) {
2232 return langtag->region;
2233 }
2234
2235 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2236 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2237 const char *var = NULL;
2238 VariantListEntry *cur = langtag->variants;
2239 int32_t i = 0;
2240 while (cur) {
2241 if (i == idx) {
2242 var = cur->variant;
2243 break;
2244 }
2245 cur = cur->next;
2246 i++;
2247 }
2248 return var;
2249 }
2250
2251 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2252 ultag_getVariantsSize(const ULanguageTag* langtag) {
2253 int32_t size = 0;
2254 VariantListEntry *cur = langtag->variants;
2255 while (TRUE) {
2256 if (cur == NULL) {
2257 break;
2258 }
2259 size++;
2260 cur = cur->next;
2261 }
2262 return size;
2263 }
2264
2265 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2266 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2267 const char *key = NULL;
2268 ExtensionListEntry *cur = langtag->extensions;
2269 int32_t i = 0;
2270 while (cur) {
2271 if (i == idx) {
2272 key = cur->key;
2273 break;
2274 }
2275 cur = cur->next;
2276 i++;
2277 }
2278 return key;
2279 }
2280
2281 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2282 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2283 const char *val = NULL;
2284 ExtensionListEntry *cur = langtag->extensions;
2285 int32_t i = 0;
2286 while (cur) {
2287 if (i == idx) {
2288 val = cur->value;
2289 break;
2290 }
2291 cur = cur->next;
2292 i++;
2293 }
2294 return val;
2295 }
2296
2297 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2298 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2299 int32_t size = 0;
2300 ExtensionListEntry *cur = langtag->extensions;
2301 while (TRUE) {
2302 if (cur == NULL) {
2303 break;
2304 }
2305 size++;
2306 cur = cur->next;
2307 }
2308 return size;
2309 }
2310
2311 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2312 ultag_getPrivateUse(const ULanguageTag* langtag) {
2313 return langtag->privateuse;
2314 }
2315
#if 0
/* Currently compiled out. Returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;

    return grandfathered;
}
#endif
2322
2323
2324 /*
2325 * -------------------------------------------------
2326 *
2327 * Locale/BCP47 conversion APIs, exposed as uloc_*
2328 *
2329 * -------------------------------------------------
2330 */
2331 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2332 uloc_toLanguageTag(const char* localeID,
2333 char* langtag,
2334 int32_t langtagCapacity,
2335 UBool strict,
2336 UErrorCode* status) {
2337 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2338 char canonical[256];
2339 int32_t reslen = 0;
2340 UErrorCode tmpStatus = U_ZERO_ERROR;
2341 UBool hadPosix = FALSE;
2342 const char* pKeywordStart;
2343
2344 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2345 canonical[0] = 0;
2346 if (uprv_strlen(localeID) > 0) {
2347 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2348 if (tmpStatus != U_ZERO_ERROR) {
2349 *status = U_ILLEGAL_ARGUMENT_ERROR;
2350 return 0;
2351 }
2352 }
2353
2354 /* For handling special case - private use only tag */
2355 pKeywordStart = locale_getKeywordsStart(canonical);
2356 if (pKeywordStart == canonical) {
2357 UEnumeration *kwdEnum;
2358 int kwdCnt = 0;
2359 UBool done = FALSE;
2360
2361 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2362 if (kwdEnum != NULL) {
2363 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2364 if (kwdCnt == 1) {
2365 const char *key;
2366 int32_t len = 0;
2367
2368 key = uenum_next(kwdEnum, &len, &tmpStatus);
2369 if (len == 1 && *key == PRIVATEUSE) {
2370 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2371 buf[0] = PRIVATEUSE;
2372 buf[1] = SEP;
2373 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2374 if (U_SUCCESS(tmpStatus)) {
2375 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2376 /* return private use only tag */
2377 reslen = len + 2;
2378 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2379 u_terminateChars(langtag, langtagCapacity, reslen, status);
2380 done = TRUE;
2381 } else if (strict) {
2382 *status = U_ILLEGAL_ARGUMENT_ERROR;
2383 done = TRUE;
2384 }
2385 /* if not strict mode, then "und" will be returned */
2386 } else {
2387 *status = U_ILLEGAL_ARGUMENT_ERROR;
2388 done = TRUE;
2389 }
2390 }
2391 }
2392 uenum_close(kwdEnum);
2393 if (done) {
2394 return reslen;
2395 }
2396 }
2397 }
2398
2399 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2400 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2401 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2402 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2403 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2404 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2405
2406 return reslen;
2407 }
2408
2409
2410 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2411 uloc_forLanguageTag(const char* langtag,
2412 char* localeID,
2413 int32_t localeIDCapacity,
2414 int32_t* parsedLength,
2415 UErrorCode* status) {
2416 ULanguageTag *lt;
2417 int32_t reslen = 0;
2418 const char *subtag, *p;
2419 int32_t len;
2420 int32_t i, n;
2421 UBool noRegion = TRUE;
2422
2423 lt = ultag_parse(langtag, -1, parsedLength, status);
2424 if (U_FAILURE(*status)) {
2425 return 0;
2426 }
2427
2428 /* language */
2429 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2430 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2431 len = (int32_t)uprv_strlen(subtag);
2432 if (len > 0) {
2433 if (reslen < localeIDCapacity) {
2434 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2435 }
2436 reslen += len;
2437 }
2438 }
2439
2440 /* script */
2441 subtag = ultag_getScript(lt);
2442 len = (int32_t)uprv_strlen(subtag);
2443 if (len > 0) {
2444 if (reslen < localeIDCapacity) {
2445 *(localeID + reslen) = LOCALE_SEP;
2446 }
2447 reslen++;
2448
2449 /* write out the script in title case */
2450 p = subtag;
2451 while (*p) {
2452 if (reslen < localeIDCapacity) {
2453 if (p == subtag) {
2454 *(localeID + reslen) = uprv_toupper(*p);
2455 } else {
2456 *(localeID + reslen) = *p;
2457 }
2458 }
2459 reslen++;
2460 p++;
2461 }
2462 }
2463
2464 /* region */
2465 subtag = ultag_getRegion(lt);
2466 len = (int32_t)uprv_strlen(subtag);
2467 if (len > 0) {
2468 if (reslen < localeIDCapacity) {
2469 *(localeID + reslen) = LOCALE_SEP;
2470 }
2471 reslen++;
2472 /* write out the retion in upper case */
2473 p = subtag;
2474 while (*p) {
2475 if (reslen < localeIDCapacity) {
2476 *(localeID + reslen) = uprv_toupper(*p);
2477 }
2478 reslen++;
2479 p++;
2480 }
2481 noRegion = FALSE;
2482 }
2483
2484 /* variants */
2485 n = ultag_getVariantsSize(lt);
2486 if (n > 0) {
2487 if (noRegion) {
2488 if (reslen < localeIDCapacity) {
2489 *(localeID + reslen) = LOCALE_SEP;
2490 }
2491 reslen++;
2492 }
2493
2494 for (i = 0; i < n; i++) {
2495 subtag = ultag_getVariant(lt, i);
2496 if (reslen < localeIDCapacity) {
2497 *(localeID + reslen) = LOCALE_SEP;
2498 }
2499 reslen++;
2500 /* write out the variant in upper case */
2501 p = subtag;
2502 while (*p) {
2503 if (reslen < localeIDCapacity) {
2504 *(localeID + reslen) = uprv_toupper(*p);
2505 }
2506 reslen++;
2507 p++;
2508 }
2509 }
2510 }
2511
2512 /* keywords */
2513 n = ultag_getExtensionsSize(lt);
2514 subtag = ultag_getPrivateUse(lt);
2515 if (n > 0 || uprv_strlen(subtag) > 0) {
2516 if (reslen == 0 && n > 0) {
2517 /* need a language */
2518 if (reslen < localeIDCapacity) {
2519 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2520 }
2521 reslen += LANG_UND_LEN;
2522 }
2523 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2524 reslen += len;
2525 }
2526
2527 ultag_close(lt);
2528 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2529 }
2530
2531
2532