1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File locid.cpp
10 *
11 * Created by: Richard Gillam
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17 * methods to get and set it.
18 * 04/02/97 aliu Made operator!= inline; fixed return value
19 * of getName().
20 * 04/15/97 aliu Cleanup for AIX/Win32.
21 * 04/24/97 aliu Numerous changes per code review.
22 * 08/18/98 stephen Changed getDisplayName()
23 * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24 * Added getISOCountries(), getISOLanguages(),
25 * getLanguagesForCountry()
26 * 03/16/99 bertrand rehaul.
27 * 07/21/99 stephen Added U_CFUNC setDefault
28 * 11/09/99 weiv Added const char * getName() const;
29 * 04/12/00 srl removing unicodestring api's and cached hash code
30 * 08/10/01 grhoten Change the static Locales to accessor functions
31 ******************************************************************************
32 */
33
34 #include <utility>
35
36 #include "unicode/bytestream.h"
37 #include "unicode/locid.h"
38 #include "unicode/strenum.h"
39 #include "unicode/stringpiece.h"
40 #include "unicode/uloc.h"
41
42 #include "bytesinkutil.h"
43 #include "charstr.h"
44 #include "cmemory.h"
45 #include "cstring.h"
46 #include "mutex.h"
47 #include "putilimp.h"
48 #include "uassert.h"
49 #include "ucln_cmn.h"
50 #include "uhash.h"
51 #include "ulocimp.h"
52 #include "umutex.h"
53 #include "ustr_imp.h"
54
55 U_CDECL_BEGIN
56 static UBool U_CALLCONV locale_cleanup(void);
57 U_CDECL_END
58
59 U_NAMESPACE_BEGIN
60
61 static Locale *gLocaleCache = NULL;
62 static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
63
64 // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
65 static UMutex gDefaultLocaleMutex;
66 static UHashtable *gDefaultLocalesHashT = NULL;
67 static Locale *gDefaultLocale = NULL;
68
69 /**
70 * \def ULOC_STRING_LIMIT
71 * strings beyond this value crash in CharString
72 */
73 #define ULOC_STRING_LIMIT 357913941
74
75 U_NAMESPACE_END
76
/**
 * Slots of the predefined-locale cache.
 * Language-only entries come first, then language+country entries, then
 * the root locale. eMAX_LOCALES is the number of slots and must stay last.
 */
enum ELocalePos {
    /* languages */
    eENGLISH = 0,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    /* languages with a country */
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,             /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,

    /* root */
    eROOT,

    //eDEFAULT,
    eMAX_LOCALES
};
103
104 U_CFUNC int32_t locale_getKeywords(const char *localeID,
105 char prev,
106 char *keywords, int32_t keywordCapacity,
107 char *values, int32_t valuesCapacity, int32_t *valLen,
108 UBool valuesToo,
109 UErrorCode *status);
110
111 U_CDECL_BEGIN
112 //
113 // Deleter function for Locales owned by the default Locale hash table/
114 //
115 static void U_CALLCONV
deleteLocale(void * obj)116 deleteLocale(void *obj) {
117 delete (icu::Locale *) obj;
118 }
119
locale_cleanup(void)120 static UBool U_CALLCONV locale_cleanup(void)
121 {
122 U_NAMESPACE_USE
123
124 delete [] gLocaleCache;
125 gLocaleCache = NULL;
126 gLocaleCacheInitOnce.reset();
127
128 if (gDefaultLocalesHashT) {
129 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
130 gDefaultLocalesHashT = NULL;
131 }
132 gDefaultLocale = NULL;
133 return TRUE;
134 }
135
136
locale_init(UErrorCode & status)137 static void U_CALLCONV locale_init(UErrorCode &status) {
138 U_NAMESPACE_USE
139
140 U_ASSERT(gLocaleCache == NULL);
141 gLocaleCache = new Locale[(int)eMAX_LOCALES];
142 if (gLocaleCache == NULL) {
143 status = U_MEMORY_ALLOCATION_ERROR;
144 return;
145 }
146 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
147 gLocaleCache[eROOT] = Locale("");
148 gLocaleCache[eENGLISH] = Locale("en");
149 gLocaleCache[eFRENCH] = Locale("fr");
150 gLocaleCache[eGERMAN] = Locale("de");
151 gLocaleCache[eITALIAN] = Locale("it");
152 gLocaleCache[eJAPANESE] = Locale("ja");
153 gLocaleCache[eKOREAN] = Locale("ko");
154 gLocaleCache[eCHINESE] = Locale("zh");
155 gLocaleCache[eFRANCE] = Locale("fr", "FR");
156 gLocaleCache[eGERMANY] = Locale("de", "DE");
157 gLocaleCache[eITALY] = Locale("it", "IT");
158 gLocaleCache[eJAPAN] = Locale("ja", "JP");
159 gLocaleCache[eKOREA] = Locale("ko", "KR");
160 gLocaleCache[eCHINA] = Locale("zh", "CN");
161 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
162 gLocaleCache[eUK] = Locale("en", "GB");
163 gLocaleCache[eUS] = Locale("en", "US");
164 gLocaleCache[eCANADA] = Locale("en", "CA");
165 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
166 }
167
168 U_CDECL_END
169
170 U_NAMESPACE_BEGIN
171
locale_set_default_internal(const char * id,UErrorCode & status)172 Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
173 // Synchronize this entire function.
174 Mutex lock(&gDefaultLocaleMutex);
175
176 UBool canonicalize = FALSE;
177
178 // If given a NULL string for the locale id, grab the default
179 // name from the system.
180 // (Different from most other locale APIs, where a null name means use
181 // the current ICU default locale.)
182 if (id == NULL) {
183 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
184 canonicalize = TRUE; // always canonicalize host ID
185 }
186
187 char localeNameBuf[512];
188
189 if (canonicalize) {
190 uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
191 } else {
192 uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
193 }
194 localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of
195 // a long name filling the buffer.
196 // (long names are truncated.)
197 //
198 if (U_FAILURE(status)) {
199 return gDefaultLocale;
200 }
201
202 if (gDefaultLocalesHashT == NULL) {
203 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
204 if (U_FAILURE(status)) {
205 return gDefaultLocale;
206 }
207 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
208 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
209 }
210
211 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
212 if (newDefault == NULL) {
213 newDefault = new Locale(Locale::eBOGUS);
214 if (newDefault == NULL) {
215 status = U_MEMORY_ALLOCATION_ERROR;
216 return gDefaultLocale;
217 }
218 newDefault->init(localeNameBuf, FALSE);
219 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
220 if (U_FAILURE(status)) {
221 return gDefaultLocale;
222 }
223 }
224 gDefaultLocale = newDefault;
225 return gDefaultLocale;
226 }
227
228 U_NAMESPACE_END
229
230 /* sfb 07/21/99 */
231 U_CFUNC void
locale_set_default(const char * id)232 locale_set_default(const char *id)
233 {
234 U_NAMESPACE_USE
235 UErrorCode status = U_ZERO_ERROR;
236 locale_set_default_internal(id, status);
237 }
238 /* end */
239
240 U_CFUNC const char *
locale_get_default(void)241 locale_get_default(void)
242 {
243 U_NAMESPACE_USE
244 return Locale::getDefault().getName();
245 }
246
247
248 U_NAMESPACE_BEGIN
249
// RTTI boilerplate for Locale, generated by the UObject helper macro
// (the macro body is not visible from this file).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
251
/* Field separator of a POSIX-style locale ID: '_', in the platform codepage. */
#define SEP_CHAR '_'
256
257 Locale::~Locale()
258 {
259 if (baseName != fullName) {
260 uprv_free(baseName);
261 }
262 baseName = NULL;
263 /*if fullName is on the heap, we free it*/
264 if (fullName != fullNameBuffer)
265 {
266 uprv_free(fullName);
267 fullName = NULL;
268 }
269 }
270
Locale()271 Locale::Locale()
272 : UObject(), fullName(fullNameBuffer), baseName(NULL)
273 {
274 init(NULL, FALSE);
275 }
276
277 /*
278 * Internal constructor to allow construction of a locale object with
279 * NO side effects. (Default constructor tries to get
280 * the default locale.)
281 */
Locale(Locale::ELocaleType)282 Locale::Locale(Locale::ELocaleType)
283 : UObject(), fullName(fullNameBuffer), baseName(NULL)
284 {
285 setToBogus();
286 }
287
288
Locale(const char * newLanguage,const char * newCountry,const char * newVariant,const char * newKeywords)289 Locale::Locale( const char * newLanguage,
290 const char * newCountry,
291 const char * newVariant,
292 const char * newKeywords)
293 : UObject(), fullName(fullNameBuffer), baseName(NULL)
294 {
295 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
296 {
297 init(NULL, FALSE); /* shortcut */
298 }
299 else
300 {
301 UErrorCode status = U_ZERO_ERROR;
302 int32_t size = 0;
303 int32_t lsize = 0;
304 int32_t csize = 0;
305 int32_t vsize = 0;
306 int32_t ksize = 0;
307
308 // Calculate the size of the resulting string.
309
310 // Language
311 if ( newLanguage != NULL )
312 {
313 lsize = (int32_t)uprv_strlen(newLanguage);
314 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
315 setToBogus();
316 return;
317 }
318 size = lsize;
319 }
320
321 CharString togo(newLanguage, lsize, status); // start with newLanguage
322
323 // _Country
324 if ( newCountry != NULL )
325 {
326 csize = (int32_t)uprv_strlen(newCountry);
327 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
328 setToBogus();
329 return;
330 }
331 size += csize;
332 }
333
334 // _Variant
335 if ( newVariant != NULL )
336 {
337 // remove leading _'s
338 while(newVariant[0] == SEP_CHAR)
339 {
340 newVariant++;
341 }
342
343 // remove trailing _'s
344 vsize = (int32_t)uprv_strlen(newVariant);
345 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
346 setToBogus();
347 return;
348 }
349 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
350 {
351 vsize--;
352 }
353 }
354
355 if( vsize > 0 )
356 {
357 size += vsize;
358 }
359
360 // Separator rules:
361 if ( vsize > 0 )
362 {
363 size += 2; // at least: __v
364 }
365 else if ( csize > 0 )
366 {
367 size += 1; // at least: _v
368 }
369
370 if ( newKeywords != NULL)
371 {
372 ksize = (int32_t)uprv_strlen(newKeywords);
373 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
374 setToBogus();
375 return;
376 }
377 size += ksize + 1;
378 }
379
380 // NOW we have the full locale string..
381 // Now, copy it back.
382
383 // newLanguage is already copied
384
385 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
386 { // ^
387 togo.append(SEP_CHAR, status);
388 }
389
390 if ( csize != 0 )
391 {
392 togo.append(newCountry, status);
393 }
394
395 if ( vsize != 0)
396 {
397 togo.append(SEP_CHAR, status)
398 .append(newVariant, vsize, status);
399 }
400
401 if ( ksize != 0)
402 {
403 if (uprv_strchr(newKeywords, '=')) {
404 togo.append('@', status); /* keyword parsing */
405 }
406 else {
407 togo.append('_', status); /* Variant parsing with a script */
408 if ( vsize == 0) {
409 togo.append('_', status); /* No country found */
410 }
411 }
412 togo.append(newKeywords, status);
413 }
414
415 if (U_FAILURE(status)) {
416 // Something went wrong with appending, etc.
417 setToBogus();
418 return;
419 }
420 // Parse it, because for example 'language' might really be a complete
421 // string.
422 init(togo.data(), FALSE);
423 }
424 }
425
// Copy constructor: starts in the empty inline-buffer state and lets the
// copy assignment operator duplicate other's contents.
Locale::Locale(const Locale &other)
    : UObject(other),
      fullName(fullNameBuffer),
      baseName(NULL)
{
    operator=(other);
}
431
Locale(Locale && other)432 Locale::Locale(Locale&& other) U_NOEXCEPT
433 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
434 *this = std::move(other);
435 }
436
// Copy assignment. The object is first reset to the bogus state, so every
// early exit on an allocation failure below leaves a bogus locale behind.
Locale& Locale::operator=(const Locale& other) {
    if (&other == this) {
        return *this;
    }

    setToBogus();

    // Full name: copied into the inline buffer when other uses its own
    // inline buffer, otherwise duplicated on the heap.
    const bool otherNameIsInline = (other.fullName == other.fullNameBuffer);
    if (otherNameIsInline) {
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
    } else if (other.fullName == nullptr) {
        fullName = nullptr;
    } else {
        fullName = uprv_strdup(other.fullName);
        if (fullName == nullptr) {
            return *this;
        }
    }

    // Base name: aliases the full name when other's does, otherwise it is
    // a separate heap copy (or stays NULL when other has none).
    const bool otherBaseAliasesFull = (other.baseName == other.fullName);
    if (otherBaseAliasesFull) {
        baseName = fullName;
    } else if (other.baseName != nullptr) {
        baseName = uprv_strdup(other.baseName);
        if (baseName == nullptr) {
            return *this;
        }
    }

    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);

    fIsBogus = other.fIsBogus;
    variantBegin = other.variantBegin;

    return *this;
}
469
operator =(Locale && other)470 Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
471 if (baseName != fullName) uprv_free(baseName);
472 if (fullName != fullNameBuffer) uprv_free(fullName);
473
474 if (other.fullName == other.fullNameBuffer) {
475 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
476 fullName = fullNameBuffer;
477 } else {
478 fullName = other.fullName;
479 }
480
481 if (other.baseName == other.fullName) {
482 baseName = fullName;
483 } else {
484 baseName = other.baseName;
485 }
486
487 uprv_strcpy(language, other.language);
488 uprv_strcpy(script, other.script);
489 uprv_strcpy(country, other.country);
490
491 variantBegin = other.variantBegin;
492 fIsBogus = other.fIsBogus;
493
494 other.baseName = other.fullName = other.fullNameBuffer;
495
496 return *this;
497 }
498
499 Locale *
clone() const500 Locale::clone() const {
501 return new Locale(*this);
502 }
503
504 UBool
operator ==(const Locale & other) const505 Locale::operator==( const Locale& other) const
506 {
507 return (uprv_strcmp(other.fullName, fullName) == 0);
508 }
509
// Non-zero when c is one of the letters A-Z or a-z. The argument is
// evaluated more than once.
#define ISASCIIALPHA(c) ((('A' <= (c)) && ((c) <= 'Z')) || (('a' <= (c)) && ((c) <= 'z')))
511
512 /*This function initializes a Locale from a C locale ID*/
init(const char * localeID,UBool canonicalize)513 Locale& Locale::init(const char* localeID, UBool canonicalize)
514 {
515 fIsBogus = FALSE;
516 /* Free our current storage */
517 if (baseName != fullName) {
518 uprv_free(baseName);
519 }
520 baseName = NULL;
521 if(fullName != fullNameBuffer) {
522 uprv_free(fullName);
523 fullName = fullNameBuffer;
524 }
525
526 // not a loop:
527 // just an easy way to have a common error-exit
528 // without goto and without another function
529 do {
530 char *separator;
531 char *field[5] = {0};
532 int32_t fieldLen[5] = {0};
533 int32_t fieldIdx;
534 int32_t variantField;
535 int32_t length;
536 UErrorCode err;
537
538 if(localeID == NULL) {
539 // not an error, just set the default locale
540 return *this = getDefault();
541 }
542
543 /* preset all fields to empty */
544 language[0] = script[0] = country[0] = 0;
545
546 // "canonicalize" the locale ID to ICU/Java format
547 err = U_ZERO_ERROR;
548 length = canonicalize ?
549 uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
550 uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
551
552 if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
553 /*Go to heap for the fullName if necessary*/
554 fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
555 if(fullName == 0) {
556 fullName = fullNameBuffer;
557 break; // error: out of memory
558 }
559 err = U_ZERO_ERROR;
560 length = canonicalize ?
561 uloc_canonicalize(localeID, fullName, length+1, &err) :
562 uloc_getName(localeID, fullName, length+1, &err);
563 }
564 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
565 /* should never occur */
566 break;
567 }
568
569 variantBegin = length;
570
571 /* after uloc_getName/canonicalize() we know that only '_' are separators */
572 /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
573 separator = field[0] = fullName;
574 fieldIdx = 1;
575 char* at = uprv_strchr(fullName, '@');
576 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
577 fieldIdx < UPRV_LENGTHOF(field)-1 &&
578 (at == nullptr || separator < at)) {
579 field[fieldIdx] = separator + 1;
580 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
581 fieldIdx++;
582 }
583 // variant may contain @foo or .foo POSIX cruft; remove it
584 separator = uprv_strchr(field[fieldIdx-1], '@');
585 char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
586 if (separator!=NULL || sep2!=NULL) {
587 if (separator==NULL || (sep2!=NULL && separator > sep2)) {
588 separator = sep2;
589 }
590 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
591 } else {
592 fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
593 }
594
595 if (fieldLen[0] >= (int32_t)(sizeof(language)))
596 {
597 break; // error: the language field is too long
598 }
599
600 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
601 if (fieldLen[0] > 0) {
602 /* We have a language */
603 uprv_memcpy(language, fullName, fieldLen[0]);
604 language[fieldLen[0]] = 0;
605 }
606 if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
607 ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
608 ISASCIIALPHA(field[1][3])) {
609 /* We have at least a script */
610 uprv_memcpy(script, field[1], fieldLen[1]);
611 script[fieldLen[1]] = 0;
612 variantField++;
613 }
614
615 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
616 /* We have a country */
617 uprv_memcpy(country, field[variantField], fieldLen[variantField]);
618 country[fieldLen[variantField]] = 0;
619 variantField++;
620 } else if (fieldLen[variantField] == 0) {
621 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
622 }
623
624 if (fieldLen[variantField] > 0) {
625 /* We have a variant */
626 variantBegin = (int32_t)(field[variantField] - fullName);
627 }
628
629 err = U_ZERO_ERROR;
630 initBaseName(err);
631 if (U_FAILURE(err)) {
632 break;
633 }
634
635 // successful end of init()
636 return *this;
637 } while(0); /*loop doesn't iterate*/
638
639 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
640 setToBogus();
641
642 return *this;
643 }
644
645 /*
646 * Set up the base name.
647 * If there are no key words, it's exactly the full name.
648 * If key words exist, it's the full name truncated at the '@' character.
649 * Need to set up both at init() and after setting a keyword.
650 */
651 void
initBaseName(UErrorCode & status)652 Locale::initBaseName(UErrorCode &status) {
653 if (U_FAILURE(status)) {
654 return;
655 }
656 U_ASSERT(baseName==NULL || baseName==fullName);
657 const char *atPtr = uprv_strchr(fullName, '@');
658 const char *eqPtr = uprv_strchr(fullName, '=');
659 if (atPtr && eqPtr && atPtr < eqPtr) {
660 // Key words exist.
661 int32_t baseNameLength = (int32_t)(atPtr - fullName);
662 baseName = (char *)uprv_malloc(baseNameLength + 1);
663 if (baseName == NULL) {
664 status = U_MEMORY_ALLOCATION_ERROR;
665 return;
666 }
667 uprv_strncpy(baseName, fullName, baseNameLength);
668 baseName[baseNameLength] = 0;
669
670 // The original computation of variantBegin leaves it equal to the length
671 // of fullName if there is no variant. It should instead be
672 // the length of the baseName.
673 if (variantBegin > baseNameLength) {
674 variantBegin = baseNameLength;
675 }
676 } else {
677 baseName = fullName;
678 }
679 }
680
681
682 int32_t
hashCode() const683 Locale::hashCode() const
684 {
685 return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
686 }
687
688 void
setToBogus()689 Locale::setToBogus() {
690 /* Free our current storage */
691 if(baseName != fullName) {
692 uprv_free(baseName);
693 }
694 baseName = NULL;
695 if(fullName != fullNameBuffer) {
696 uprv_free(fullName);
697 fullName = fullNameBuffer;
698 }
699 *fullNameBuffer = 0;
700 *language = 0;
701 *script = 0;
702 *country = 0;
703 fIsBogus = TRUE;
704 variantBegin = 0;
705 }
706
707 const Locale& U_EXPORT2
getDefault()708 Locale::getDefault()
709 {
710 {
711 Mutex lock(&gDefaultLocaleMutex);
712 if (gDefaultLocale != NULL) {
713 return *gDefaultLocale;
714 }
715 }
716 UErrorCode status = U_ZERO_ERROR;
717 return *locale_set_default_internal(NULL, status);
718 }
719
720
721
722 void U_EXPORT2
setDefault(const Locale & newLocale,UErrorCode & status)723 Locale::setDefault( const Locale& newLocale,
724 UErrorCode& status)
725 {
726 if (U_FAILURE(status)) {
727 return;
728 }
729
730 /* Set the default from the full name string of the supplied locale.
731 * This is a convenient way to access the default locale caching mechanisms.
732 */
733 const char *localeID = newLocale.getName();
734 locale_set_default_internal(localeID, status);
735 }
736
737 void
addLikelySubtags(UErrorCode & status)738 Locale::addLikelySubtags(UErrorCode& status) {
739 if (U_FAILURE(status)) {
740 return;
741 }
742
743 CharString maximizedLocaleID;
744 {
745 CharStringByteSink sink(&maximizedLocaleID);
746 ulocimp_addLikelySubtags(fullName, sink, &status);
747 }
748
749 if (U_FAILURE(status)) {
750 return;
751 }
752
753 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
754 if (isBogus()) {
755 status = U_ILLEGAL_ARGUMENT_ERROR;
756 }
757 }
758
759 void
minimizeSubtags(UErrorCode & status)760 Locale::minimizeSubtags(UErrorCode& status) {
761 if (U_FAILURE(status)) {
762 return;
763 }
764
765 CharString minimizedLocaleID;
766 {
767 CharStringByteSink sink(&minimizedLocaleID);
768 ulocimp_minimizeSubtags(fullName, sink, &status);
769 }
770
771 if (U_FAILURE(status)) {
772 return;
773 }
774
775 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
776 if (isBogus()) {
777 status = U_ILLEGAL_ARGUMENT_ERROR;
778 }
779 }
780
781 Locale U_EXPORT2
forLanguageTag(StringPiece tag,UErrorCode & status)782 Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
783 {
784 Locale result(Locale::eBOGUS);
785
786 if (U_FAILURE(status)) {
787 return result;
788 }
789
790 // If a BCP-47 language tag is passed as the language parameter to the
791 // normal Locale constructor, it will actually fall back to invoking
792 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
793 // the string actually is BCP-47. This works well for things like strings
794 // using BCP-47 extensions, but it does not at all work for things like
795 // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
796 // interpret as ICU locale IDs and because of that won't trigger the BCP-47
797 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
798 // and then Locale::init(), instead of just calling the normal constructor.
799
800 CharString localeID;
801 int32_t parsedLength;
802 {
803 CharStringByteSink sink(&localeID);
804 ulocimp_forLanguageTag(
805 tag.data(),
806 tag.length(),
807 sink,
808 &parsedLength,
809 &status);
810 }
811
812 if (U_FAILURE(status)) {
813 return result;
814 }
815
816 if (parsedLength != tag.size()) {
817 status = U_ILLEGAL_ARGUMENT_ERROR;
818 return result;
819 }
820
821 result.init(localeID.data(), /*canonicalize=*/FALSE);
822 if (result.isBogus()) {
823 status = U_ILLEGAL_ARGUMENT_ERROR;
824 }
825 return result;
826 }
827
828 void
toLanguageTag(ByteSink & sink,UErrorCode & status) const829 Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
830 {
831 if (U_FAILURE(status)) {
832 return;
833 }
834
835 if (fIsBogus) {
836 status = U_ILLEGAL_ARGUMENT_ERROR;
837 return;
838 }
839
840 ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
841 }
842
843 Locale U_EXPORT2
createFromName(const char * name)844 Locale::createFromName (const char *name)
845 {
846 if (name) {
847 Locale l("");
848 l.init(name, FALSE);
849 return l;
850 }
851 else {
852 return getDefault();
853 }
854 }
855
856 Locale U_EXPORT2
createCanonical(const char * name)857 Locale::createCanonical(const char* name) {
858 Locale loc("");
859 loc.init(name, TRUE);
860 return loc;
861 }
862
863 const char *
getISO3Language() const864 Locale::getISO3Language() const
865 {
866 return uloc_getISO3Language(fullName);
867 }
868
869
870 const char *
getISO3Country() const871 Locale::getISO3Country() const
872 {
873 return uloc_getISO3Country(fullName);
874 }
875
876 /**
877 * Return the LCID value as specified in the "LocaleID" resource for this
878 * locale. The LocaleID must be expressed as a hexadecimal number, from
879 * one to four digits. If the LocaleID resource is not present, or is
880 * in an incorrect format, 0 is returned. The LocaleID is for use in
881 * Windows (it is an LCID), but is available on all platforms.
882 */
883 uint32_t
getLCID() const884 Locale::getLCID() const
885 {
886 return uloc_getLCID(fullName);
887 }
888
getISOCountries()889 const char* const* U_EXPORT2 Locale::getISOCountries()
890 {
891 return uloc_getISOCountries();
892 }
893
getISOLanguages()894 const char* const* U_EXPORT2 Locale::getISOLanguages()
895 {
896 return uloc_getISOLanguages();
897 }
898
899 // Set the locale's data based on a posix id.
setFromPOSIXID(const char * posixID)900 void Locale::setFromPOSIXID(const char *posixID)
901 {
902 init(posixID, TRUE);
903 }
904
905 const Locale & U_EXPORT2
getRoot(void)906 Locale::getRoot(void)
907 {
908 return getLocale(eROOT);
909 }
910
911 const Locale & U_EXPORT2
getEnglish(void)912 Locale::getEnglish(void)
913 {
914 return getLocale(eENGLISH);
915 }
916
917 const Locale & U_EXPORT2
getFrench(void)918 Locale::getFrench(void)
919 {
920 return getLocale(eFRENCH);
921 }
922
923 const Locale & U_EXPORT2
getGerman(void)924 Locale::getGerman(void)
925 {
926 return getLocale(eGERMAN);
927 }
928
929 const Locale & U_EXPORT2
getItalian(void)930 Locale::getItalian(void)
931 {
932 return getLocale(eITALIAN);
933 }
934
935 const Locale & U_EXPORT2
getJapanese(void)936 Locale::getJapanese(void)
937 {
938 return getLocale(eJAPANESE);
939 }
940
941 const Locale & U_EXPORT2
getKorean(void)942 Locale::getKorean(void)
943 {
944 return getLocale(eKOREAN);
945 }
946
947 const Locale & U_EXPORT2
getChinese(void)948 Locale::getChinese(void)
949 {
950 return getLocale(eCHINESE);
951 }
952
953 const Locale & U_EXPORT2
getSimplifiedChinese(void)954 Locale::getSimplifiedChinese(void)
955 {
956 return getLocale(eCHINA);
957 }
958
959 const Locale & U_EXPORT2
getTraditionalChinese(void)960 Locale::getTraditionalChinese(void)
961 {
962 return getLocale(eTAIWAN);
963 }
964
965
966 const Locale & U_EXPORT2
getFrance(void)967 Locale::getFrance(void)
968 {
969 return getLocale(eFRANCE);
970 }
971
972 const Locale & U_EXPORT2
getGermany(void)973 Locale::getGermany(void)
974 {
975 return getLocale(eGERMANY);
976 }
977
978 const Locale & U_EXPORT2
getItaly(void)979 Locale::getItaly(void)
980 {
981 return getLocale(eITALY);
982 }
983
984 const Locale & U_EXPORT2
getJapan(void)985 Locale::getJapan(void)
986 {
987 return getLocale(eJAPAN);
988 }
989
990 const Locale & U_EXPORT2
getKorea(void)991 Locale::getKorea(void)
992 {
993 return getLocale(eKOREA);
994 }
995
996 const Locale & U_EXPORT2
getChina(void)997 Locale::getChina(void)
998 {
999 return getLocale(eCHINA);
1000 }
1001
1002 const Locale & U_EXPORT2
getPRC(void)1003 Locale::getPRC(void)
1004 {
1005 return getLocale(eCHINA);
1006 }
1007
1008 const Locale & U_EXPORT2
getTaiwan(void)1009 Locale::getTaiwan(void)
1010 {
1011 return getLocale(eTAIWAN);
1012 }
1013
1014 const Locale & U_EXPORT2
getUK(void)1015 Locale::getUK(void)
1016 {
1017 return getLocale(eUK);
1018 }
1019
1020 const Locale & U_EXPORT2
getUS(void)1021 Locale::getUS(void)
1022 {
1023 return getLocale(eUS);
1024 }
1025
1026 const Locale & U_EXPORT2
getCanada(void)1027 Locale::getCanada(void)
1028 {
1029 return getLocale(eCANADA);
1030 }
1031
1032 const Locale & U_EXPORT2
getCanadaFrench(void)1033 Locale::getCanadaFrench(void)
1034 {
1035 return getLocale(eCANADA_FRENCH);
1036 }
1037
1038 const Locale &
getLocale(int locid)1039 Locale::getLocale(int locid)
1040 {
1041 Locale *localeCache = getLocaleCache();
1042 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
1043 if (localeCache == NULL) {
1044 // Failure allocating the locale cache.
1045 // The best we can do is return a NULL reference.
1046 locid = 0;
1047 }
1048 return localeCache[locid]; /*operating on NULL*/
1049 }
1050
1051 /*
1052 This function is defined this way in order to get around static
1053 initialization and static destruction.
1054 */
1055 Locale *
getLocaleCache(void)1056 Locale::getLocaleCache(void)
1057 {
1058 UErrorCode status = U_ZERO_ERROR;
1059 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
1060 return gLocaleCache;
1061 }
1062
1063 class KeywordEnumeration : public StringEnumeration {
1064 private:
1065 char *keywords;
1066 char *current;
1067 int32_t length;
1068 UnicodeString currUSKey;
1069 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
1070
1071 public:
getStaticClassID(void)1072 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
getDynamicClassID(void) const1073 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
1074 public:
KeywordEnumeration(const char * keys,int32_t keywordLen,int32_t currentIndex,UErrorCode & status)1075 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
1076 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
1077 if(U_SUCCESS(status) && keywordLen != 0) {
1078 if(keys == NULL || keywordLen < 0) {
1079 status = U_ILLEGAL_ARGUMENT_ERROR;
1080 } else {
1081 keywords = (char *)uprv_malloc(keywordLen+1);
1082 if (keywords == NULL) {
1083 status = U_MEMORY_ALLOCATION_ERROR;
1084 }
1085 else {
1086 uprv_memcpy(keywords, keys, keywordLen);
1087 keywords[keywordLen] = 0;
1088 current = keywords + currentIndex;
1089 length = keywordLen;
1090 }
1091 }
1092 }
1093 }
1094
1095 virtual ~KeywordEnumeration();
1096
clone() const1097 virtual StringEnumeration * clone() const
1098 {
1099 UErrorCode status = U_ZERO_ERROR;
1100 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
1101 }
1102
count(UErrorCode &) const1103 virtual int32_t count(UErrorCode &/*status*/) const {
1104 char *kw = keywords;
1105 int32_t result = 0;
1106 while(*kw) {
1107 result++;
1108 kw += uprv_strlen(kw)+1;
1109 }
1110 return result;
1111 }
1112
next(int32_t * resultLength,UErrorCode & status)1113 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1114 const char* result;
1115 int32_t len;
1116 if(U_SUCCESS(status) && *current != 0) {
1117 result = current;
1118 len = (int32_t)uprv_strlen(current);
1119 current += len+1;
1120 if(resultLength != NULL) {
1121 *resultLength = len;
1122 }
1123 } else {
1124 if(resultLength != NULL) {
1125 *resultLength = 0;
1126 }
1127 result = NULL;
1128 }
1129 return result;
1130 }
1131
snext(UErrorCode & status)1132 virtual const UnicodeString* snext(UErrorCode& status) {
1133 int32_t resultLength = 0;
1134 const char *s = next(&resultLength, status);
1135 return setChars(s, resultLength, status);
1136 }
1137
reset(UErrorCode &)1138 virtual void reset(UErrorCode& /*status*/) {
1139 current = keywords;
1140 }
1141 };
1142
1143 const char KeywordEnumeration::fgClassID = '\0';
1144
~KeywordEnumeration()1145 KeywordEnumeration::~KeywordEnumeration() {
1146 uprv_free(keywords);
1147 }
1148
1149 // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
1150 // the next() method for each keyword before returning it.
1151 class UnicodeKeywordEnumeration : public KeywordEnumeration {
1152 public:
1153 using KeywordEnumeration::KeywordEnumeration;
1154 virtual ~UnicodeKeywordEnumeration();
1155
next(int32_t * resultLength,UErrorCode & status)1156 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1157 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
1158 if (U_SUCCESS(status) && legacy_key != nullptr) {
1159 const char* key = uloc_toUnicodeLocaleKey(legacy_key);
1160 if (key == nullptr) {
1161 status = U_ILLEGAL_ARGUMENT_ERROR;
1162 } else {
1163 if (resultLength != nullptr) {
1164 *resultLength = static_cast<int32_t>(uprv_strlen(key));
1165 }
1166 return key;
1167 }
1168 }
1169 if (resultLength != nullptr) *resultLength = 0;
1170 return nullptr;
1171 }
1172 };
1173
1174 // Out-of-line virtual destructor to serve as the "key function".
1175 UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
1176
1177 StringEnumeration *
createKeywords(UErrorCode & status) const1178 Locale::createKeywords(UErrorCode &status) const
1179 {
1180 char keywords[256];
1181 int32_t keywordCapacity = sizeof keywords;
1182 StringEnumeration *result = NULL;
1183
1184 if (U_FAILURE(status)) {
1185 return result;
1186 }
1187
1188 const char* variantStart = uprv_strchr(fullName, '@');
1189 const char* assignment = uprv_strchr(fullName, '=');
1190 if(variantStart) {
1191 if(assignment > variantStart) {
1192 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1193 if(U_SUCCESS(status) && keyLen) {
1194 result = new KeywordEnumeration(keywords, keyLen, 0, status);
1195 if (!result) {
1196 status = U_MEMORY_ALLOCATION_ERROR;
1197 }
1198 }
1199 } else {
1200 status = U_INVALID_FORMAT_ERROR;
1201 }
1202 }
1203 return result;
1204 }
1205
1206 StringEnumeration *
createUnicodeKeywords(UErrorCode & status) const1207 Locale::createUnicodeKeywords(UErrorCode &status) const
1208 {
1209 char keywords[256];
1210 int32_t keywordCapacity = sizeof keywords;
1211 StringEnumeration *result = NULL;
1212
1213 if (U_FAILURE(status)) {
1214 return result;
1215 }
1216
1217 const char* variantStart = uprv_strchr(fullName, '@');
1218 const char* assignment = uprv_strchr(fullName, '=');
1219 if(variantStart) {
1220 if(assignment > variantStart) {
1221 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1222 if(U_SUCCESS(status) && keyLen) {
1223 result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
1224 if (!result) {
1225 status = U_MEMORY_ALLOCATION_ERROR;
1226 }
1227 }
1228 } else {
1229 status = U_INVALID_FORMAT_ERROR;
1230 }
1231 }
1232 return result;
1233 }
1234
1235 int32_t
getKeywordValue(const char * keywordName,char * buffer,int32_t bufLen,UErrorCode & status) const1236 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
1237 {
1238 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
1239 }
1240
1241 void
getKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const1242 Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
1243 if (U_FAILURE(status)) {
1244 return;
1245 }
1246
1247 if (fIsBogus) {
1248 status = U_ILLEGAL_ARGUMENT_ERROR;
1249 return;
1250 }
1251
1252 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1253 const CharString keywordName_nul(keywordName, status);
1254 if (U_FAILURE(status)) {
1255 return;
1256 }
1257
1258 LocalMemory<char> scratch;
1259 int32_t scratch_capacity = 16; // Arbitrarily chosen default size.
1260
1261 char* buffer;
1262 int32_t result_capacity, reslen;
1263
1264 for (;;) {
1265 if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
1266 status = U_MEMORY_ALLOCATION_ERROR;
1267 return;
1268 }
1269
1270 buffer = sink.GetAppendBuffer(
1271 /*min_capacity=*/scratch_capacity,
1272 /*desired_capacity_hint=*/scratch_capacity,
1273 scratch.getAlias(),
1274 scratch_capacity,
1275 &result_capacity);
1276
1277 reslen = uloc_getKeywordValue(
1278 fullName,
1279 keywordName_nul.data(),
1280 buffer,
1281 result_capacity,
1282 &status);
1283
1284 if (status != U_BUFFER_OVERFLOW_ERROR) {
1285 break;
1286 }
1287
1288 scratch_capacity = reslen;
1289 status = U_ZERO_ERROR;
1290 }
1291
1292 if (U_FAILURE(status)) {
1293 return;
1294 }
1295
1296 sink.Append(buffer, reslen);
1297 if (status == U_STRING_NOT_TERMINATED_WARNING) {
1298 status = U_ZERO_ERROR; // Terminators not used.
1299 }
1300 }
1301
1302 void
getUnicodeKeywordValue(StringPiece keywordName,ByteSink & sink,UErrorCode & status) const1303 Locale::getUnicodeKeywordValue(StringPiece keywordName,
1304 ByteSink& sink,
1305 UErrorCode& status) const {
1306 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1307 const CharString keywordName_nul(keywordName, status);
1308 if (U_FAILURE(status)) {
1309 return;
1310 }
1311
1312 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1313
1314 if (legacy_key == nullptr) {
1315 status = U_ILLEGAL_ARGUMENT_ERROR;
1316 return;
1317 }
1318
1319 CharString legacy_value;
1320 {
1321 CharStringByteSink sink(&legacy_value);
1322 getKeywordValue(legacy_key, sink, status);
1323 }
1324
1325 if (U_FAILURE(status)) {
1326 return;
1327 }
1328
1329 const char* unicode_value = uloc_toUnicodeLocaleType(
1330 keywordName_nul.data(), legacy_value.data());
1331
1332 if (unicode_value == nullptr) {
1333 status = U_ILLEGAL_ARGUMENT_ERROR;
1334 return;
1335 }
1336
1337 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
1338 }
1339
1340 void
setKeywordValue(const char * keywordName,const char * keywordValue,UErrorCode & status)1341 Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
1342 {
1343 if (U_FAILURE(status)) {
1344 return;
1345 }
1346 int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
1347 int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
1348 bufferLength, &status) + 1;
1349 /* Handle the case the current buffer is not enough to hold the new id */
1350 if (status == U_BUFFER_OVERFLOW_ERROR) {
1351 U_ASSERT(newLength > bufferLength);
1352 char* newFullName = (char *)uprv_malloc(newLength);
1353 if (newFullName == nullptr) {
1354 status = U_MEMORY_ALLOCATION_ERROR;
1355 return;
1356 }
1357 uprv_strcpy(newFullName, fullName);
1358 if (fullName != fullNameBuffer) {
1359 // if full Name is already on the heap, need to free it.
1360 uprv_free(fullName);
1361 }
1362 fullName = newFullName;
1363 status = U_ZERO_ERROR;
1364 uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
1365 } else {
1366 U_ASSERT(newLength <= bufferLength);
1367 }
1368 if (U_SUCCESS(status) && baseName == fullName) {
1369 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
1370 initBaseName(status);
1371 }
1372 }
1373
1374 void
setKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)1375 Locale::setKeywordValue(StringPiece keywordName,
1376 StringPiece keywordValue,
1377 UErrorCode& status) {
1378 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1379 const CharString keywordName_nul(keywordName, status);
1380 const CharString keywordValue_nul(keywordValue, status);
1381 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
1382 }
1383
1384 void
setUnicodeKeywordValue(StringPiece keywordName,StringPiece keywordValue,UErrorCode & status)1385 Locale::setUnicodeKeywordValue(StringPiece keywordName,
1386 StringPiece keywordValue,
1387 UErrorCode& status) {
1388 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1389 const CharString keywordName_nul(keywordName, status);
1390 const CharString keywordValue_nul(keywordValue, status);
1391
1392 if (U_FAILURE(status)) {
1393 return;
1394 }
1395
1396 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1397
1398 if (legacy_key == nullptr) {
1399 status = U_ILLEGAL_ARGUMENT_ERROR;
1400 return;
1401 }
1402
1403 const char* legacy_value = nullptr;
1404
1405 if (!keywordValue_nul.isEmpty()) {
1406 legacy_value =
1407 uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
1408
1409 if (legacy_value == nullptr) {
1410 status = U_ILLEGAL_ARGUMENT_ERROR;
1411 return;
1412 }
1413 }
1414
1415 setKeywordValue(legacy_key, legacy_value, status);
1416 }
1417
1418 const char *
getBaseName() const1419 Locale::getBaseName() const {
1420 return baseName;
1421 }
1422
1423 Locale::Iterator::~Iterator() = default;
1424
1425 //eof
1426 U_NAMESPACE_END
1427