1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2004-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 #ifndef ULOCIMP_H 11 #define ULOCIMP_H 12 13 #include "unicode/bytestream.h" 14 #include "unicode/uloc.h" 15 16 #include "charstr.h" 17 18 /** 19 * Create an iterator over the specified keywords list 20 * @param keywordList double-null terminated list. Will be copied. 21 * @param keywordListSize size in bytes of keywordList 22 * @param status err code 23 * @return enumeration (owned by caller) of the keyword list. 24 * @internal ICU 3.0 25 */ 26 U_CAPI UEnumeration* U_EXPORT2 27 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); 28 29 /** 30 * Look up a resource bundle table item with fallback on the table level. 31 * This is accessible so it can be called by C++ code. 32 */ 33 U_CAPI const UChar * U_EXPORT2 34 uloc_getTableStringWithFallback( 35 const char *path, 36 const char *locale, 37 const char *tableKey, 38 const char *subTableKey, 39 const char *itemKey, 40 int32_t *pLength, 41 UErrorCode *pErrorCode); 42 43 /*returns true if a is an ID separator false otherwise*/ 44 #define _isIDSeparator(a) (a == '_' || a == '-') 45 46 U_CFUNC const char* 47 uloc_getCurrentCountryID(const char* oldID); 48 49 U_CFUNC const char* 50 uloc_getCurrentLanguageID(const char* oldID); 51 52 U_CFUNC void 53 ulocimp_getKeywords(const char *localeID, 54 char prev, 55 icu::ByteSink& sink, 56 UBool valuesToo, 57 UErrorCode *status); 58 59 icu::CharString U_EXPORT2 60 ulocimp_getLanguage(const char *localeID, 61 const char **pEnd, 62 UErrorCode &status); 63 64 icu::CharString U_EXPORT2 65 ulocimp_getScript(const char *localeID, 66 const char **pEnd, 67 UErrorCode &status); 68 69 icu::CharString U_EXPORT2 70 ulocimp_getCountry(const char *localeID, 71 const char **pEnd, 72 UErrorCode &status); 73 74 U_CAPI void U_EXPORT2 75 ulocimp_getName(const char* localeID, 76 icu::ByteSink& sink, 77 UErrorCode* err); 78 79 U_CAPI void U_EXPORT2 80 ulocimp_getBaseName(const char* localeID, 81 icu::ByteSink& sink, 82 UErrorCode* err); 83 84 U_CAPI void U_EXPORT2 85 ulocimp_canonicalize(const char* localeID, 86 icu::ByteSink& sink, 87 UErrorCode* err); 88 89 U_CAPI void U_EXPORT2 90 ulocimp_getKeywordValue(const char* localeID, 91 const char* keywordName, 92 icu::ByteSink& sink, 93 UErrorCode* status); 94 95 /** 96 * Writes a well-formed language tag for this locale ID. 97 * 98 * **Note**: When `strict` is false, any locale fields which do not satisfy the 99 * BCP47 syntax requirement will be omitted from the result. When `strict` is 100 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale 101 * fields do not satisfy the BCP47 syntax requirement. 102 * 103 * @param localeID the input locale ID 104 * @param sink the output sink receiving the BCP47 language 105 * tag for this Locale. 106 * @param strict boolean value indicating if the function returns 107 * an error for an ill-formed input locale ID. 108 * @param err error information if receiving the language 109 * tag failed. 110 * @return The length of the BCP47 language tag. 111 * 112 * @internal ICU 64 113 */ 114 U_CAPI void U_EXPORT2 115 ulocimp_toLanguageTag(const char* localeID, 116 icu::ByteSink& sink, 117 UBool strict, 118 UErrorCode* err); 119 120 /** 121 * Returns a locale ID for the specified BCP47 language tag string. 122 * If the specified language tag contains any ill-formed subtags, 123 * the first such subtag and all following subtags are ignored. 124 * <p> 125 * This implements the 'Language-Tag' production of BCP 47, and so 126 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) 127 * (regular and irregular) as well as private use language tags. 128 * 129 * Private use tags are represented as 'x-whatever', 130 * and legacy tags are converted to their canonical replacements where they exist. 131 * 132 * Note that a few legacy tags have no modern replacement; 133 * these will be converted using the fallback described in 134 * the first paragraph, so some information might be lost. 135 * 136 * @param langtag the input BCP47 language tag. 137 * @param tagLen the length of langtag, or -1 to call uprv_strlen(). 138 * @param sink the output sink receiving a locale ID for the 139 * specified BCP47 language tag. 140 * @param parsedLength if not NULL, successfully parsed length 141 * for the input language tag is set. 142 * @param err error information if receiving the locald ID 143 * failed. 144 * @internal ICU 63 145 */ 146 U_CAPI void U_EXPORT2 147 ulocimp_forLanguageTag(const char* langtag, 148 int32_t tagLen, 149 icu::ByteSink& sink, 150 int32_t* parsedLength, 151 UErrorCode* err); 152 153 /** 154 * Get the region to use for supplemental data lookup. Uses 155 * (1) any region specified by locale tag "rg"; if none then 156 * (2) any unicode_region_tag in the locale ID; if none then 157 * (3) if inferRegion is true, the region suggested by 158 * getLikelySubtags on the localeID. 159 * If no region is found, returns length 0. 160 * 161 * @param localeID 162 * The complete locale ID (with keywords) from which 163 * to get the region to use for supplemental data. 164 * @param inferRegion 165 * If true, will try to infer region from localeID if 166 * no other region is found. 167 * @param region 168 * Buffer in which to put the region ID found; should 169 * have a capacity at least ULOC_COUNTRY_CAPACITY. 170 * @param regionCapacity 171 * The actual capacity of the region buffer. 172 * @param status 173 * Pointer to in/out UErrorCode value for latest status. 174 * @return 175 * The length of any region code found, or 0 if none. 176 * @internal ICU 57 177 */ 178 U_CAPI int32_t U_EXPORT2 179 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, 180 char *region, int32_t regionCapacity, UErrorCode* status); 181 182 /** 183 * Add the likely subtags for a provided locale ID, per the algorithm described 184 * in the following CLDR technical report: 185 * 186 * http://www.unicode.org/reports/tr35/#Likely_Subtags 187 * 188 * If localeID is already in the maximal form, or there is no data available 189 * for maximization, it will be copied to the output buffer. For example, 190 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. 191 * 192 * Examples: 193 * 194 * "en" maximizes to "en_Latn_US" 195 * 196 * "de" maximizes to "de_Latn_US" 197 * 198 * "sr" maximizes to "sr_Cyrl_RS" 199 * 200 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) 201 * 202 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) 203 * 204 * @param localeID The locale to maximize 205 * @param sink The output sink receiving the maximized locale 206 * @param err Error information if maximizing the locale failed. If the length 207 * of the localeID and the null-terminator is greater than the maximum allowed size, 208 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. 209 * @internal ICU 64 210 */ 211 U_CAPI void U_EXPORT2 212 ulocimp_addLikelySubtags(const char* localeID, 213 icu::ByteSink& sink, 214 UErrorCode* err); 215 216 /** 217 * Minimize the subtags for a provided locale ID, per the algorithm described 218 * in the following CLDR technical report: 219 * 220 * http://www.unicode.org/reports/tr35/#Likely_Subtags 221 * 222 * If localeID is already in the minimal form, or there is no data available 223 * for minimization, it will be copied to the output buffer. Since the 224 * minimization algorithm relies on proper maximization, see the comments 225 * for ulocimp_addLikelySubtags for reasons why there might not be any data. 226 * 227 * Examples: 228 * 229 * "en_Latn_US" minimizes to "en" 230 * 231 * "de_Latn_US" minimizes to "de" 232 * 233 * "sr_Cyrl_RS" minimizes to "sr" 234 * 235 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the 236 * script, and minimizing to "zh" would imply "zh_Hans_CN".) 237 * 238 * @param localeID The locale to minimize 239 * @param sink The output sink receiving the maximized locale 240 * @param err Error information if minimizing the locale failed. If the length 241 * of the localeID and the null-terminator is greater than the maximum allowed size, 242 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. 243 * @internal ICU 64 244 */ 245 U_CAPI void U_EXPORT2 246 ulocimp_minimizeSubtags(const char* localeID, 247 icu::ByteSink& sink, 248 UErrorCode* err); 249 250 U_CAPI const char * U_EXPORT2 251 locale_getKeywordsStart(const char *localeID); 252 253 U_CFUNC UBool 254 ultag_isExtensionSubtags(const char* s, int32_t len); 255 256 U_CFUNC UBool 257 ultag_isLanguageSubtag(const char* s, int32_t len); 258 259 U_CFUNC UBool 260 ultag_isPrivateuseValueSubtags(const char* s, int32_t len); 261 262 U_CFUNC UBool 263 ultag_isRegionSubtag(const char* s, int32_t len); 264 265 U_CFUNC UBool 266 ultag_isScriptSubtag(const char* s, int32_t len); 267 268 U_CFUNC UBool 269 ultag_isTransformedExtensionSubtags(const char* s, int32_t len); 270 271 U_CFUNC UBool 272 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); 273 274 U_CFUNC UBool 275 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); 276 277 U_CFUNC UBool 278 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); 279 280 U_CFUNC UBool 281 ultag_isUnicodeLocaleKey(const char* s, int32_t len); 282 283 U_CFUNC UBool 284 ultag_isUnicodeLocaleType(const char* s, int32_t len); 285 286 U_CFUNC UBool 287 ultag_isVariantSubtags(const char* s, int32_t len); 288 289 U_CAPI const char * U_EXPORT2 290 ultag_getTKeyStart(const char *localeID); 291 292 U_CFUNC const char* 293 ulocimp_toBcpKey(const char* key); 294 295 U_CFUNC const char* 296 ulocimp_toLegacyKey(const char* key); 297 298 U_CFUNC const char* 299 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); 300 301 U_CFUNC const char* 302 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); 303 304 /* Function for testing purpose */ 305 U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); 306 307 // Return true if the value is already canonicalized. 308 U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); 309 310 /** 311 * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY. 312 * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack, 313 * and then, if it's not big enough, reallocate it on the heap and try again. 314 * 315 * You use it like this: 316 * UErrorCode err = U_ZERO_ERROR; 317 * 318 * PreflightingLocaleIDBuffer tempBuffer; 319 * do { 320 * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err); 321 * } while (tempBuffer.needToTryAgain(&err)); 322 * if (U_SUCCESS(err)) { 323 * uloc_doSomethingWithTheResult(tempBuffer.getBuffer()); 324 * } 325 */ 326 class PreflightingLocaleIDBuffer { 327 private: 328 char stackBuffer[ULOC_FULLNAME_CAPACITY]; 329 char* heapBuffer = nullptr; 330 int32_t capacity = ULOC_FULLNAME_CAPACITY; 331 332 public: 333 int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY; 334 335 // No heap allocation. Use only on the stack. 336 static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete; 337 static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete; 338 #if U_HAVE_PLACEMENT_NEW 339 static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete; 340 #endif 341 PreflightingLocaleIDBuffer()342 PreflightingLocaleIDBuffer() {} 343 ~PreflightingLocaleIDBuffer()344 ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); } 345 getBuffer()346 char* getBuffer() { 347 if (heapBuffer == nullptr) { 348 return stackBuffer; 349 } else { 350 return heapBuffer; 351 } 352 } 353 getCapacity()354 int32_t getCapacity() { 355 return capacity; 356 } 357 needToTryAgain(UErrorCode * err)358 bool needToTryAgain(UErrorCode* err) { 359 if (heapBuffer != nullptr) { 360 return false; 361 } 362 363 if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) { 364 int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia 365 heapBuffer = static_cast<char*>(uprv_malloc(newCapacity)); 366 if (heapBuffer == nullptr) { 367 *err = U_MEMORY_ALLOCATION_ERROR; 368 } else { 369 *err = U_ZERO_ERROR; 370 capacity = newCapacity; 371 } 372 return U_SUCCESS(*err); 373 } 374 return false; 375 } 376 }; 377 378 #endif 379