• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2004-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12 
13 #include "unicode/bytestream.h"
14 #include "unicode/uloc.h"
15 
16 #include "charstr.h"
17 
18 /**
19  * Creates an enumeration that iterates over the specified keyword list.
20  * @param keywordList double-null terminated list of keywords. The list is copied.
21  * @param keywordListSize size of keywordList, in bytes
22  * @param status pointer to the UErrorCode that receives the error status
23  * @return enumeration over the keyword list; it is owned by the caller.
24  * @internal ICU 3.0
25  */
26 U_CAPI UEnumeration* U_EXPORT2
27 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
28 
29 /**
30  * Look up a resource bundle table item with fallback on the table level.
31  * This is accessible so it can be called by C++ code.
 *
 * NOTE(review): the parameters are not documented in this header. The
 * descriptions below are inferred from names and types only and must be
 * confirmed against the implementation.
 *
 * @param path        presumably the resource bundle path/package - TODO confirm
 * @param locale      presumably the locale ID whose bundle is searched - TODO confirm
 * @param tableKey    presumably the key of the table to look in - TODO confirm
 * @param subTableKey presumably the key of a sub-table inside that table;
 *                    whether it may be NULL is not visible here
 * @param itemKey     presumably the key of the item to fetch - TODO confirm
 * @param pLength     pointer to an int32_t; presumably receives the length
 *                    of the returned string - TODO confirm
 * @param pErrorCode  pointer to the UErrorCode for this call
 * @return            pointer to const UChar; per the summary above, the
 *                    item that was looked up
32  */
33 U_CAPI const UChar * U_EXPORT2
34 uloc_getTableStringWithFallback(
35     const char *path,
36     const char *locale,
37     const char *tableKey,
38     const char *subTableKey,
39     const char *itemKey,
40     int32_t *pLength,
41     UErrorCode *pErrorCode);
42 
/*
 * Returns true if `a` is a locale ID separator ('_' or '-'), false otherwise.
 *
 * Each use of the argument is parenthesized so that an arbitrary expression
 * (for example `c & 0x7f` or a conditional expression) is compared as a whole
 * instead of being re-associated by operator precedence.
 *
 * Note: `a` may be evaluated twice; do not pass an expression with side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45 
/*
 * NOTE(review): undocumented in this header. Judging only by the name and
 * signature, this maps an old country ID to its current ID and returns a
 * C string. What is returned for an ID that has no replacement, and who owns
 * the returned pointer, is not visible here - confirm in the implementation.
 */
46 U_CFUNC const char*
47 uloc_getCurrentCountryID(const char* oldID);
48 
/*
 * NOTE(review): undocumented in this header. Same signature as
 * uloc_getCurrentCountryID(); presumably the language-ID counterpart -
 * confirm in the implementation.
 */
49 U_CFUNC const char*
50 uloc_getCurrentLanguageID(const char* oldID);
51 
/*
 * NOTE(review): undocumented in this header. The notes below are inferred
 * from names and types only - confirm against the implementation.
 *
 * @param localeID  locale ID string; presumably the keywords are read from it
 * @param prev      a single char; its purpose is not evident from this declaration
 * @param sink      icu::ByteSink passed by reference; presumably receives the output
 * @param valuesToo presumably selects whether keyword values are written as well
 * @param status    pointer to the UErrorCode for this call
 */
52 U_CFUNC void
53 ulocimp_getKeywords(const char *localeID,
54              char prev,
55              icu::ByteSink& sink,
56              UBool valuesToo,
57              UErrorCode *status);
58 
/*
 * ulocimp_getLanguage / ulocimp_getScript / ulocimp_getCountry
 *
 * Three parallel declarations. Each takes a locale ID string, a
 * `const char **pEnd` and a UErrorCode passed by reference, and returns an
 * icu::CharString by value. Unlike the other declarations in this header they
 * carry neither U_CAPI nor U_CFUNC, only U_EXPORT2.
 *
 * NOTE(review): presumably each returns the named subtag parsed from
 * `localeID` and stores in `*pEnd` the position where parsing stopped;
 * whether `pEnd` may be NULL is not visible here - confirm against the
 * implementation.
 */
59 icu::CharString U_EXPORT2
60 ulocimp_getLanguage(const char *localeID,
61                     const char **pEnd,
62                     UErrorCode &status);
63 
64 icu::CharString U_EXPORT2
65 ulocimp_getScript(const char *localeID,
66                   const char **pEnd,
67                   UErrorCode &status);
68 
69 icu::CharString U_EXPORT2
70 ulocimp_getCountry(const char *localeID,
71                    const char **pEnd,
72                    UErrorCode &status);
73 
/*
 * ulocimp_getName / ulocimp_getBaseName / ulocimp_canonicalize /
 * ulocimp_getKeywordValue / ulocimp_getParent
 *
 * A family of void functions that take a locale ID string, an icu::ByteSink
 * by reference and a pointer to a UErrorCode. ulocimp_getKeywordValue
 * additionally takes the name of a keyword.
 *
 * NOTE(review): none of these is documented in this header. Presumably each
 * writes its result to `sink` and is the ByteSink-based counterpart of the
 * similarly named uloc_*() function declared in unicode/uloc.h - confirm
 * against the implementation before relying on this.
 */
74 U_CAPI void U_EXPORT2
75 ulocimp_getName(const char* localeID,
76                 icu::ByteSink& sink,
77                 UErrorCode* err);
78 
79 U_CAPI void U_EXPORT2
80 ulocimp_getBaseName(const char* localeID,
81                     icu::ByteSink& sink,
82                     UErrorCode* err);
83 
84 U_CAPI void U_EXPORT2
85 ulocimp_canonicalize(const char* localeID,
86                      icu::ByteSink& sink,
87                      UErrorCode* err);
88 
89 U_CAPI void U_EXPORT2
90 ulocimp_getKeywordValue(const char* localeID,
91                         const char* keywordName,
92                         icu::ByteSink& sink,
93                         UErrorCode* status);
94 
95 U_CAPI void U_EXPORT2
96 ulocimp_getParent(const char* localeID,
97                   icu::ByteSink& sink,
98                   UErrorCode* err);
99 
100 /**
101  * Writes a well-formed language tag for this locale ID.
102  *
103  * **Note**: When `strict` is false, any locale fields which do not satisfy the
104  * BCP47 syntax requirement will be omitted from the result.  When `strict` is
105  * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
106  * fields do not satisfy the BCP47 syntax requirement.
107  *
108  * @param localeID  the input locale ID
109  * @param sink      the output sink receiving the BCP47 language
110  *                  tag for this Locale.
111  * @param strict    boolean value indicating if the function reports
112  *                  an error for an ill-formed input locale ID.
113  * @param err       error information if receiving the language
114  *                  tag failed.
115  * @note            This function returns nothing; the language tag is written to `sink`.
116  *
117  * @internal ICU 64
118  */
119 U_CAPI void U_EXPORT2
120 ulocimp_toLanguageTag(const char* localeID,
121                       icu::ByteSink& sink,
122                       UBool strict,
123                       UErrorCode* err);
124 
125 /**
126  * Returns a locale ID for the specified BCP47 language tag string.
127  * If the specified language tag contains any ill-formed subtags,
128  * the first such subtag and all following subtags are ignored.
129  * <p>
130  * This implements the 'Language-Tag' production of BCP 47, and so
131  * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
132  * (regular and irregular) as well as private use language tags.
133  *
134  * Private use tags are represented as 'x-whatever',
135  * and legacy tags are converted to their canonical replacements where they exist.
136  *
137  * Note that a few legacy tags have no modern replacement;
138  * these will be converted using the fallback described in
139  * the first paragraph, so some information might be lost.
140  *
141  * @param langtag   the input BCP47 language tag.
142  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
143  * @param sink      the output sink receiving a locale ID for the
144  *                  specified BCP47 language tag.
145  * @param parsedLength  if not NULL, successfully parsed length
146  *                      for the input language tag is set.
147  * @param err       error information if receiving the locale ID
148  *                  failed.
149  * @internal ICU 63
150  */
151 U_CAPI void U_EXPORT2
152 ulocimp_forLanguageTag(const char* langtag,
153                        int32_t tagLen,
154                        icu::ByteSink& sink,
155                        int32_t* parsedLength,
156                        UErrorCode* err);
157 
158 /**
159  * Get the region to use for supplemental data lookup. The sources
160  * below are tried in order and the first one that yields a region is used:
161  * (1) any region specified by locale tag "rg";
162  * (2) any unicode_region_tag in the locale ID;
163  * (3) if inferRegion is true, the region suggested by getLikelySubtags on the localeID.
164  * If no region is found, returns length 0.
165  *
166  * @param localeID
167  *     The complete locale ID (with keywords) from which
168  *     to get the region to use for supplemental data.
169  * @param inferRegion
170  *     If true, will try to infer region from localeID if
171  *     no other region is found.
172  * @param region
173  *     Buffer in which to put the region ID found; should
174  *     have a capacity at least ULOC_COUNTRY_CAPACITY.
175  * @param regionCapacity
176  *     The actual capacity of the region buffer.
177  * @param status
178  *     Pointer to in/out UErrorCode value for latest status.
179  * @return
180  *     The length of any region code found, or 0 if none.
181  * @internal ICU 57
182  */
183 U_CAPI int32_t U_EXPORT2
184 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
185                                      char *region, int32_t regionCapacity, UErrorCode* status);
186 
187 /**
188  * Add the likely subtags for a provided locale ID, per the algorithm described
189  * in the following CLDR technical report:
190  *
191  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
192  *
193  * If localeID is already in the maximal form, or there is no data available
194  * for maximization, it will be copied to the output sink.  For example,
195  * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
196  *
197  * Examples:
198  *
199  * "en" maximizes to "en_Latn_US"
200  *
201  * "de" maximizes to "de_Latn_DE"
202  *
203  * "sr" maximizes to "sr_Cyrl_RS"
204  *
205  * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
206  *
207  * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
208  *
209  * @param localeID The locale to maximize
210  * @param sink The output sink receiving the maximized locale
211  * @param err Error information if maximizing the locale failed.  If the length
212  * of the localeID and the null-terminator is greater than the maximum allowed size,
213  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
214  * @internal ICU 64
215  */
216 U_CAPI void U_EXPORT2
217 ulocimp_addLikelySubtags(const char* localeID,
218                          icu::ByteSink& sink,
219                          UErrorCode* err);
220 
221 /**
222  * Minimize the subtags for a provided locale ID, per the algorithm described
223  * in the following CLDR technical report:
224  *
225  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
226  *
227  * If localeID is already in the minimal form, or there is no data available
228  * for minimization, it will be copied to the output sink.  Since the
229  * minimization algorithm relies on proper maximization, see the comments
230  * for ulocimp_addLikelySubtags for reasons why there might not be any data.
231  *
232  * Examples:
233  *
234  * "en_Latn_US" minimizes to "en"
235  *
236  * "de_Latn_DE" minimizes to "de"
237  *
238  * "sr_Cyrl_RS" minimizes to "sr"
239  *
240  * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
241  * script, and minimizing to "zh" would imply "zh_Hans_CN".)
242  *
243  * @param localeID The locale to minimize
244  * @param sink The output sink receiving the minimized locale
245  * @param favorScript favor to keep script if true, region if false.
246  * @param err Error information if minimizing the locale failed.  If the length
247  * of the localeID and the null-terminator is greater than the maximum allowed size,
248  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
249  * @internal ICU 64
250  */
251 U_CAPI void U_EXPORT2
252 ulocimp_minimizeSubtags(const char* localeID,
253                         icu::ByteSink& sink,
254                         bool favorScript,
255                         UErrorCode* err);
256 
/*
 * NOTE(review): undocumented in this header. Judging only by the name and
 * signature, returns a pointer to where the keyword section of `localeID`
 * begins; what is returned when there are no keywords is not visible here -
 * confirm against the implementation.
 */
257 U_CAPI const char * U_EXPORT2
258 locale_getKeywordsStart(const char *localeID);
259 
/*
 * ultag_is*() predicates.
 *
 * Every function in this group has the same shape: it takes a character
 * pointer `s` and an int32_t `len` and returns a UBool.
 *
 * NOTE(review): none of them is documented in this header. Presumably each
 * tests whether the `len` characters starting at `s` are well-formed for the
 * BCP 47 element named in the function (plural names presumably accept a
 * sequence of subtags). Whether a negative `len` is accepted is not visible
 * here - confirm against the implementation.
 */
260 U_CFUNC UBool
261 ultag_isExtensionSubtags(const char* s, int32_t len);
262 
263 U_CFUNC UBool
264 ultag_isLanguageSubtag(const char* s, int32_t len);
265 
266 U_CFUNC UBool
267 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
268 
269 U_CFUNC UBool
270 ultag_isRegionSubtag(const char* s, int32_t len);
271 
272 U_CFUNC UBool
273 ultag_isScriptSubtag(const char* s, int32_t len);
274 
275 U_CFUNC UBool
276 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
277 
278 U_CFUNC UBool
279 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
280 
281 U_CFUNC UBool
282 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
283 
284 U_CFUNC UBool
285 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
286 
287 U_CFUNC UBool
288 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
289 
290 U_CFUNC UBool
291 ultag_isUnicodeLocaleType(const char* s, int32_t len);
292 
293 U_CFUNC UBool
294 ultag_isVariantSubtags(const char* s, int32_t len);
295 
/*
 * NOTE(review): undocumented in this header. Judging only by the name and
 * signature, returns a pointer into `localeID` where a "tkey" starts
 * (presumably the key part of a transformed-content extension); the result
 * when none is present is not visible here - confirm against the
 * implementation.
 */
296 U_CAPI const char * U_EXPORT2
297 ultag_getTKeyStart(const char *localeID);
298 
/*
 * ulocimp_toBcpKey / ulocimp_toLegacyKey / ulocimp_toBcpType / ulocimp_toLegacyType
 *
 * The two *Key functions take a key string and return a C string. The two
 * *Type functions take a key and a type string plus two UBool out-pointers,
 * `isKnownKey` and `isSpecialType`, and return a C string.
 *
 * NOTE(review): none of them is documented in this header. Presumably they
 * convert locale keyword keys/types between the BCP 47 form and the legacy
 * form, as the names suggest. What is returned for an unknown key or type,
 * and whether the out-pointers may be NULL, is not visible here - confirm
 * against the implementation.
 */
299 U_CFUNC const char*
300 ulocimp_toBcpKey(const char* key);
301 
302 U_CFUNC const char*
303 ulocimp_toLegacyKey(const char* key);
304 
305 U_CFUNC const char*
306 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
307 
308 U_CFUNC const char*
309 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
310 
311 /*
 * Function for testing purposes only.
 * Returns a pointer to an array of constant C strings; judging by the name,
 * these are the locales known to be already canonicalized.
 * @param length pointer to an int32_t; presumably receives the number of
 *               entries in the returned array - TODO confirm.
 * NOTE(review): declared U_CAPI without U_EXPORT2, unlike the other U_CAPI
 * declarations in this header - confirm this is intentional.
 */
312 U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
313 
314 // Return true if the value is already canonicalized.
// For testing purposes only (per the "ForTest" suffix).
// NOTE(review): also declared U_CAPI without U_EXPORT2 - confirm this is intentional.
315 U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
316 
317 #endif
318