• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2004-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12 
13 #include <cstddef>
14 #include <optional>
15 #include <string_view>
16 
17 #include "unicode/bytestream.h"
18 #include "unicode/uloc.h"
19 
20 #include "charstr.h"
21 
22 /**
23  * Create an iterator over the specified keywords list
24  * @param keywordList double-null terminated list. Will be copied.
25  * @param keywordListSize size in bytes of keywordList
26  * @param status err code
27  * @return enumeration (owned by caller) of the keyword list.
28  * @internal ICU 3.0
29  */
30 U_CAPI UEnumeration* U_EXPORT2
31 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
32 
33 /**
34  * Look up a resource bundle table item with fallback on the table level.
35  * This is accessible so it can be called by C++ code.
36  */
37 U_CAPI const UChar * U_EXPORT2
38 uloc_getTableStringWithFallback(
39     const char *path,
40     const char *locale,
41     const char *tableKey,
42     const char *subTableKey,
43     const char *itemKey,
44     int32_t *pLength,
45     UErrorCode *pErrorCode);
46 
namespace {
/**
 * Tells whether a character is one of the two separators accepted
 * between the parts of a locale ID.
 *
 * @param a the character to classify
 * @return true if a is '_' or '-', false for any other character
 */
inline bool _isIDSeparator(char a) {
    return a == '_' || a == '-';
}
}  // namespace
51 
52 U_CFUNC const char*
53 uloc_getCurrentCountryID(const char* oldID);
54 
55 U_CFUNC const char*
56 uloc_getCurrentLanguageID(const char* oldID);
57 
58 U_EXPORT std::optional<std::string_view>
59 ulocimp_toBcpKeyWithFallback(std::string_view keyword);
60 
61 U_EXPORT std::optional<std::string_view>
62 ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
63 
64 U_EXPORT std::optional<std::string_view>
65 ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
66 
67 U_EXPORT std::optional<std::string_view>
68 ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
69 
70 U_EXPORT icu::CharString
71 ulocimp_getKeywords(const char* localeID,
72                     char prev,
73                     bool valuesToo,
74                     UErrorCode& status);
75 
76 U_EXPORT void
77 ulocimp_getKeywords(const char* localeID,
78                     char prev,
79                     icu::ByteSink& sink,
80                     bool valuesToo,
81                     UErrorCode& status);
82 
83 U_EXPORT icu::CharString
84 ulocimp_getName(const char* localeID,
85                 UErrorCode& err);
86 
87 U_EXPORT void
88 ulocimp_getName(const char* localeID,
89                 icu::ByteSink& sink,
90                 UErrorCode& err);
91 
92 U_EXPORT icu::CharString
93 ulocimp_getBaseName(const char* localeID,
94                     UErrorCode& err);
95 
96 U_EXPORT void
97 ulocimp_getBaseName(const char* localeID,
98                     icu::ByteSink& sink,
99                     UErrorCode& err);
100 
101 U_EXPORT icu::CharString
102 ulocimp_canonicalize(const char* localeID,
103                      UErrorCode& err);
104 
105 U_EXPORT void
106 ulocimp_canonicalize(const char* localeID,
107                      icu::ByteSink& sink,
108                      UErrorCode& err);
109 
110 U_EXPORT icu::CharString
111 ulocimp_getKeywordValue(const char* localeID,
112                         std::string_view keywordName,
113                         UErrorCode& status);
114 
115 U_EXPORT void
116 ulocimp_getKeywordValue(const char* localeID,
117                         std::string_view keywordName,
118                         icu::ByteSink& sink,
119                         UErrorCode& status);
120 
121 U_EXPORT icu::CharString
122 ulocimp_getLanguage(const char* localeID, UErrorCode& status);
123 
124 U_EXPORT icu::CharString
125 ulocimp_getScript(const char* localeID, UErrorCode& status);
126 
127 U_EXPORT icu::CharString
128 ulocimp_getRegion(const char* localeID, UErrorCode& status);
129 
130 U_EXPORT icu::CharString
131 ulocimp_getVariant(const char* localeID, UErrorCode& status);
132 
133 U_EXPORT void
134 ulocimp_setKeywordValue(std::string_view keywordName,
135                         std::string_view keywordValue,
136                         icu::CharString& localeID,
137                         UErrorCode& status);
138 
139 U_EXPORT int32_t
140 ulocimp_setKeywordValue(std::string_view keywords,
141                         std::string_view keywordName,
142                         std::string_view keywordValue,
143                         icu::ByteSink& sink,
144                         UErrorCode& status);
145 
146 U_EXPORT void
147 ulocimp_getSubtags(
148         const char* localeID,
149         icu::CharString* language,
150         icu::CharString* script,
151         icu::CharString* region,
152         icu::CharString* variant,
153         const char** pEnd,
154         UErrorCode& status);
155 
156 U_EXPORT void
157 ulocimp_getSubtags(
158         const char* localeID,
159         icu::ByteSink* language,
160         icu::ByteSink* script,
161         icu::ByteSink* region,
162         icu::ByteSink* variant,
163         const char** pEnd,
164         UErrorCode& status);
165 
166 inline void
ulocimp_getSubtags(const char * localeID,std::nullptr_t,std::nullptr_t,std::nullptr_t,std::nullptr_t,const char ** pEnd,UErrorCode & status)167 ulocimp_getSubtags(
168         const char* localeID,
169         std::nullptr_t,
170         std::nullptr_t,
171         std::nullptr_t,
172         std::nullptr_t,
173         const char** pEnd,
174         UErrorCode& status) {
175     ulocimp_getSubtags(
176             localeID,
177             static_cast<icu::ByteSink*>(nullptr),
178             static_cast<icu::ByteSink*>(nullptr),
179             static_cast<icu::ByteSink*>(nullptr),
180             static_cast<icu::ByteSink*>(nullptr),
181             pEnd,
182             status);
183 }
184 
185 U_EXPORT icu::CharString
186 ulocimp_getParent(const char* localeID,
187                   UErrorCode& err);
188 
189 U_EXPORT void
190 ulocimp_getParent(const char* localeID,
191                   icu::ByteSink& sink,
192                   UErrorCode& err);
193 
194 U_EXPORT icu::CharString
195 ulocimp_toLanguageTag(const char* localeID,
196                       bool strict,
197                       UErrorCode& status);
198 
199 /**
200  * Writes a well-formed language tag for this locale ID.
201  *
202  * **Note**: When `strict` is false, any locale fields which do not satisfy the
203  * BCP47 syntax requirement will be omitted from the result.  When `strict` is
204  * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
205  * field does not satisfy the BCP47 syntax requirement.
206  *
207  * @param localeID  the input locale ID
208  * @param sink      the output sink receiving the BCP47 language
209  *                  tag for this Locale.
210  * @param strict    boolean value indicating if the function returns
211  *                  an error for an ill-formed input locale ID.
212  * @param err       error information if receiving the language
213  *                  tag failed.
214  * The function is declared void: the tag is delivered through `sink`, not returned.
215  *
216  * @internal ICU 64
217  */
218 U_EXPORT void
219 ulocimp_toLanguageTag(const char* localeID,
220                       icu::ByteSink& sink,
221                       bool strict,
222                       UErrorCode& err);
223 
224 U_EXPORT icu::CharString
225 ulocimp_forLanguageTag(const char* langtag,
226                        int32_t tagLen,
227                        int32_t* parsedLength,
228                        UErrorCode& status);
229 
230 /**
231  * Returns a locale ID for the specified BCP47 language tag string.
232  * If the specified language tag contains any ill-formed subtags,
233  * the first such subtag and all following subtags are ignored.
234  * <p>
235  * This implements the 'Language-Tag' production of BCP 47, and so
236  * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
237  * (regular and irregular) as well as private use language tags.
238  *
239  * Private use tags are represented as 'x-whatever',
240  * and legacy tags are converted to their canonical replacements where they exist.
241  *
242  * Note that a few legacy tags have no modern replacement;
243  * these will be converted using the fallback described in
244  * the first paragraph, so some information might be lost.
245  *
246  * @param langtag   the input BCP47 language tag.
247  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
248  * @param sink      the output sink receiving a locale ID for the
249  *                  specified BCP47 language tag.
250  * @param parsedLength  if not NULL, successfully parsed length
251  *                      for the input language tag is set.
252  * @param err       error information if receiving the locale ID
253  *                  failed.
254  * @internal ICU 63
255  */
256 U_EXPORT void
257 ulocimp_forLanguageTag(const char* langtag,
258                        int32_t tagLen,
259                        icu::ByteSink& sink,
260                        int32_t* parsedLength,
261                        UErrorCode& err);
262 
263 /**
264  * Get the region to use for supplemental data lookup. Uses
265  * (1) any region specified by locale tag "rg"; if none then
266  * (2) any unicode_region_tag in the locale ID; if none then
267  * (3) if inferRegion is true, the region suggested by
268  * getLikelySubtags on the localeID.
269  * If no region is found, returns an empty string.
270  *
271  * @param localeID
272  *     The complete locale ID (with keywords) from which
273  *     to get the region to use for supplemental data.
274  * @param inferRegion
275  *     If true, will try to infer region from localeID if
276  *     no other region is found.
277  * @param status
278  *     Reference to in/out UErrorCode value for latest status.
279  * @return
280  *     The region code found, empty if none found.
281  * @internal ICU 57
282  */
283 U_EXPORT icu::CharString
284 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
285                                      UErrorCode& status);
286 
287 U_EXPORT icu::CharString
288 ulocimp_addLikelySubtags(const char* localeID,
289                          UErrorCode& status);
290 
291 /**
292  * Add the likely subtags for a provided locale ID, per the algorithm described
293  * in the following CLDR technical report:
294  *
295  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
296  *
297  * If localeID is already in the maximal form, or there is no data available
298  * for maximization, it will be copied to the output buffer.  For example,
299  * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
300  *
301  * Examples:
302  *
303  * "en" maximizes to "en_Latn_US"
304  *
305  * "de" maximizes to "de_Latn_DE"
306  *
307  * "sr" maximizes to "sr_Cyrl_RS"
308  *
309  * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
310  *
311  * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
312  *
313  * @param localeID The locale to maximize
314  * @param sink The output sink receiving the maximized locale
315  * @param err Error information if maximizing the locale failed.  If the length
316  * of the localeID and the null-terminator is greater than the maximum allowed size,
317  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
318  * @internal ICU 64
319  */
320 U_EXPORT void
321 ulocimp_addLikelySubtags(const char* localeID,
322                          icu::ByteSink& sink,
323                          UErrorCode& err);
324 
325 U_EXPORT icu::CharString
326 ulocimp_minimizeSubtags(const char* localeID,
327                         bool favorScript,
328                         UErrorCode& status);
329 
330 /**
331  * Minimize the subtags for a provided locale ID, per the algorithm described
332  * in the following CLDR technical report:
333  *
334  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
335  *
336  * If localeID is already in the minimal form, or there is no data available
337  * for minimization, it will be copied to the output buffer.  Since the
338  * minimization algorithm relies on proper maximization, see the comments
339  * for ulocimp_addLikelySubtags for reasons why there might not be any data.
340  *
341  * Examples:
342  *
343  * "en_Latn_US" minimizes to "en"
344  *
345  * "de_Latn_DE" minimizes to "de"
346  *
347  * "sr_Cyrl_RS" minimizes to "sr"
348  *
349  * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
350  * script, and minimizing to "zh" would imply "zh_Hans_CN".)
351  *
352  * @param localeID The locale to minimize
353  * @param sink The output sink receiving the minimized locale
354  * @param favorScript favor to keep script if true, region if false.
355  * @param err Error information if minimizing the locale failed.  If the length
356  * of the localeID and the null-terminator is greater than the maximum allowed size,
357  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
358  * @internal ICU 64
359  */
360 U_EXPORT void
361 ulocimp_minimizeSubtags(const char* localeID,
362                         icu::ByteSink& sink,
363                         bool favorScript,
364                         UErrorCode& err);
365 
366 U_CAPI const char * U_EXPORT2
367 locale_getKeywordsStart(const char *localeID);
368 
369 bool
370 ultag_isExtensionSubtags(const char* s, int32_t len);
371 
372 bool
373 ultag_isLanguageSubtag(const char* s, int32_t len);
374 
375 bool
376 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
377 
378 bool
379 ultag_isRegionSubtag(const char* s, int32_t len);
380 
381 bool
382 ultag_isScriptSubtag(const char* s, int32_t len);
383 
384 bool
385 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
386 
387 bool
388 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
389 
390 bool
391 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
392 
393 bool
394 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
395 
396 bool
397 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
398 
399 bool
400 ultag_isUnicodeLocaleType(const char* s, int32_t len);
401 
402 bool
403 ultag_isVariantSubtags(const char* s, int32_t len);
404 
405 const char*
406 ultag_getTKeyStart(const char* localeID);
407 
408 U_EXPORT std::optional<std::string_view>
409 ulocimp_toBcpKey(std::string_view key);
410 
411 U_EXPORT std::optional<std::string_view>
412 ulocimp_toLegacyKey(std::string_view key);
413 
414 U_EXPORT std::optional<std::string_view>
415 ulocimp_toBcpType(std::string_view key, std::string_view type);
416 
417 U_EXPORT std::optional<std::string_view>
418 ulocimp_toLegacyType(std::string_view key, std::string_view type);
419 
420 /* Function for testing purpose */
421 U_EXPORT const char* const*
422 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
423 
424 // Return true if the value is already canonicalized.
425 U_EXPORT bool
426 ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
427 
428 #ifdef __cplusplus
429 U_NAMESPACE_BEGIN
430 class U_COMMON_API RegionValidateMap : public UObject {
431  public:
432   RegionValidateMap();
433   virtual ~RegionValidateMap();
434   bool isSet(const char* region) const;
435   bool equals(const RegionValidateMap& that) const;
436  protected:
437   int32_t value(const char* region) const;
438   uint32_t map[22]; // 26x26/32 = 22;
439 };
440 U_NAMESPACE_END
441 #endif /* __cplusplus */
442 
443 #endif
444