1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ULOC.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
22
23 /*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29 */
30
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34
35 #include "putilimp.h"
36 #include "ustr_imp.h"
37 #include "ulocimp.h"
38 #include "umutex.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include "ucln_cmn.h"
42 #include "locmap.h"
43 #include "uarrsort.h"
44 #include "uenumimp.h"
45 #include "uassert.h"
46
47 #include <stdio.h> /* for sprintf */
48
49 /* ### Declarations **************************************************/
50
51 /* Locale stuff from locid.cpp */
52 U_CFUNC void locale_set_default(const char *id);
53 U_CFUNC const char *locale_get_default(void);
54 U_CFUNC int32_t
55 locale_getKeywords(const char *localeID,
56 char prev,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
59 UBool valuesToo,
60 UErrorCode *status);
61
62 /* ### Data tables **************************************************/
63
64 /**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
87 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn. They are still in the
 * table, but now sit at the end of it, because their 3 character codes
 * are duplicates. This avoids bad searches going from 3 to 2 character
 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96 /* ISO639 table version is 20130531 */
/* Layout: first (user-visible) list, NULL, second (obsolete) list, NULL.
   Entry i here corresponds to entry i of LANGUAGES_3. */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
    "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
    "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
    "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
    "aus", "av", "awa", "ay", "az",
    "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
    "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
    "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
    "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
    "csb", "cu", "cus", "cv", "cy",
    "da", "dak", "dar", "dav", "day", "de", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "dv", "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
    "enm", "eo", "es", "et", "eu", "ewo",
    "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
    "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
    "fy",
    "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
    "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu", "guz", "gv", "gwi",
    "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
    "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
    "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
    "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
    "is", "it", "iu",
    "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
    "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
    "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
    "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
    "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
    "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
    "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "my", "mye", "myn", "myv",
    "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
    "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
    "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
    "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
    "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
    "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
    "pt",
    "qu",
    "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
    "rom", "ru", "rup", "rw", "rwk",
    "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
    "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
    "shi", "shn", "shu", "si", "sid", "sio", "sit",
    "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
    "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
    "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
    "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
    "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
    "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
    "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
    "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vi", "vo", "vot", "vun",
    "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
    "xal", "xh", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
    "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
    "zun", "zxx", "zza",
NULL, /* terminates the first list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL /* terminates the second list */
};
182
/* Deprecated 2-letter language codes. The positions line up with
   REPLACEMENT_LANGUAGES below (in/id, iw/he, ji/yi, jw/jv); presumably the
   two arrays are searched by shared index -- the lookup code is not in this
   section. Each array ends with two NULL entries. */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
/* Current codes, in the same order as DEPRECATED_LANGUAGES. */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
189
190 /**
191 * Table of 3-letter language codes.
192 *
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible. It must be kept in sync
195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i]. The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
198 *
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 *
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry. The two lists correspond to
204 * the two lists in LANGUAGES.
205 */
206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207 /* ISO639 table version is 20130531 */
/* Layout mirrors LANGUAGES: first list, NULL, second (obsolete) list, NULL.
   Entry i here corresponds to entry i of LANGUAGES. */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    "aus", "ava", "awa", "aym", "aze",
    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    "csb", "chu", "cus", "chv", "cym",
    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "div", "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    "enm", "epo", "spa", "est", "eus", "ewo",
    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    "fry",
    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "guj", "guz", "glv", "gwi",
    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    "isl", "ita", "iku",
    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "mya", "mye", "myn", "myv",
    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    "por",
    "que",
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    "rom", "rus", "rup", "kin", "rwk",
    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vie", "vol", "vot", "vun",
    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    "xal", "xho", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
    "zun", "zxx", "zza",
NULL, /* terminates the first list */
/*  "in",  "iw",  "ji",  "jw",  "sh",  (the matching obsolete entries of LANGUAGES) */
    "ind", "heb", "yid", "jaw", "srp",
NULL /* terminates the second list */
};
294
295 /**
296 * Table of 2-letter country codes.
297 *
298 * This list must be in sorted order. This list is returned directly
299 * to the user by some API.
300 *
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
302 * entries matched.
303 *
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry. The first list is visible to
306 * user code when this array is returned by API. The second list
307 * contains codes we support, but do not expose through user API.
308 *
309 * Notes:
310 *
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
315 *
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 */
/* Layout: first (user-visible) list, NULL, second (obsolete) list, NULL.
   Entry i here corresponds to entry i of COUNTRIES_3. */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
NULL, /* terminates the first list */
    /* "RO" appears in both lists; its second-list partner in COUNTRIES_3 is the old code "ROM". */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL /* terminates the second list */
};
354
/* Deprecated 2-letter country codes. The positions line up with
   REPLACEMENT_COUNTRIES below; presumably the two arrays are searched by
   shared index -- the lookup code is not in this section. Each array ends
   with two NULL entries. */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
/* Current codes, in the same order as DEPRECATED_COUNTRIES (repeated in the comment for easy comparison). */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
};
362
363 /**
364 * Table of 3-letter country codes.
365 *
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
371 *
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry. The two lists correspond to
374 * the two lists in COUNTRIES.
375 */
/* Layout mirrors COUNTRIES: first list, NULL, second (obsolete) list, NULL.
   Each commented row repeats the COUNTRIES entries that the row below it matches. */
static const char * const COUNTRIES_3[] = {
/*  "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS", "YE", "YT", "ZA", "ZM", "ZW", */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list */
/*  "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL /* terminates the second list */
};
442
/* One row of CANONICALIZE_MAP: an input locale ID, the ID it maps to, and an
   optional keyword/value pair that goes with the mapping. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;
449
450 /**
451 * A map to canonicalize locale IDs. This handles a variety of
452 * different semantic kinds of transformations.
453 */
/* Columns: { id, canonicalID, keyword, value }. Rows whose last two columns
   are NULL are plain renames; the others also carry a keyword=value pair
   (for example the *_PREEURO rows carry currency=<pre-euro currency code>). */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
    { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN", "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
    { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE", "yue", NULL, NULL }, /* registered name */
};
502
/* One row of VARIANT_MAP: a variant name and the keyword/value pair associated with it. */
typedef struct VariantMap {
    const char *variant;     /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;
508
/* Columns: { variant, keyword, value }. The code that consults this table is
   not in this section; presumably each variant is rewritten as the listed
   keyword=value pair -- confirm against the canonicalization code. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO", "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" } /* Solaris variant */
};
514
515 /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id looks like a BCP47 tag with an extension: the id is
 * non-NULL, contains no '@', and getShortestSubtagLength() reports a
 * one-character subtag.
 *
 * The macro now measures its own argument. It previously referenced a
 * variable literally named "localeID" at the call site, so it silently
 * inspected the wrong string (or failed to compile) for any other argument.
 * The argument is evaluated more than once and must have no side effects.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err). finalID is set to
 * buffer when that call returns a positive length and *err is not a failure;
 * otherwise finalID is set to the unchanged id. The macro does not reset *err.
 *
 * NOTE: this expands to a bare if/else statement (it is not wrapped in
 * do { } while (0)), so it must not be used as the unbraced body of another
 * if/else.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
            finalID=id; \
        } else { \
            finalID=buffer; \
        }
525 /* Gets the size of the shortest subtag in the given localeID. */
getShortestSubtagLength(const char * localeID)526 static int32_t getShortestSubtagLength(const char *localeID) {
527 int32_t localeIDLength = uprv_strlen(localeID);
528 int32_t length = localeIDLength;
529 int32_t tmpLength = 0;
530 int32_t i;
531 UBool reset = TRUE;
532
533 for (i = 0; i < localeIDLength; i++) {
534 if (localeID[i] != '_' && localeID[i] != '-') {
535 if (reset) {
536 tmpLength = 0;
537 reset = FALSE;
538 }
539 tmpLength++;
540 } else {
541 if (tmpLength != 0 && tmpLength < length) {
542 length = tmpLength;
543 }
544 reset = TRUE;
545 }
546 }
547
548 return length;
549 }
550
551 /* ### Keywords **************************************************/
552
/* Size in bytes of a keyword-name buffer (see KeywordStruct.keyword), including
   room for the terminating NUL; longer keyword names are rejected. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords _getKeywords() collects for one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
555
556 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char * localeID)557 locale_getKeywordsStart(const char *localeID) {
558 const char *result = NULL;
559 if((result = uprv_strchr(localeID, '@')) != NULL) {
560 return result;
561 }
562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563 else {
564 /* We do this because the @ sign is variant, and the @ sign used on one
565 EBCDIC machine won't be compiled the same way on other EBCDIC based
566 machines. */
567 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
568 const uint8_t *charToFind = ebcdicSigns;
569 while(*charToFind) {
570 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
571 return result;
572 }
573 charToFind++;
574 }
575 }
576 #endif
577 return NULL;
578 }
579
580 /**
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
585 */
locale_canonKeywordName(char * buf,const char * keywordName,UErrorCode * status)586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
587 {
588 int32_t i;
589 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
590
591 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
592 /* keyword name too long for internal buffer */
593 *status = U_INTERNAL_PROGRAM_ERROR;
594 return 0;
595 }
596
597 /* normalize the keyword name */
598 for(i = 0; i < keywordNameLen; i++) {
599 buf[i] = uprv_tolower(keywordName[i]);
600 }
601 buf[i] = 0;
602
603 return keywordNameLen;
604 }
605
/* One parsed keyword=value pair, as collected by _getKeywords(). */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased keyword name with spaces removed, NUL-terminated */
    int32_t keywordLen;                    /* length of keyword, not counting the NUL */
    const char *valueStart;                /* first character of the value; points into the caller's string
                                              (or at addValue) and is not NUL-terminated at valueLen */
    int32_t valueLen;                      /* length of the value with surrounding spaces trimmed */
} KeywordStruct;
612
613 static int32_t U_CALLCONV
compareKeywordStructs(const void *,const void * left,const void * right)614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
615 const char* leftString = ((const KeywordStruct *)left)->keyword;
616 const char* rightString = ((const KeywordStruct *)right)->keyword;
617 return uprv_strcmp(leftString, rightString);
618 }
619
620 /**
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
624 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
625 */
626 static int32_t
_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,const char * addKeyword,const char * addValue,UErrorCode * status)627 _getKeywords(const char *localeID,
628 char prev,
629 char *keywords, int32_t keywordCapacity,
630 char *values, int32_t valuesCapacity, int32_t *valLen,
631 UBool valuesToo,
632 const char* addKeyword,
633 const char* addValue,
634 UErrorCode *status)
635 {
636 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
637
638 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
639 int32_t numKeywords = 0;
640 const char* pos = localeID;
641 const char* equalSign = NULL;
642 const char* semicolon = NULL;
643 int32_t i = 0, j, n;
644 int32_t keywordsLen = 0;
645 int32_t valuesLen = 0;
646
647 if(prev == '@') { /* start of keyword definition */
648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649 do {
650 UBool duplicate = FALSE;
651 /* skip leading spaces */
652 while(*pos == ' ') {
653 pos++;
654 }
655 if (!*pos) { /* handle trailing "; " */
656 break;
657 }
658 if(numKeywords == maxKeywords) {
659 *status = U_INTERNAL_PROGRAM_ERROR;
660 return 0;
661 }
662 equalSign = uprv_strchr(pos, '=');
663 semicolon = uprv_strchr(pos, ';');
664 /* lack of '=' [foo@currency] is illegal */
665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666 if(!equalSign || (semicolon && semicolon<equalSign)) {
667 *status = U_INVALID_FORMAT_ERROR;
668 return 0;
669 }
670 /* need to normalize both keyword and keyword name */
671 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
672 /* keyword name too long for internal buffer */
673 *status = U_INTERNAL_PROGRAM_ERROR;
674 return 0;
675 }
676 for(i = 0, n = 0; i < equalSign - pos; ++i) {
677 if (pos[i] != ' ') {
678 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
679 }
680 }
681
682 /* zero-length keyword is an error. */
683 if (n == 0) {
684 *status = U_INVALID_FORMAT_ERROR;
685 return 0;
686 }
687
688 keywordList[numKeywords].keyword[n] = 0;
689 keywordList[numKeywords].keywordLen = n;
690 /* now grab the value part. First we skip the '=' */
691 equalSign++;
692 /* then we leading spaces */
693 while(*equalSign == ' ') {
694 equalSign++;
695 }
696
697 /* Premature end or zero-length value */
698 if (!equalSign || equalSign == semicolon) {
699 *status = U_INVALID_FORMAT_ERROR;
700 return 0;
701 }
702
703 keywordList[numKeywords].valueStart = equalSign;
704
705 pos = semicolon;
706 i = 0;
707 if(pos) {
708 while(*(pos - i - 1) == ' ') {
709 i++;
710 }
711 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
712 pos++;
713 } else {
714 i = (int32_t)uprv_strlen(equalSign);
715 while(i && equalSign[i-1] == ' ') {
716 i--;
717 }
718 keywordList[numKeywords].valueLen = i;
719 }
720 /* If this is a duplicate keyword, then ignore it */
721 for (j=0; j<numKeywords; ++j) {
722 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
723 duplicate = TRUE;
724 break;
725 }
726 }
727 if (!duplicate) {
728 ++numKeywords;
729 }
730 } while(pos);
731
732 /* Handle addKeyword/addValue. */
733 if (addKeyword != NULL) {
734 UBool duplicate = FALSE;
735 U_ASSERT(addValue != NULL);
736 /* Search for duplicate; if found, do nothing. Explicit keyword
737 overrides addKeyword. */
738 for (j=0; j<numKeywords; ++j) {
739 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
740 duplicate = TRUE;
741 break;
742 }
743 }
744 if (!duplicate) {
745 if (numKeywords == maxKeywords) {
746 *status = U_INTERNAL_PROGRAM_ERROR;
747 return 0;
748 }
749 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
750 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
751 keywordList[numKeywords].valueStart = addValue;
752 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
753 ++numKeywords;
754 }
755 } else {
756 U_ASSERT(addValue == NULL);
757 }
758
759 /* now we have a list of keywords */
760 /* we need to sort it */
761 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
762
763 /* Now construct the keyword part */
764 for(i = 0; i < numKeywords; i++) {
765 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
766 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
767 if(valuesToo) {
768 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
769 } else {
770 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
771 }
772 }
773 keywordsLen += keywordList[i].keywordLen + 1;
774 if(valuesToo) {
775 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
776 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
777 }
778 keywordsLen += keywordList[i].valueLen;
779
780 if(i < numKeywords - 1) {
781 if(keywordsLen < keywordCapacity) {
782 keywords[keywordsLen] = ';';
783 }
784 keywordsLen++;
785 }
786 }
787 if(values) {
788 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
789 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
790 values[valuesLen + keywordList[i].valueLen] = 0;
791 }
792 valuesLen += keywordList[i].valueLen + 1;
793 }
794 }
795 if(values) {
796 values[valuesLen] = 0;
797 if(valLen) {
798 *valLen = valuesLen;
799 }
800 }
801 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
802 } else {
803 return 0;
804 }
805 }
806
807 U_CFUNC int32_t
locale_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,UErrorCode * status)808 locale_getKeywords(const char *localeID,
809 char prev,
810 char *keywords, int32_t keywordCapacity,
811 char *values, int32_t valuesCapacity, int32_t *valLen,
812 UBool valuesToo,
813 UErrorCode *status) {
814 return _getKeywords(localeID, prev, keywords, keywordCapacity,
815 values, valuesCapacity, valLen, valuesToo,
816 NULL, NULL, status);
817 }
818
819 U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char * localeID,const char * keywordName,char * buffer,int32_t bufferCapacity,UErrorCode * status)820 uloc_getKeywordValue(const char* localeID,
821 const char* keywordName,
822 char* buffer, int32_t bufferCapacity,
823 UErrorCode* status)
824 {
825 const char* startSearchHere = NULL;
826 const char* nextSeparator = NULL;
827 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
828 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
829 int32_t i = 0;
830 int32_t result = 0;
831
832 if(status && U_SUCCESS(*status) && localeID) {
833 char tempBuffer[ULOC_FULLNAME_CAPACITY];
834 const char* tmpLocaleID;
835
836 if (_hasBCP47Extension(localeID)) {
837 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
838 } else {
839 tmpLocaleID=localeID;
840 }
841
842 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
843 if(startSearchHere == NULL) {
844 /* no keywords, return at once */
845 return 0;
846 }
847
848 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
849 if(U_FAILURE(*status)) {
850 return 0;
851 }
852
853 /* find the first keyword */
854 while(startSearchHere) {
855 startSearchHere++;
856 /* skip leading spaces (allowed?) */
857 while(*startSearchHere == ' ') {
858 startSearchHere++;
859 }
860 nextSeparator = uprv_strchr(startSearchHere, '=');
861 /* need to normalize both keyword and keyword name */
862 if(!nextSeparator) {
863 break;
864 }
865 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
866 /* keyword name too long for internal buffer */
867 *status = U_INTERNAL_PROGRAM_ERROR;
868 return 0;
869 }
870 for(i = 0; i < nextSeparator - startSearchHere; i++) {
871 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
872 }
873 /* trim trailing spaces */
874 while(startSearchHere[i-1] == ' ') {
875 i--;
876 U_ASSERT(i>=0);
877 }
878 localeKeywordNameBuffer[i] = 0;
879
880 startSearchHere = uprv_strchr(nextSeparator, ';');
881
882 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
883 nextSeparator++;
884 while(*nextSeparator == ' ') {
885 nextSeparator++;
886 }
887 /* we actually found the keyword. Copy the value */
888 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
889 while(*(startSearchHere-1) == ' ') {
890 startSearchHere--;
891 }
892 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
893 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
894 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
895 i = (int32_t)uprv_strlen(nextSeparator);
896 while(nextSeparator[i - 1] == ' ') {
897 i--;
898 }
899 uprv_strncpy(buffer, nextSeparator, i);
900 result = u_terminateChars(buffer, bufferCapacity, i, status);
901 } else {
902 /* give a bigger buffer, please */
903 *status = U_BUFFER_OVERFLOW_ERROR;
904 if(startSearchHere) {
905 result = (int32_t)(startSearchHere - nextSeparator);
906 } else {
907 result = (int32_t)uprv_strlen(nextSeparator);
908 }
909 }
910 return result;
911 }
912 }
913 }
914 return 0;
915 }
916
917 U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char * keywordName,const char * keywordValue,char * buffer,int32_t bufferCapacity,UErrorCode * status)918 uloc_setKeywordValue(const char* keywordName,
919 const char* keywordValue,
920 char* buffer, int32_t bufferCapacity,
921 UErrorCode* status)
922 {
923 /* TODO: sorting. removal. */
924 int32_t keywordNameLen;
925 int32_t keywordValueLen;
926 int32_t bufLen;
927 int32_t needLen = 0;
928 int32_t foundValueLen;
929 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
930 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
931 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
932 int32_t i = 0;
933 int32_t rc;
934 char* nextSeparator = NULL;
935 char* nextEqualsign = NULL;
936 char* startSearchHere = NULL;
937 char* keywordStart = NULL;
938 char *insertHere = NULL;
939 if(U_FAILURE(*status)) {
940 return -1;
941 }
942 if(bufferCapacity>1) {
943 bufLen = (int32_t)uprv_strlen(buffer);
944 } else {
945 *status = U_ILLEGAL_ARGUMENT_ERROR;
946 return 0;
947 }
948 if(bufferCapacity<bufLen) {
949 /* The capacity is less than the length?! Is this NULL terminated? */
950 *status = U_ILLEGAL_ARGUMENT_ERROR;
951 return 0;
952 }
953 if(keywordValue && !*keywordValue) {
954 keywordValue = NULL;
955 }
956 if(keywordValue) {
957 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
958 } else {
959 keywordValueLen = 0;
960 }
961 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
962 if(U_FAILURE(*status)) {
963 return 0;
964 }
965 startSearchHere = (char*)locale_getKeywordsStart(buffer);
966 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
967 if(!keywordValue) { /* no keywords = nothing to remove */
968 return bufLen;
969 }
970
971 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
972 if(startSearchHere) { /* had a single @ */
973 needLen--; /* already had the @ */
974 /* startSearchHere points at the @ */
975 } else {
976 startSearchHere=buffer+bufLen;
977 }
978 if(needLen >= bufferCapacity) {
979 *status = U_BUFFER_OVERFLOW_ERROR;
980 return needLen; /* no change */
981 }
982 *startSearchHere = '@';
983 startSearchHere++;
984 uprv_strcpy(startSearchHere, keywordNameBuffer);
985 startSearchHere += keywordNameLen;
986 *startSearchHere = '=';
987 startSearchHere++;
988 uprv_strcpy(startSearchHere, keywordValue);
989 startSearchHere+=keywordValueLen;
990 return needLen;
991 } /* end shortcut - no @ */
992
993 keywordStart = startSearchHere;
994 /* search for keyword */
995 while(keywordStart) {
996 keywordStart++;
997 /* skip leading spaces (allowed?) */
998 while(*keywordStart == ' ') {
999 keywordStart++;
1000 }
1001 nextEqualsign = uprv_strchr(keywordStart, '=');
1002 /* need to normalize both keyword and keyword name */
1003 if(!nextEqualsign) {
1004 break;
1005 }
1006 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1007 /* keyword name too long for internal buffer */
1008 *status = U_INTERNAL_PROGRAM_ERROR;
1009 return 0;
1010 }
1011 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1012 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1013 }
1014 /* trim trailing spaces */
1015 while(keywordStart[i-1] == ' ') {
1016 i--;
1017 }
1018 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1019 localeKeywordNameBuffer[i] = 0;
1020
1021 nextSeparator = uprv_strchr(nextEqualsign, ';');
1022 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1023 if(rc == 0) {
1024 nextEqualsign++;
1025 while(*nextEqualsign == ' ') {
1026 nextEqualsign++;
1027 }
1028 /* we actually found the keyword. Change the value */
1029 if (nextSeparator) {
1030 keywordAtEnd = 0;
1031 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1032 } else {
1033 keywordAtEnd = 1;
1034 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1035 }
1036 if(keywordValue) { /* adding a value - not removing */
1037 if(foundValueLen == keywordValueLen) {
1038 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1039 return bufLen; /* no change in size */
1040 } else if(foundValueLen > keywordValueLen) {
1041 int32_t delta = foundValueLen - keywordValueLen;
1042 if(nextSeparator) { /* RH side */
1043 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1044 }
1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1046 bufLen -= delta;
1047 buffer[bufLen]=0;
1048 return bufLen;
1049 } else { /* FVL < KVL */
1050 int32_t delta = keywordValueLen - foundValueLen;
1051 if((bufLen+delta) >= bufferCapacity) {
1052 *status = U_BUFFER_OVERFLOW_ERROR;
1053 return bufLen+delta;
1054 }
1055 if(nextSeparator) { /* RH side */
1056 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1057 }
1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1059 bufLen += delta;
1060 buffer[bufLen]=0;
1061 return bufLen;
1062 }
1063 } else { /* removing a keyword */
1064 if(keywordAtEnd) {
1065 /* zero out the ';' or '@' just before startSearchhere */
1066 keywordStart[-1] = 0;
1067 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1068 } else {
1069 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1070 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1071 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1072 }
1073 }
1074 } else if(rc<0){ /* end match keyword */
1075 /* could insert at this location. */
1076 insertHere = keywordStart;
1077 }
1078 keywordStart = nextSeparator;
1079 } /* end loop searching */
1080
1081 if(!keywordValue) {
1082 return bufLen; /* removal of non-extant keyword - no change */
1083 }
1084
1085 /* we know there is at least one keyword. */
1086 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1087 if(needLen >= bufferCapacity) {
1088 *status = U_BUFFER_OVERFLOW_ERROR;
1089 return needLen; /* no change */
1090 }
1091
1092 if(insertHere) {
1093 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1094 keywordStart = insertHere;
1095 } else {
1096 keywordStart = buffer+bufLen;
1097 *keywordStart = ';';
1098 keywordStart++;
1099 }
1100 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1101 keywordStart += keywordNameLen;
1102 *keywordStart = '=';
1103 keywordStart++;
1104 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1105 keywordStart+=keywordValueLen;
1106 if(insertHere) {
1107 *keywordStart = ';';
1108 keywordStart++;
1109 }
1110 buffer[needLen]=0;
1111 return needLen;
1112 }
1113
1114 /* ### ID parsing implementation **************************************************/
1115
/* TRUE if 'a' is one of the letters that may start a special prefix.
   Note: evaluates its argument several times. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant.
 * Note: evaluates its argument several times.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1126
_strnchr(const char * str,int32_t len,char c)1127 static char* _strnchr(const char* str, int32_t len, char c) {
1128 U_ASSERT(str != 0 && len >= 0);
1129 while (len-- != 0) {
1130 char d = *str;
1131 if (d == c) {
1132 return (char*) str;
1133 } else if (d == 0) {
1134 break;
1135 }
1136 ++str;
1137 }
1138 return NULL;
1139 }
1140
1141 /**
1142 * Lookup 'key' in the array 'list'. The array 'list' should contain
1143 * a NULL entry, followed by more entries, and a second NULL entry.
1144 *
1145 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1146 * COUNTRIES_3.
1147 */
_findIndex(const char * const * list,const char * key)1148 static int16_t _findIndex(const char* const* list, const char* key)
1149 {
1150 const char* const* anchor = list;
1151 int32_t pass = 0;
1152
1153 /* Make two passes through two NULL-terminated arrays at 'list' */
1154 while (pass++ < 2) {
1155 while (*list) {
1156 if (uprv_strcmp(key, *list) == 0) {
1157 return (int16_t)(list - anchor);
1158 }
1159 list++;
1160 }
1161 ++list; /* skip final NULL *CWB*/
1162 }
1163 return -1;
1164 }
1165
1166 /* count the length of src while copying it to dest; return strlen(src) */
1167 static inline int32_t
_copyCount(char * dest,int32_t destCapacity,const char * src)1168 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1169 const char *anchor;
1170 char c;
1171
1172 anchor=src;
1173 for(;;) {
1174 if((c=*src)==0) {
1175 return (int32_t)(src-anchor);
1176 }
1177 if(destCapacity<=0) {
1178 return (int32_t)((src-anchor)+uprv_strlen(src));
1179 }
1180 ++src;
1181 *dest++=c;
1182 --destCapacity;
1183 }
1184 }
1185
1186 U_CFUNC const char*
uloc_getCurrentCountryID(const char * oldID)1187 uloc_getCurrentCountryID(const char* oldID){
1188 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1189 if (offset >= 0) {
1190 return REPLACEMENT_COUNTRIES[offset];
1191 }
1192 return oldID;
1193 }
1194 U_CFUNC const char*
uloc_getCurrentLanguageID(const char * oldID)1195 uloc_getCurrentLanguageID(const char* oldID){
1196 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1197 if (offset >= 0) {
1198 return REPLACEMENT_LANGUAGES[offset];
1199 }
1200 return oldID;
1201 }
/*
 * The internal functions ulocimp_getLanguage(), ulocimp_getScript() and
 * ulocimp_getCountry() avoid duplicating code to handle the earlier
 * locale ID pieces in the functions for the later ones by
 * setting the *pEnd pointer to where they stopped parsing.
 * (_getVariant() is then called on the remainder.)
 *
 * TODO try to use this in Locale
 */
1210 U_CFUNC int32_t
ulocimp_getLanguage(const char * localeID,char * language,int32_t languageCapacity,const char ** pEnd)1211 ulocimp_getLanguage(const char *localeID,
1212 char *language, int32_t languageCapacity,
1213 const char **pEnd) {
1214 int32_t i=0;
1215 int32_t offset;
1216 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1217
1218 /* if it starts with i- or x- then copy that prefix */
1219 if(_isIDPrefix(localeID)) {
1220 if(i<languageCapacity) {
1221 language[i]=(char)uprv_tolower(*localeID);
1222 }
1223 if(i<languageCapacity) {
1224 language[i+1]='-';
1225 }
1226 i+=2;
1227 localeID+=2;
1228 }
1229
1230 /* copy the language as far as possible and count its length */
1231 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1232 if(i<languageCapacity) {
1233 language[i]=(char)uprv_tolower(*localeID);
1234 }
1235 if(i<3) {
1236 U_ASSERT(i>=0);
1237 lang[i]=(char)uprv_tolower(*localeID);
1238 }
1239 i++;
1240 localeID++;
1241 }
1242
1243 if(i==3) {
1244 /* convert 3 character code to 2 character code if possible *CWB*/
1245 offset=_findIndex(LANGUAGES_3, lang);
1246 if(offset>=0) {
1247 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1248 }
1249 }
1250
1251 if(pEnd!=NULL) {
1252 *pEnd=localeID;
1253 }
1254 return i;
1255 }
1256
1257 U_CFUNC int32_t
ulocimp_getScript(const char * localeID,char * script,int32_t scriptCapacity,const char ** pEnd)1258 ulocimp_getScript(const char *localeID,
1259 char *script, int32_t scriptCapacity,
1260 const char **pEnd)
1261 {
1262 int32_t idLen = 0;
1263
1264 if (pEnd != NULL) {
1265 *pEnd = localeID;
1266 }
1267
1268 /* copy the second item as far as possible and count its length */
1269 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1270 && uprv_isASCIILetter(localeID[idLen])) {
1271 idLen++;
1272 }
1273
1274 /* If it's exactly 4 characters long, then it's a script and not a country. */
1275 if (idLen == 4) {
1276 int32_t i;
1277 if (pEnd != NULL) {
1278 *pEnd = localeID+idLen;
1279 }
1280 if(idLen > scriptCapacity) {
1281 idLen = scriptCapacity;
1282 }
1283 if (idLen >= 1) {
1284 script[0]=(char)uprv_toupper(*(localeID++));
1285 }
1286 for (i = 1; i < idLen; i++) {
1287 script[i]=(char)uprv_tolower(*(localeID++));
1288 }
1289 }
1290 else {
1291 idLen = 0;
1292 }
1293 return idLen;
1294 }
1295
1296 U_CFUNC int32_t
ulocimp_getCountry(const char * localeID,char * country,int32_t countryCapacity,const char ** pEnd)1297 ulocimp_getCountry(const char *localeID,
1298 char *country, int32_t countryCapacity,
1299 const char **pEnd)
1300 {
1301 int32_t idLen=0;
1302 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1303 int32_t offset;
1304
1305 /* copy the country as far as possible and count its length */
1306 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1307 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1308 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1309 }
1310 idLen++;
1311 }
1312
1313 /* the country should be either length 2 or 3 */
1314 if (idLen == 2 || idLen == 3) {
1315 UBool gotCountry = FALSE;
1316 /* convert 3 character code to 2 character code if possible *CWB*/
1317 if(idLen==3) {
1318 offset=_findIndex(COUNTRIES_3, cnty);
1319 if(offset>=0) {
1320 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1321 gotCountry = TRUE;
1322 }
1323 }
1324 if (!gotCountry) {
1325 int32_t i = 0;
1326 for (i = 0; i < idLen; i++) {
1327 if (i < countryCapacity) {
1328 country[i]=(char)uprv_toupper(localeID[i]);
1329 }
1330 }
1331 }
1332 localeID+=idLen;
1333 } else {
1334 idLen = 0;
1335 }
1336
1337 if(pEnd!=NULL) {
1338 *pEnd=localeID;
1339 }
1340
1341 return idLen;
1342 }
1343
1344 /**
1345 * @param needSeparator if true, then add leading '_' if any variants
1346 * are added to 'variant'
1347 */
1348 static int32_t
_getVariantEx(const char * localeID,char prev,char * variant,int32_t variantCapacity,UBool needSeparator)1349 _getVariantEx(const char *localeID,
1350 char prev,
1351 char *variant, int32_t variantCapacity,
1352 UBool needSeparator) {
1353 int32_t i=0;
1354
1355 /* get one or more variant tags and separate them with '_' */
1356 if(_isIDSeparator(prev)) {
1357 /* get a variant string after a '-' or '_' */
1358 while(!_isTerminator(*localeID)) {
1359 if (needSeparator) {
1360 if (i<variantCapacity) {
1361 variant[i] = '_';
1362 }
1363 ++i;
1364 needSeparator = FALSE;
1365 }
1366 if(i<variantCapacity) {
1367 variant[i]=(char)uprv_toupper(*localeID);
1368 if(variant[i]=='-') {
1369 variant[i]='_';
1370 }
1371 }
1372 i++;
1373 localeID++;
1374 }
1375 }
1376
1377 /* if there is no variant tag after a '-' or '_' then look for '@' */
1378 if(i==0) {
1379 if(prev=='@') {
1380 /* keep localeID */
1381 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1382 ++localeID; /* point after the '@' */
1383 } else {
1384 return 0;
1385 }
1386 while(!_isTerminator(*localeID)) {
1387 if (needSeparator) {
1388 if (i<variantCapacity) {
1389 variant[i] = '_';
1390 }
1391 ++i;
1392 needSeparator = FALSE;
1393 }
1394 if(i<variantCapacity) {
1395 variant[i]=(char)uprv_toupper(*localeID);
1396 if(variant[i]=='-' || variant[i]==',') {
1397 variant[i]='_';
1398 }
1399 }
1400 i++;
1401 localeID++;
1402 }
1403 }
1404
1405 return i;
1406 }
1407
1408 static int32_t
_getVariant(const char * localeID,char prev,char * variant,int32_t variantCapacity)1409 _getVariant(const char *localeID,
1410 char prev,
1411 char *variant, int32_t variantCapacity) {
1412 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1413 }
1414
1415 /**
1416 * Delete ALL instances of a variant from the given list of one or
1417 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1418 * @param variants the source string of one or more variants,
1419 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1420 * terminated; if it is, trailing zero will NOT be maintained.
1421 * @param variantsLen length of variants
1422 * @param toDelete variant to delete, without separators, e.g. "EURO"
1423 * or "PREEURO"; not zero terminated
1424 * @param toDeleteLen length of toDelete
1425 * @return number of characters deleted from variants
1426 */
1427 static int32_t
_deleteVariant(char * variants,int32_t variantsLen,const char * toDelete,int32_t toDeleteLen)1428 _deleteVariant(char* variants, int32_t variantsLen,
1429 const char* toDelete, int32_t toDeleteLen)
1430 {
1431 int32_t delta = 0; /* number of chars deleted */
1432 for (;;) {
1433 UBool flag = FALSE;
1434 if (variantsLen < toDeleteLen) {
1435 return delta;
1436 }
1437 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1438 (variantsLen == toDeleteLen ||
1439 (flag=(variants[toDeleteLen] == '_'))))
1440 {
1441 int32_t d = toDeleteLen + (flag?1:0);
1442 variantsLen -= d;
1443 delta += d;
1444 if (variantsLen > 0) {
1445 uprv_memmove(variants, variants+d, variantsLen);
1446 }
1447 } else {
1448 char* p = _strnchr(variants, variantsLen, '_');
1449 if (p == NULL) {
1450 return delta;
1451 }
1452 ++p;
1453 variantsLen -= (int32_t)(p - variants);
1454 variants = p;
1455 }
1456 }
1457 }
1458
1459 /* Keyword enumeration */
1460
/*
 * State behind a keyword UEnumeration.  The uloc_kw_* callbacks read
 * 'keywords' as a sequence of NUL-terminated strings ended by an empty
 * string.
 */
typedef struct UKeywordsContext {
    char *keywords; /* start of the keyword list; freed by uloc_kw_closeKeywords */
    char *current;  /* next keyword to hand out; rewound to 'keywords' on reset */
} UKeywordsContext;
1465
1466 static void U_CALLCONV
uloc_kw_closeKeywords(UEnumeration * enumerator)1467 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1468 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1469 uprv_free(enumerator->context);
1470 uprv_free(enumerator);
1471 }
1472
1473 static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration * en,UErrorCode *)1474 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1475 char *kw = ((UKeywordsContext *)en->context)->keywords;
1476 int32_t result = 0;
1477 while(*kw) {
1478 result++;
1479 kw += uprv_strlen(kw)+1;
1480 }
1481 return result;
1482 }
1483
1484 static const char* U_CALLCONV
uloc_kw_nextKeyword(UEnumeration * en,int32_t * resultLength,UErrorCode *)1485 uloc_kw_nextKeyword(UEnumeration* en,
1486 int32_t* resultLength,
1487 UErrorCode* /*status*/) {
1488 const char* result = ((UKeywordsContext *)en->context)->current;
1489 int32_t len = 0;
1490 if(*result) {
1491 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1492 ((UKeywordsContext *)en->context)->current += len+1;
1493 } else {
1494 result = NULL;
1495 }
1496 if (resultLength) {
1497 *resultLength = len;
1498 }
1499 return result;
1500 }
1501
1502 static void U_CALLCONV
uloc_kw_resetKeywords(UEnumeration * en,UErrorCode *)1503 uloc_kw_resetKeywords(UEnumeration* en,
1504 UErrorCode* /*status*/) {
1505 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1506 }
1507
1508 static const UEnumeration gKeywordsEnum = {
1509 NULL,
1510 NULL,
1511 uloc_kw_closeKeywords,
1512 uloc_kw_countKeywords,
1513 uenum_unextDefault,
1514 uloc_kw_nextKeyword,
1515 uloc_kw_resetKeywords
1516 };
1517
1518 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char * keywordList,int32_t keywordListSize,UErrorCode * status)1519 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1520 {
1521 UKeywordsContext *myContext = NULL;
1522 UEnumeration *result = NULL;
1523
1524 if(U_FAILURE(*status)) {
1525 return NULL;
1526 }
1527 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1528 /* Null pointer test */
1529 if (result == NULL) {
1530 *status = U_MEMORY_ALLOCATION_ERROR;
1531 return NULL;
1532 }
1533 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1534 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1535 if (myContext == NULL) {
1536 *status = U_MEMORY_ALLOCATION_ERROR;
1537 uprv_free(result);
1538 return NULL;
1539 }
1540 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1541 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1542 myContext->keywords[keywordListSize] = 0;
1543 myContext->current = myContext->keywords;
1544 result->context = myContext;
1545 return result;
1546 }
1547
1548 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char * localeID,UErrorCode * status)1549 uloc_openKeywords(const char* localeID,
1550 UErrorCode* status)
1551 {
1552 int32_t i=0;
1553 char keywords[256];
1554 int32_t keywordsCapacity = 256;
1555 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1556 const char* tmpLocaleID;
1557
1558 if(status==NULL || U_FAILURE(*status)) {
1559 return 0;
1560 }
1561
1562 if (_hasBCP47Extension(localeID)) {
1563 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1564 } else {
1565 if (localeID==NULL) {
1566 localeID=uloc_getDefault();
1567 }
1568 tmpLocaleID=localeID;
1569 }
1570
1571 /* Skip the language */
1572 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1573 if(_isIDSeparator(*tmpLocaleID)) {
1574 const char *scriptID;
1575 /* Skip the script if available */
1576 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1577 if(scriptID != tmpLocaleID+1) {
1578 /* Found optional script */
1579 tmpLocaleID = scriptID;
1580 }
1581 /* Skip the Country */
1582 if (_isIDSeparator(*tmpLocaleID)) {
1583 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1584 if(_isIDSeparator(*tmpLocaleID)) {
1585 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1586 }
1587 }
1588 }
1589
1590 /* keywords are located after '@' */
1591 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1592 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1593 }
1594
1595 if(i) {
1596 return uloc_openKeywordList(keywords, i, status);
1597 } else {
1598 return NULL;
1599 }
1600 }
1601
1602
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'.  Both arguments are
   parenthesized so that compound expressions are combined as written. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The ID "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1611
1612 /**
1613 * Canonicalize the given localeID, to level 1 or to level 2,
1614 * depending on the options. To specify level 1, pass in options=0.
1615 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1616 *
1617 * This is the code underlying uloc_getName and uloc_canonicalize.
1618 */
1619 static int32_t
_canonicalize(const char * localeID,char * result,int32_t resultCapacity,uint32_t options,UErrorCode * err)1620 _canonicalize(const char* localeID,
1621 char* result,
1622 int32_t resultCapacity,
1623 uint32_t options,
1624 UErrorCode* err) {
1625 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1626 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1627 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1628 const char* origLocaleID;
1629 const char* tmpLocaleID;
1630 const char* keywordAssign = NULL;
1631 const char* separatorIndicator = NULL;
1632 const char* addKeyword = NULL;
1633 const char* addValue = NULL;
1634 char* name;
1635 char* variant = NULL; /* pointer into name, or NULL */
1636
1637 if (U_FAILURE(*err)) {
1638 return 0;
1639 }
1640
1641 if (_hasBCP47Extension(localeID)) {
1642 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1643 } else {
1644 if (localeID==NULL) {
1645 localeID=uloc_getDefault();
1646 }
1647 tmpLocaleID=localeID;
1648 }
1649
1650 origLocaleID=tmpLocaleID;
1651
1652 /* if we are doing a full canonicalization, then put results in
1653 localeBuffer, if necessary; otherwise send them to result. */
1654 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1655 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1656 name = localeBuffer;
1657 nameCapacity = (int32_t)sizeof(localeBuffer);
1658 } else {
1659 name = result;
1660 nameCapacity = resultCapacity;
1661 }
1662
1663 /* get all pieces, one after another, and separate with '_' */
1664 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1665
1666 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1667 const char *d = uloc_getDefault();
1668
1669 len = (int32_t)uprv_strlen(d);
1670
1671 if (name != NULL) {
1672 uprv_strncpy(name, d, len);
1673 }
1674 } else if(_isIDSeparator(*tmpLocaleID)) {
1675 const char *scriptID;
1676
1677 ++fieldCount;
1678 if(len<nameCapacity) {
1679 name[len]='_';
1680 }
1681 ++len;
1682
1683 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1684 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1685 if(scriptSize > 0) {
1686 /* Found optional script */
1687 tmpLocaleID = scriptID;
1688 ++fieldCount;
1689 len+=scriptSize;
1690 if (_isIDSeparator(*tmpLocaleID)) {
1691 /* If there is something else, then we add the _ */
1692 if(len<nameCapacity) {
1693 name[len]='_';
1694 }
1695 ++len;
1696 }
1697 }
1698
1699 if (_isIDSeparator(*tmpLocaleID)) {
1700 const char *cntryID;
1701 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1702 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1703 if (cntrySize > 0) {
1704 /* Found optional country */
1705 tmpLocaleID = cntryID;
1706 len+=cntrySize;
1707 }
1708 if(_isIDSeparator(*tmpLocaleID)) {
1709 /* If there is something else, then we add the _ if we found country before. */
1710 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1711 ++fieldCount;
1712 if(len<nameCapacity) {
1713 name[len]='_';
1714 }
1715 ++len;
1716 }
1717
1718 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1719 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1720 if (variantSize > 0) {
1721 variant = len<nameCapacity ? name+len : NULL;
1722 len += variantSize;
1723 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1724 }
1725 }
1726 }
1727 }
1728
1729 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1730 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1731 UBool done = FALSE;
1732 do {
1733 char c = *tmpLocaleID;
1734 switch (c) {
1735 case 0:
1736 case '@':
1737 done = TRUE;
1738 break;
1739 default:
1740 if (len<nameCapacity) {
1741 name[len] = c;
1742 }
1743 ++len;
1744 ++tmpLocaleID;
1745 break;
1746 }
1747 } while (!done);
1748 }
1749
1750 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1751 After this, tmpLocaleID either points to '@' or is NULL */
1752 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1753 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1754 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1755 }
1756
1757 /* Copy POSIX-style variant, if any [mr@FOO] */
1758 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1759 tmpLocaleID != NULL && keywordAssign == NULL) {
1760 for (;;) {
1761 char c = *tmpLocaleID;
1762 if (c == 0) {
1763 break;
1764 }
1765 if (len<nameCapacity) {
1766 name[len] = c;
1767 }
1768 ++len;
1769 ++tmpLocaleID;
1770 }
1771 }
1772
1773 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1774 /* Handle @FOO variant if @ is present and not followed by = */
1775 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1776 int32_t posixVariantSize;
1777 /* Add missing '_' if needed */
1778 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1779 do {
1780 if(len<nameCapacity) {
1781 name[len]='_';
1782 }
1783 ++len;
1784 ++fieldCount;
1785 } while(fieldCount<2);
1786 }
1787 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1788 (UBool)(variantSize > 0));
1789 if (posixVariantSize > 0) {
1790 if (variant == NULL) {
1791 variant = name+len;
1792 }
1793 len += posixVariantSize;
1794 variantSize += posixVariantSize;
1795 }
1796 }
1797
1798 /* Handle generic variants first */
1799 if (variant) {
1800 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1801 const char* variantToCompare = VARIANT_MAP[j].variant;
1802 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1803 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1804 len -= variantLen;
1805 if (variantLen > 0) {
1806 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1807 --len;
1808 }
1809 addKeyword = VARIANT_MAP[j].keyword;
1810 addValue = VARIANT_MAP[j].value;
1811 break;
1812 }
1813 }
1814 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1815 --len;
1816 }
1817 }
1818
1819 /* Look up the ID in the canonicalization map */
1820 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1821 const char* id = CANONICALIZE_MAP[j].id;
1822 int32_t n = (int32_t)uprv_strlen(id);
1823 if (len == n && uprv_strncmp(name, id, n) == 0) {
1824 if (n == 0 && tmpLocaleID != NULL) {
1825 break; /* Don't remap "" if keywords present */
1826 }
1827 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1828 if (CANONICALIZE_MAP[j].keyword) {
1829 addKeyword = CANONICALIZE_MAP[j].keyword;
1830 addValue = CANONICALIZE_MAP[j].value;
1831 }
1832 break;
1833 }
1834 }
1835 }
1836
1837 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1838 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1839 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1840 if(len<nameCapacity) {
1841 name[len]='@';
1842 }
1843 ++len;
1844 ++fieldCount;
1845 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1846 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1847 } else if (addKeyword != NULL) {
1848 U_ASSERT(addValue != NULL && len < nameCapacity);
1849 /* inelegant but works -- later make _getKeywords do this? */
1850 len += _copyCount(name+len, nameCapacity-len, "@");
1851 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1852 len += _copyCount(name+len, nameCapacity-len, "=");
1853 len += _copyCount(name+len, nameCapacity-len, addValue);
1854 }
1855 }
1856
1857 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1858 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1859 }
1860
1861 return u_terminateChars(result, resultCapacity, len, err);
1862 }
1863
1864 /* ### ID parsing API **************************************************/
1865
1866 U_CAPI int32_t U_EXPORT2
uloc_getParent(const char * localeID,char * parent,int32_t parentCapacity,UErrorCode * err)1867 uloc_getParent(const char* localeID,
1868 char* parent,
1869 int32_t parentCapacity,
1870 UErrorCode* err)
1871 {
1872 const char *lastUnderscore;
1873 int32_t i;
1874
1875 if (U_FAILURE(*err))
1876 return 0;
1877
1878 if (localeID == NULL)
1879 localeID = uloc_getDefault();
1880
1881 lastUnderscore=uprv_strrchr(localeID, '_');
1882 if(lastUnderscore!=NULL) {
1883 i=(int32_t)(lastUnderscore-localeID);
1884 } else {
1885 i=0;
1886 }
1887
1888 if(i>0 && parent != localeID) {
1889 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1890 }
1891 return u_terminateChars(parent, parentCapacity, i, err);
1892 }
1893
1894 U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char * localeID,char * language,int32_t languageCapacity,UErrorCode * err)1895 uloc_getLanguage(const char* localeID,
1896 char* language,
1897 int32_t languageCapacity,
1898 UErrorCode* err)
1899 {
1900 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1901 int32_t i=0;
1902
1903 if (err==NULL || U_FAILURE(*err)) {
1904 return 0;
1905 }
1906
1907 if(localeID==NULL) {
1908 localeID=uloc_getDefault();
1909 }
1910
1911 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1912 return u_terminateChars(language, languageCapacity, i, err);
1913 }
1914
1915 U_CAPI int32_t U_EXPORT2
uloc_getScript(const char * localeID,char * script,int32_t scriptCapacity,UErrorCode * err)1916 uloc_getScript(const char* localeID,
1917 char* script,
1918 int32_t scriptCapacity,
1919 UErrorCode* err)
1920 {
1921 int32_t i=0;
1922
1923 if(err==NULL || U_FAILURE(*err)) {
1924 return 0;
1925 }
1926
1927 if(localeID==NULL) {
1928 localeID=uloc_getDefault();
1929 }
1930
1931 /* skip the language */
1932 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1933 if(_isIDSeparator(*localeID)) {
1934 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1935 }
1936 return u_terminateChars(script, scriptCapacity, i, err);
1937 }
1938
1939 U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char * localeID,char * country,int32_t countryCapacity,UErrorCode * err)1940 uloc_getCountry(const char* localeID,
1941 char* country,
1942 int32_t countryCapacity,
1943 UErrorCode* err)
1944 {
1945 int32_t i=0;
1946
1947 if(err==NULL || U_FAILURE(*err)) {
1948 return 0;
1949 }
1950
1951 if(localeID==NULL) {
1952 localeID=uloc_getDefault();
1953 }
1954
1955 /* Skip the language */
1956 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1957 if(_isIDSeparator(*localeID)) {
1958 const char *scriptID;
1959 /* Skip the script if available */
1960 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1961 if(scriptID != localeID+1) {
1962 /* Found optional script */
1963 localeID = scriptID;
1964 }
1965 if(_isIDSeparator(*localeID)) {
1966 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1967 }
1968 }
1969 return u_terminateChars(country, countryCapacity, i, err);
1970 }
1971
/**
 * Extracts the variant part of a locale ID into the caller's buffer.
 * The language, optional script and optional country are skipped in turn;
 * whatever follows the next ID separator is handed to _getVariant().
 * POSIX-style "@variant" suffixes are deliberately not handled here (see the
 * retained note near the end of the function).
 *
 * @param localeID        locale ID to examine. NULL selects
 *                        uloc_getDefault() on the non-BCP47 path.
 * @param variant         output buffer for the variant.
 * @param variantCapacity capacity of variant in chars.
 * @param err             in/out error code; 0 is returned immediately when
 *                        it is NULL or already holds a failure.
 * @return the result of u_terminateChars() for the variant length
 *         (length 0 when no variant was looked up).
 */
U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    int32_t i=0;

    if(err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    /*
     * _hasBCP47Extension and _ConvertBCP47 are defined elsewhere in this file.
     * NOTE(review): presumably _ConvertBCP47 points tmpLocaleID either at a
     * converted copy in tempBuffer or back at localeID -- confirm against the
     * macro definition.
     */
    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    } else {
        if (localeID==NULL) {
            localeID=uloc_getDefault();
        }
        tmpLocaleID=localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
    if(_isIDSeparator(*tmpLocaleID)) {
        const char *scriptID;
        /* Skip the script if available */
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
        if(scriptID != tmpLocaleID+1) {
            /* Found optional script */
            tmpLocaleID = scriptID;
        }
        /* Skip the Country */
        if (_isIDSeparator(*tmpLocaleID)) {
            const char *cntryID;
            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
            if (cntryID != tmpLocaleID+1) {
                /* Found optional country */
                tmpLocaleID = cntryID;
            }
            if(_isIDSeparator(*tmpLocaleID)) {
                /* If there was no country ID, skip a possible extra IDSeparator */
                /* (tmpLocaleID differs from cntryID only when no country was consumed above) */
                if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
                    tmpLocaleID++;
                }
                /* The separator character itself is passed along as the "previous" char. */
                i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
            }
        }
    }

    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
/*
    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
        i=_getVariant(localeID+1, '@', variant, variantCapacity);
    }
*/
    return u_terminateChars(variant, variantCapacity, i, err);
}
2032
2033 U_CAPI int32_t U_EXPORT2
uloc_getName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2034 uloc_getName(const char* localeID,
2035 char* name,
2036 int32_t nameCapacity,
2037 UErrorCode* err)
2038 {
2039 return _canonicalize(localeID, name, nameCapacity, 0, err);
2040 }
2041
2042 U_CAPI int32_t U_EXPORT2
uloc_getBaseName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2043 uloc_getBaseName(const char* localeID,
2044 char* name,
2045 int32_t nameCapacity,
2046 UErrorCode* err)
2047 {
2048 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2049 }
2050
2051 U_CAPI int32_t U_EXPORT2
uloc_canonicalize(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2052 uloc_canonicalize(const char* localeID,
2053 char* name,
2054 int32_t nameCapacity,
2055 UErrorCode* err)
2056 {
2057 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2058 }
2059
2060 U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char * localeID)2061 uloc_getISO3Language(const char* localeID)
2062 {
2063 int16_t offset;
2064 char lang[ULOC_LANG_CAPACITY];
2065 UErrorCode err = U_ZERO_ERROR;
2066
2067 if (localeID == NULL)
2068 {
2069 localeID = uloc_getDefault();
2070 }
2071 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2072 if (U_FAILURE(err))
2073 return "";
2074 offset = _findIndex(LANGUAGES, lang);
2075 if (offset < 0)
2076 return "";
2077 return LANGUAGES_3[offset];
2078 }
2079
2080 U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char * localeID)2081 uloc_getISO3Country(const char* localeID)
2082 {
2083 int16_t offset;
2084 char cntry[ULOC_LANG_CAPACITY];
2085 UErrorCode err = U_ZERO_ERROR;
2086
2087 if (localeID == NULL)
2088 {
2089 localeID = uloc_getDefault();
2090 }
2091 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2092 if (U_FAILURE(err))
2093 return "";
2094 offset = _findIndex(COUNTRIES, cntry);
2095 if (offset < 0)
2096 return "";
2097
2098 return COUNTRIES_3[offset];
2099 }
2100
2101 U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char * localeID)2102 uloc_getLCID(const char* localeID)
2103 {
2104 UErrorCode status = U_ZERO_ERROR;
2105 char langID[ULOC_FULLNAME_CAPACITY];
2106
2107 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2108 if (U_FAILURE(status)) {
2109 return 0;
2110 }
2111
2112 if (uprv_strchr(localeID, '@')) {
2113 // uprv_convertToLCID does not support keywords other than collation.
2114 // Remove all keywords except collation.
2115 int32_t len;
2116 char collVal[ULOC_KEYWORDS_CAPACITY];
2117 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2118
2119 len = uloc_getKeywordValue(localeID, "collation", collVal,
2120 sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2121
2122 if (U_SUCCESS(status) && len > 0) {
2123 collVal[len] = 0;
2124
2125 len = uloc_getBaseName(localeID, tmpLocaleID,
2126 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2127
2128 if (U_SUCCESS(status)) {
2129 tmpLocaleID[len] = 0;
2130
2131 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2132 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2133
2134 if (U_SUCCESS(status)) {
2135 tmpLocaleID[len] = 0;
2136 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2137 }
2138 }
2139 }
2140
2141 // fall through - all keywords are simply ignored
2142 status = U_ZERO_ERROR;
2143 }
2144
2145 return uprv_convertToLCID(langID, localeID, &status);
2146 }
2147
2148 U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid,char * locale,int32_t localeCapacity,UErrorCode * status)2149 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2150 UErrorCode *status)
2151 {
2152 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2153 }
2154
2155 /* ### Default locale **************************************************/
2156
2157 U_CAPI const char* U_EXPORT2
uloc_getDefault()2158 uloc_getDefault()
2159 {
2160 return locale_get_default();
2161 }
2162
2163 U_CAPI void U_EXPORT2
uloc_setDefault(const char * newDefaultLocale,UErrorCode * err)2164 uloc_setDefault(const char* newDefaultLocale,
2165 UErrorCode* err)
2166 {
2167 if (U_FAILURE(*err))
2168 return;
2169 /* the error code isn't currently used for anything by this function*/
2170
2171 /* propagate change to C++ */
2172 locale_set_default(newDefaultLocale);
2173 }
2174
2175 /**
2176 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2177 * to an array of pointers to arrays of char. All of these pointers are owned
2178 * by ICU-- do not delete them, and do not write through them. The array is
2179 * terminated with a null pointer.
2180 */
2181 U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages()2182 uloc_getISOLanguages()
2183 {
2184 return LANGUAGES;
2185 }
2186
2187 /**
2188 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2189 * pointer to an array of pointers to arrays of char. All of these pointers are
2190 * owned by ICU-- do not delete them, and do not write through them. The array is
2191 * terminated with a null pointer.
2192 */
2193 U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()2194 uloc_getISOCountries()
2195 {
2196 return COUNTRIES;
2197 }
2198
2199
2200 /* this function to be moved into cstring.c later */
2201 static char gDecimal = 0;
2202
2203 static /* U_CAPI */
2204 double
2205 /* U_EXPORT2 */
_uloc_strtod(const char * start,char ** end)2206 _uloc_strtod(const char *start, char **end) {
2207 char *decimal;
2208 char *myEnd;
2209 char buf[30];
2210 double rv;
2211 if (!gDecimal) {
2212 char rep[5];
2213 /* For machines that decide to change the decimal on you,
2214 and try to be too smart with localization.
2215 This normally should be just a '.'. */
2216 sprintf(rep, "%+1.1f", 1.0);
2217 gDecimal = rep[2];
2218 }
2219
2220 if(gDecimal == '.') {
2221 return uprv_strtod(start, end); /* fall through to OS */
2222 } else {
2223 uprv_strncpy(buf, start, 29);
2224 buf[29]=0;
2225 decimal = uprv_strchr(buf, '.');
2226 if(decimal) {
2227 *decimal = gDecimal;
2228 } else {
2229 return uprv_strtod(start, end); /* no decimal point */
2230 }
2231 rv = uprv_strtod(buf, &myEnd);
2232 if(end) {
2233 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2234 }
2235 return rv;
2236 }
2237 }
2238
/*
 * One entry parsed from an HTTP Accept-Language header by
 * uloc_acceptLanguageFromHTTP(); an array of these is sorted with
 * uloc_acceptLanguageCompare().
 */
typedef struct {
    float q;        /* quality value: parsed from the text after ';', or 1.0 when the entry has no ';' */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID (uprv_strndup/uprv_strdup); released with uprv_free */
} _acceptLangItem;
2244
2245 static int32_t U_CALLCONV
uloc_acceptLanguageCompare(const void *,const void * a,const void * b)2246 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2247 {
2248 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2249 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2250
2251 int32_t rc = 0;
2252 if(bb->q < aa->q) {
2253 rc = -1; /* A > B */
2254 } else if(bb->q > aa->q) {
2255 rc = 1; /* A < B */
2256 } else {
2257 rc = 0; /* A = B */
2258 }
2259
2260 if(rc==0) {
2261 rc = uprv_stricmp(aa->locale, bb->locale);
2262 }
2263
2264 #if defined(ULOC_DEBUG)
2265 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2266 aa->locale, aa->q,
2267 bb->locale, bb->q,
2268 rc);*/
2269 #endif
2270
2271 return rc;
2272 }
2273
2274 /*
2275 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2276 */
2277
2278 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)2279 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2280 const char *httpAcceptLanguage,
2281 UEnumeration* availableLocales,
2282 UErrorCode *status)
2283 {
2284 _acceptLangItem *j;
2285 _acceptLangItem smallBuffer[30];
2286 char **strs;
2287 char tmp[ULOC_FULLNAME_CAPACITY +1];
2288 int32_t n = 0;
2289 const char *itemEnd;
2290 const char *paramEnd;
2291 const char *s;
2292 const char *t;
2293 int32_t res;
2294 int32_t i;
2295 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2296 int32_t jSize;
2297 char *tempstr; /* Use for null pointer check */
2298
2299 j = smallBuffer;
2300 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2301 if(U_FAILURE(*status)) {
2302 return -1;
2303 }
2304
2305 for(s=httpAcceptLanguage;s&&*s;) {
2306 while(isspace(*s)) /* eat space at the beginning */
2307 s++;
2308 itemEnd=uprv_strchr(s,',');
2309 paramEnd=uprv_strchr(s,';');
2310 if(!itemEnd) {
2311 itemEnd = httpAcceptLanguage+l; /* end of string */
2312 }
2313 if(paramEnd && paramEnd<itemEnd) {
2314 /* semicolon (;) is closer than end (,) */
2315 t = paramEnd+1;
2316 if(*t=='q') {
2317 t++;
2318 }
2319 while(isspace(*t)) {
2320 t++;
2321 }
2322 if(*t=='=') {
2323 t++;
2324 }
2325 while(isspace(*t)) {
2326 t++;
2327 }
2328 j[n].q = (float)_uloc_strtod(t,NULL);
2329 } else {
2330 /* no semicolon - it's 1.0 */
2331 j[n].q = 1.0f;
2332 paramEnd = itemEnd;
2333 }
2334 j[n].dummy=0;
2335 /* eat spaces prior to semi */
2336 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2337 ;
2338 /* Check for null pointer from uprv_strndup */
2339 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2340 if (tempstr == NULL) {
2341 *status = U_MEMORY_ALLOCATION_ERROR;
2342 return -1;
2343 }
2344 j[n].locale = tempstr;
2345 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2346 if(strcmp(j[n].locale,tmp)) {
2347 uprv_free(j[n].locale);
2348 j[n].locale=uprv_strdup(tmp);
2349 }
2350 #if defined(ULOC_DEBUG)
2351 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2352 #endif
2353 n++;
2354 s = itemEnd;
2355 while(*s==',') { /* eat duplicate commas */
2356 s++;
2357 }
2358 if(n>=jSize) {
2359 if(j==smallBuffer) { /* overflowed the small buffer. */
2360 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2361 if(j!=NULL) {
2362 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2363 }
2364 #if defined(ULOC_DEBUG)
2365 fprintf(stderr,"malloced at size %d\n", jSize);
2366 #endif
2367 } else {
2368 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2369 #if defined(ULOC_DEBUG)
2370 fprintf(stderr,"re-alloced at size %d\n", jSize);
2371 #endif
2372 }
2373 jSize *= 2;
2374 if(j==NULL) {
2375 *status = U_MEMORY_ALLOCATION_ERROR;
2376 return -1;
2377 }
2378 }
2379 }
2380 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2381 if(U_FAILURE(*status)) {
2382 if(j != smallBuffer) {
2383 #if defined(ULOC_DEBUG)
2384 fprintf(stderr,"freeing j %p\n", j);
2385 #endif
2386 uprv_free(j);
2387 }
2388 return -1;
2389 }
2390 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2391 /* Check for null pointer */
2392 if (strs == NULL) {
2393 uprv_free(j); /* Free to avoid memory leak */
2394 *status = U_MEMORY_ALLOCATION_ERROR;
2395 return -1;
2396 }
2397 for(i=0;i<n;i++) {
2398 #if defined(ULOC_DEBUG)
2399 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2400 #endif
2401 strs[i]=j[i].locale;
2402 }
2403 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2404 (const char**)strs, n, availableLocales, status);
2405 for(i=0;i<n;i++) {
2406 uprv_free(strs[i]);
2407 }
2408 uprv_free(strs);
2409 if(j != smallBuffer) {
2410 #if defined(ULOC_DEBUG)
2411 fprintf(stderr,"freeing j %p\n", j);
2412 #endif
2413 uprv_free(j);
2414 }
2415 return res;
2416 }
2417
2418
2419 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)2420 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2421 UAcceptResult *outResult, const char **acceptList,
2422 int32_t acceptListCount,
2423 UEnumeration* availableLocales,
2424 UErrorCode *status)
2425 {
2426 int32_t i,j;
2427 int32_t len;
2428 int32_t maxLen=0;
2429 char tmp[ULOC_FULLNAME_CAPACITY+1];
2430 const char *l;
2431 char **fallbackList;
2432 if(U_FAILURE(*status)) {
2433 return -1;
2434 }
2435 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2436 if(fallbackList==NULL) {
2437 *status = U_MEMORY_ALLOCATION_ERROR;
2438 return -1;
2439 }
2440 for(i=0;i<acceptListCount;i++) {
2441 #if defined(ULOC_DEBUG)
2442 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2443 #endif
2444 while((l=uenum_next(availableLocales, NULL, status))) {
2445 #if defined(ULOC_DEBUG)
2446 fprintf(stderr," %s\n", l);
2447 #endif
2448 len = (int32_t)uprv_strlen(l);
2449 if(!uprv_strcmp(acceptList[i], l)) {
2450 if(outResult) {
2451 *outResult = ULOC_ACCEPT_VALID;
2452 }
2453 #if defined(ULOC_DEBUG)
2454 fprintf(stderr, "MATCH! %s\n", l);
2455 #endif
2456 if(len>0) {
2457 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2458 }
2459 for(j=0;j<i;j++) {
2460 uprv_free(fallbackList[j]);
2461 }
2462 uprv_free(fallbackList);
2463 return u_terminateChars(result, resultAvailable, len, status);
2464 }
2465 if(len>maxLen) {
2466 maxLen = len;
2467 }
2468 }
2469 uenum_reset(availableLocales, status);
2470 /* save off parent info */
2471 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2472 fallbackList[i] = uprv_strdup(tmp);
2473 } else {
2474 fallbackList[i]=0;
2475 }
2476 }
2477
2478 for(maxLen--;maxLen>0;maxLen--) {
2479 for(i=0;i<acceptListCount;i++) {
2480 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2481 #if defined(ULOC_DEBUG)
2482 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2483 #endif
2484 while((l=uenum_next(availableLocales, NULL, status))) {
2485 #if defined(ULOC_DEBUG)
2486 fprintf(stderr," %s\n", l);
2487 #endif
2488 len = (int32_t)uprv_strlen(l);
2489 if(!uprv_strcmp(fallbackList[i], l)) {
2490 if(outResult) {
2491 *outResult = ULOC_ACCEPT_FALLBACK;
2492 }
2493 #if defined(ULOC_DEBUG)
2494 fprintf(stderr, "fallback MATCH! %s\n", l);
2495 #endif
2496 if(len>0) {
2497 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2498 }
2499 for(j=0;j<acceptListCount;j++) {
2500 uprv_free(fallbackList[j]);
2501 }
2502 uprv_free(fallbackList);
2503 return u_terminateChars(result, resultAvailable, len, status);
2504 }
2505 }
2506 uenum_reset(availableLocales, status);
2507
2508 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2509 uprv_free(fallbackList[i]);
2510 fallbackList[i] = uprv_strdup(tmp);
2511 } else {
2512 uprv_free(fallbackList[i]);
2513 fallbackList[i]=0;
2514 }
2515 }
2516 }
2517 if(outResult) {
2518 *outResult = ULOC_ACCEPT_FAILED;
2519 }
2520 }
2521 for(i=0;i<acceptListCount;i++) {
2522 uprv_free(fallbackList[i]);
2523 }
2524 uprv_free(fallbackList);
2525 return -1;
2526 }
2527
2528 /*eof*/
2529