1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ULOC.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
22
23 /*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29 */
30
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34
35 #include "putilimp.h"
36 #include "ustr_imp.h"
37 #include "ulocimp.h"
38 #include "umutex.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include "ucln_cmn.h"
42 #include "locmap.h"
43 #include "uarrsort.h"
44 #include "uenumimp.h"
45 #include "uassert.h"
46
47 #include <stdio.h> /* for sprintf */
48
49 /* ### Declarations **************************************************/
50
51 /* Locale stuff from locid.cpp */
52 U_CFUNC void locale_set_default(const char *id);
53 U_CFUNC const char *locale_get_default(void);
54 U_CFUNC int32_t
55 locale_getKeywords(const char *localeID,
56 char prev,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
59 UBool valuesToo,
60 UErrorCode *status);
61
62 /* ### Data tables **************************************************/
63
/**
 * Language codes, 2-letter where one exists and 3-letter otherwise
 * (3-letter codes appear only for languages lacking a 2-letter code).
 *
 * Layout: a sorted primary list, a NULL terminator, a secondary list of
 * obsolete codes, and a final NULL.  Only the primary list is visible to
 * user code when this array is handed out by API; the secondary list
 * holds codes that are supported but not exposed.
 *
 * Invariants:
 *  - the primary list must stay in sorted order;
 *  - LANGUAGES[i] and LANGUAGES_3[i] must describe the same language for
 *    every i, so the two tables have to be edited together.
 *
 * Notes:
 *  - Updated per http://lcweb.loc.gov/standards/iso639-2/ including the
 *    revisions up to 2001/7/27 *CWB*.
 *  - The 3 character codes are the terminology codes like RFC 3066, which
 *    is compatible with prior ICU codes.
 *  - "in" "iw" "ji" "jw" & "sh" have been withdrawn.  They are kept, but
 *    at the end of the table, because their 3 character codes are
 *    duplicates; this avoids bad searches going from 3 to 2 character
 *    codes.
 *  - The range qaa-qtz is reserved for local use.
 */
static const char * const LANGUAGES[] = {
    /* a */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa", "afh",
    "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp",
    "apa", "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast", "ath",
    "aus", "av",  "awa", "ay",  "az",
    /* b */
    "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "be",  "bej", "bem",
    "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin", "bla", "bm",  "bn",
    "bnt", "bo",  "br",  "bra", "bs",  "btk", "bua", "bug", "byn",
    /* c */
    "ca",  "cad", "cai", "car", "cau", "cch", "ce",  "ceb", "cel", "ch",
    "chb", "chg", "chk", "chm", "chn", "cho", "chp", "chr", "chy", "cmc",
    "co",  "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb",
    "cu",  "cus", "cv",  "cy",
    /* d */
    "da",  "dak", "dar", "day", "de",  "del", "den", "dgr", "din", "doi",
    "dra", "dsb", "dua", "dum", "dv",  "dyu", "dz",
    /* e */
    "ee",  "efi", "egy", "eka", "el",  "elx", "en",  "enm", "eo",  "es",
    "et",  "eu",  "ewo",
    /* f */
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    /* g */
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh",
    "gn",  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",
    "gwi",
    /* h */
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit", "hmn", "ho",
    "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    /* i */
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",  "ilo", "inc",
    "ine", "inh", "io",  "ira", "iro", "is",  "it",  "iu",
    /* j */
    "ja",  "jbo", "jpr", "jrb", "jv",
    /* k */
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg",
    "kfo", "kg",  "kha", "khi", "kho", "ki",  "kj",  "kk",  "kl",  "km",
    "kmb", "kn",  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro",
    "kru", "ks",  "ku",  "kum", "kut", "kv",  "kw",  "ky",
    /* l */
    "la",  "lad", "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",
    "lol", "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus", "lv",
    /* m */
    "mad", "mag", "mai", "mak", "man", "map", "mas", "mdf", "mdr", "men",
    "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min", "mis", "mk",  "mkh",
    "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh", "mos", "mr",  "ms",
    "mt",  "mul", "mun", "mus", "mwl", "mwr", "my",  "myn", "myv",
    /* n */
    "na",  "nah", "nai", "nap", "nb",  "nd",  "nds", "ne",  "new", "ng",
    "nia", "nic", "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",
    "nso", "nub", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    /* o */
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    /* p */
    "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",
    /* q */
    "qu",
    /* r */
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom", "ru",  "rup",
    "rw",
    /* s */
    "sa",  "sad", "sah", "sai", "sal", "sam", "sas", "sat", "sc",  "scn",
    "sco", "sd",  "se",  "sel", "sem", "sg",  "sga", "sgn", "shn", "si",
    "sid", "sio", "sit", "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj",
    "smn", "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",  "srn",
    "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux", "sv",  "sw",
    "syc", "syr",
    /* t */
    "ta",  "tai", "te",  "tem", "ter", "tet", "tg",  "th",  "ti",  "tig",
    "tiv", "tk",  "tkl", "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog",
    "tpi", "tr",  "trv", "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl",
    "tw",  "ty",  "tyv",
    /* u */
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    /* v */
    "vai", "ve",  "vi",  "vo",  "vot",
    /* w */
    "wa",  "wak", "wal", "war", "was", "wen", "wo",
    /* x */
    "xal", "xh",
    /* y */
    "yao", "yap", "yi",  "yo",  "ypk",
    /* z */
    "za",  "zap", "zbl", "zen", "zh",  "znd", "zu",  "zun", "zxx", "zza",
    NULL,   /* end of the user-visible list */
    /* obsolete language codes */
    "in",  "iw",  "ji",  "jw",  "sh",
    NULL
};
/*
 * Deprecated language codes and their replacements.  The two arrays are
 * parallel: REPLACEMENT_LANGUAGES[i] is the replacement for
 * DEPRECATED_LANGUAGES[i].  Each array ends with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id",   /* in */
    "he",   /* iw */
    "yi",   /* ji */
    "jv",   /* jw */
    NULL,
    NULL
};
165
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing the pairing easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 * Every entry in this table is therefore exactly three letters long;
 * in particular "kg" (Kongo) maps to its ISO 639-2 code "kon".
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",    */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
302
/**
 * 2-letter country codes.
 *
 * Layout: a sorted primary list, a NULL terminator, a secondary list of
 * obsolete codes, and a final NULL.  Only the primary list is visible to
 * user code when this array is handed out by API; the secondary list
 * holds codes that are supported but not exposed.
 *
 * Invariants:
 *  - the primary list must stay in sorted order;
 *  - COUNTRIES[i] and COUNTRIES_3[i] must describe the same country for
 *    every i, so the two tables have to be edited together.
 *
 * Notes:
 *  - ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 *    http://www.evertype.com/standards/iso3166/iso3166-1-en.html; new
 *    codes were added while keeping the old ones for compatibility.
 *    Updated to include the 1999/12/03 revisions *CWB*.
 *  - RO(ROM) is now RO(ROU) according to
 *    http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    /* A */
    "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", "AO", "AQ",
    "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    /* B */
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BL",
    "BM", "BN", "BO", "BR", "BS", "BT", "BV", "BW", "BY", "BZ",
    /* C */
    "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM",
    "CN", "CO", "CR", "CU", "CV", "CX", "CY", "CZ",
    /* D */
    "DE", "DJ", "DK", "DM", "DO", "DZ",
    /* E */
    "EC", "EE", "EG", "EH", "ER", "ES", "ET",
    /* F */
    "FI", "FJ", "FK", "FM", "FO", "FR",
    /* G */
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", "GM",
    "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY",
    /* H */
    "HK", "HM", "HN", "HR", "HT", "HU",
    /* I */
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", "IT",
    /* J */
    "JE", "JM", "JO", "JP",
    /* K */
    "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY",
    "KZ",
    /* L */
    "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV",
    "LY",
    /* M */
    "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", "ML", "MM",
    "MN", "MO", "MP", "MQ", "MR", "MS", "MT", "MU", "MV", "MW",
    "MX", "MY", "MZ",
    /* N */
    "NA", "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", "NR",
    "NU", "NZ",
    /* O */
    "OM",
    /* P */
    "PA", "PE", "PF", "PG", "PH", "PK", "PL", "PM", "PN", "PR",
    "PS", "PT", "PW", "PY",
    /* Q */
    "QA",
    /* R */
    "RE", "RO", "RS", "RU", "RW",
    /* S */
    "SA", "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", "SK",
    "SL", "SM", "SN", "SO", "SR", "ST", "SV", "SY", "SZ",
    /* T */
    "TC", "TD", "TF", "TG", "TH", "TJ", "TK", "TL", "TM", "TN",
    "TO", "TR", "TT", "TV", "TW", "TZ",
    /* U */
    "UA", "UG", "UM", "US", "UY", "UZ",
    /* V */
    "VA", "VC", "VE", "VG", "VI", "VN", "VU",
    /* W */
    "WF", "WS",
    /* Y */
    "YE", "YT",
    /* Z */
    "ZA", "ZM", "ZW",
    NULL,   /* end of the user-visible list */
    /* obsolete country codes */
    "FX", "CS", "RO", "TP", "YU", "ZR",
    NULL
};
362
/*
 * Deprecated country codes and their replacements.  The two arrays are
 * parallel: REPLACEMENT_COUNTRIES[i] is the replacement for
 * DEPRECATED_COUNTRIES[i].  Each array ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    /* deprecated country list */
    "BU", "CS", "DY", "FX", "HV",
    "NH", "RH", "TP", "YU", "ZR",
    NULL, NULL
};
static const char* const REPLACEMENT_COUNTRIES[] = {
    /* replacement country codes */
/*  "BU", "CS", "DY", "FX", "HV", */
    "MM", "RS", "BJ", "FR", "BF",
/*  "NH", "RH", "TP", "YU", "ZR" */
    "VU", "ZW", "TL", "RS", "CD",
    NULL, NULL
};
370
/**
 * 3-letter country codes, parallel to COUNTRIES.
 *
 * This lookup table is used to convert 3-letter country codes to their
 * 2-letter equivalent.  For all valid i, COUNTRIES_3[i] must refer to
 * the same country as COUNTRIES[i], so the two tables must be edited
 * together.  Each group below is preceded by a comment carrying the
 * matching COUNTRIES entries, in the same order, to make the pairing
 * easy to check by eye.
 *
 * Layout: primary list, NULL, secondary (obsolete) list, NULL.  The two
 * lists correspond to the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",  "AO",  "AQ",  */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT", "AGO", "ATA",
/*  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",  */
    "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",  "BJ",  "BL",  */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", "BEN", "BLM",
/*  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",  "BW",  "BY",  "BZ",  */
    "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT", "BWA", "BLR", "BLZ",
/*  "CA",  "CC",  "CD",  "CF",  "CG",  "CH",  "CI",  "CK",  "CL",  "CM",  */
    "CAN", "CCK", "COD", "CAF", "COG", "CHE", "CIV", "COK", "CHL", "CMR",
/*  "CN",  "CO",  "CR",  "CU",  "CV",  "CX",  "CY",  "CZ",  */
    "CHN", "COL", "CRI", "CUB", "CPV", "CXR", "CYP", "CZE",
/*  "DE",  "DJ",  "DK",  "DM",  "DO",  "DZ",  */
    "DEU", "DJI", "DNK", "DMA", "DOM", "DZA",
/*  "EC",  "EE",  "EG",  "EH",  "ER",  "ES",  "ET",  */
    "ECU", "EST", "EGY", "ESH", "ERI", "ESP", "ETH",
/*  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",  */
    "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",  "GM",  */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", "GMB",
/*  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",  "GW",  "GY",  */
    "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", "GNB", "GUY",
/*  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",  */
    "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",  "IT",  */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", "ITA",
/*  "JE",  "JM",  "JO",  "JP",  */
    "JEY", "JAM", "JOR", "JPN",
/*  "KE",  "KG",  "KH",  "KI",  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  */
    "KEN", "KGZ", "KHM", "KIR", "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ",
/*  "LA",  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",  "LV",  "LY",  */
    "LAO", "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", "LVA", "LBY",
/*  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",  "ML",  "MM",  */
    "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", "MLI", "MMR",
/*  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",  "MT",  "MU",  "MV",  "MW",  */
    "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", "MLT", "MUS", "MDV", "MWI",
/*  "MX",  "MY",  "MZ",  */
    "MEX", "MYS", "MOZ",
/*  "NA",  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",  "NR",  */
    "NAM", "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", "NRU",
/*  "NU",  "NZ",  "OM",  */
    "NIU", "NZL", "OMN",
/*  "PA",  "PE",  "PF",  "PG",  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  */
    "PAN", "PER", "PYF", "PNG", "PHL", "PAK", "POL", "SPM", "PCN", "PRI",
/*  "PS",  "PT",  "PW",  "PY",  "QA",  */
    "PSE", "PRT", "PLW", "PRY", "QAT",
/*  "RE",  "RO",  "RS",  "RU",  "RW",  */
    "REU", "ROU", "SRB", "RUS", "RWA",
/*  "SA",  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",  "SK",  */
    "SAU", "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", "SVK",
/*  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",  "SY",  "SZ",  */
    "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV", "SYR", "SWZ",
/*  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",  "TK",  "TL",  "TM",  "TN",  */
    "TCA", "TCD", "ATF", "TGO", "THA", "TJK", "TKL", "TLS", "TKM", "TUN",
/*  "TO",  "TR",  "TT",  "TV",  "TW",  "TZ",  */
    "TON", "TUR", "TTO", "TUV", "TWN", "TZA",
/*  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",  */
    "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT",
/*  "WF",  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",  */
    "WLF", "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
NULL
};
450
/* One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * Map used to canonicalize locale IDs.  The rows cover several
 * semantically different kinds of transformation: POSIX and .NET
 * aliases, registered names that need an empty country field, Linux
 * names, and old ICU variants that are re-expressed as a
 * keyword/value pair on a simpler ID.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* id                    canonicalID      keyword      value          */
    { "",                    "en_US_POSIX",   NULL,        NULL          }, /* .NET name */
    { "c",                   "en_US_POSIX",   NULL,        NULL          }, /* POSIX name */
    { "posix",               "en_US_POSIX",   NULL,        NULL          }, /* POSIX name (alias of C) */
    { "art_LOJBAN",          "jbo",           NULL,        NULL          }, /* registered name */
    { "az_AZ_CYRL",          "az_Cyrl_AZ",    NULL,        NULL          }, /* .NET name */
    { "az_AZ_LATN",          "az_Latn_AZ",    NULL,        NULL          }, /* .NET name */
    { "ca_ES_PREEURO",       "ca_ES",         "currency",  "ESP"         },
    { "cel_GAULISH",         "cel__GAULISH",  NULL,        NULL          }, /* registered name */
    { "de_1901",             "de__1901",      NULL,        NULL          }, /* registered name */
    { "de_1906",             "de__1906",      NULL,        NULL          }, /* registered name */
    { "de__PHONEBOOK",       "de",            "collation", "phonebook"   }, /* Old ICU name */
    { "de_AT_PREEURO",       "de_AT",         "currency",  "ATS"         },
    { "de_DE_PREEURO",       "de_DE",         "currency",  "DEM"         },
    { "de_LU_PREEURO",       "de_LU",         "currency",  "LUF"         },
    { "el_GR_PREEURO",       "el_GR",         "currency",  "GRD"         },
    { "en_BOONT",            "en__BOONT",     NULL,        NULL          }, /* registered name */
    { "en_SCOUSE",           "en__SCOUSE",    NULL,        NULL          }, /* registered name */
    { "en_BE_PREEURO",       "en_BE",         "currency",  "BEF"         },
    { "en_IE_PREEURO",       "en_IE",         "currency",  "IEP"         },
    { "es__TRADITIONAL",     "es",            "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",       "es_ES",         "currency",  "ESP"         },
    { "eu_ES_PREEURO",       "eu_ES",         "currency",  "ESP"         },
    { "fi_FI_PREEURO",       "fi_FI",         "currency",  "FIM"         },
    { "fr_BE_PREEURO",       "fr_BE",         "currency",  "BEF"         },
    { "fr_FR_PREEURO",       "fr_FR",         "currency",  "FRF"         },
    { "fr_LU_PREEURO",       "fr_LU",         "currency",  "LUF"         },
    { "ga_IE_PREEURO",       "ga_IE",         "currency",  "IEP"         },
    { "gl_ES_PREEURO",       "gl_ES",         "currency",  "ESP"         },
    { "hi__DIRECT",          "hi",            "collation", "direct"      }, /* Old ICU name */
    { "it_IT_PREEURO",       "it_IT",         "currency",  "ITL"         },
    { "ja_JP_TRADITIONAL",   "ja_JP",         "calendar",  "japanese"    }, /* Old ICU name */
    { "nb_NO_NY",            "nn_NO",         NULL,        NULL          }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",       "nl_BE",         "currency",  "BEF"         },
    { "nl_NL_PREEURO",       "nl_NL",         "currency",  "NLG"         },
    { "pt_PT_PREEURO",       "pt_PT",         "currency",  "PTE"         },
    { "sl_ROZAJ",            "sl__ROZAJ",     NULL,        NULL          }, /* registered name */
    { "sr_SP_CYRL",          "sr_Cyrl_RS",    NULL,        NULL          }, /* .NET name */
    { "sr_SP_LATN",          "sr_Latn_RS",    NULL,        NULL          }, /* .NET name */
    { "sr_YU_CYRILLIC",      "sr_Cyrl_RS",    NULL,        NULL          }, /* Linux name */
    { "th_TH_TRADITIONAL",   "th_TH",         "calendar",  "buddhist"    }, /* Old ICU name */
    { "uz_UZ_CYRILLIC",      "uz_Cyrl_UZ",    NULL,        NULL          }, /* Linux name */
    { "uz_UZ_CYRL",          "uz_Cyrl_UZ",    NULL,        NULL          }, /* .NET name */
    { "uz_UZ_LATN",          "uz_Latn_UZ",    NULL,        NULL          }, /* .NET name */
    { "zh_CHS",              "zh_Hans",       NULL,        NULL          }, /* .NET name */
    { "zh_CHT",              "zh_Hant",       NULL,        NULL          }, /* .NET name */
    { "zh_GAN",              "zh__GAN",       NULL,        NULL          }, /* registered name */
    { "zh_GUOYU",            "zh",            NULL,        NULL          }, /* registered name */
    { "zh_HAKKA",            "zh__HAKKA",     NULL,        NULL          }, /* registered name */
    { "zh_MIN",              "zh__MIN",       NULL,        NULL          }, /* registered name */
    { "zh_MIN_NAN",          "zh__MINNAN",    NULL,        NULL          }, /* registered name */
    { "zh_WUU",              "zh__WUU",       NULL,        NULL          }, /* registered name */
    { "zh_XIANG",            "zh__XIANG",     NULL,        NULL          }, /* registered name */
    { "zh_YUE",              "zh__YUE",       NULL,        NULL          }, /* registered name */
};
517
/* One row of the variant-to-keyword table. */
typedef struct VariantMap {
    const char *variant; /* input ID */
    const char *keyword; /* keyword, or NULL if none */
    const char *value;   /* keyword value, or NULL if kw==NULL */
} VariantMap;

/* Variants that are expressed as a keyword/value pair instead. */
static const VariantMap VARIANT_MAP[] = {
    /* variant   keyword       value     */
    { "EURO",    "currency",   "EUR"    },
    { "PINYIN",  "collation",  "pinyin" }, /* Solaris variant */
    { "STROKE",  "collation",  "stroke" }  /* Solaris variant */
};
529
530 /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 u extension and does not have '@'.
 * True when id is non-NULL, contains no '@', and its shortest subtag
 * (as reported by getShortestSubtagLength) is exactly one character long.
 *
 * The test is applied to the macro argument itself, not to whatever
 * variable named "localeID" happens to be in scope at the call site, and
 * the expansion is fully parenthesized so it can be negated or combined
 * safely.  The argument is evaluated more than once; pass an expression
 * without side effects.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id via uloc_forLanguageTag().
 * finalID is pointed at buffer when the conversion yields a positive
 * length and *err does not indicate failure; otherwise finalID is set to
 * the original id, i.e. nothing is done to id if the conversion fails.
 *
 * Expands to a bare if/else statement, so it must be used as a statement
 * of its own (not as the unbraced body of another if).
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) > 0 && !U_FAILURE(*err)) { \
            finalID=buffer; \
        } else { \
            finalID=id; \
        }
/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are the runs of characters between '_' or '-' separators.  A
 * subtag's length is only compared when the separator that ends it is
 * reached, so the final subtag (the one ending at the end of the string)
 * is not taken into account.  When no non-empty subtag is followed by a
 * separator, the full length of localeID is returned.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    const int32_t idLength = (int32_t)uprv_strlen(localeID);
    const char *limit = localeID + idLength;
    const char *p;
    int32_t shortest = idLength;
    int32_t current = 0;    /* length of the subtag being scanned */

    for (p = localeID; p < limit; ++p) {
        if (*p == '_' || *p == '-') {
            /* a separator closes the current subtag; empty subtags are ignored */
            if (current != 0 && current < shortest) {
                shortest = current;
            }
            current = 0;
        } else {
            ++current;
        }
    }

    return shortest;
}
565
566 /* ### Keywords **************************************************/
567
/* Size in bytes, including the terminating NUL, of a keyword-name buffer (see KeywordStruct.keyword). */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords that _getKeywords() collects from one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
570
571 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char * localeID)572 locale_getKeywordsStart(const char *localeID) {
573 const char *result = NULL;
574 if((result = uprv_strchr(localeID, '@')) != NULL) {
575 return result;
576 }
577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
578 else {
579 /* We do this because the @ sign is variant, and the @ sign used on one
580 EBCDIC machine won't be compiled the same way on other EBCDIC based
581 machines. */
582 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
583 const uint8_t *charToFind = ebcdicSigns;
584 while(*charToFind) {
585 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
586 return result;
587 }
588 charToFind++;
589 }
590 }
591 #endif
592 return NULL;
593 }
594
595 /**
596 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
597 * @param keywordName incoming name to be canonicalized
598 * @param status return status (keyword too long)
599 * @return length of the keyword name
600 */
locale_canonKeywordName(char * buf,const char * keywordName,UErrorCode * status)601 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
602 {
603 int32_t i;
604 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
605
606 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
607 /* keyword name too long for internal buffer */
608 *status = U_INTERNAL_PROGRAM_ERROR;
609 return 0;
610 }
611
612 /* normalize the keyword name */
613 for(i = 0; i < keywordNameLen; i++) {
614 buf[i] = uprv_tolower(keywordName[i]);
615 }
616 buf[i] = 0;
617
618 return keywordNameLen;
619 }
620
621 typedef struct {
622 char keyword[ULOC_KEYWORD_BUFFER_LEN];
623 int32_t keywordLen;
624 const char *valueStart;
625 int32_t valueLen;
626 } KeywordStruct;
627
628 static int32_t U_CALLCONV
compareKeywordStructs(const void * context,const void * left,const void * right)629 compareKeywordStructs(const void *context, const void *left, const void *right) {
630 const char* leftString = ((const KeywordStruct *)left)->keyword;
631 const char* rightString = ((const KeywordStruct *)right)->keyword;
632 return uprv_strcmp(leftString, rightString);
633 }
634
635 /**
636 * Both addKeyword and addValue must already be in canonical form.
637 * Either both addKeyword and addValue are NULL, or neither is NULL.
638 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
640 */
641 static int32_t
_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,const char * addKeyword,const char * addValue,UErrorCode * status)642 _getKeywords(const char *localeID,
643 char prev,
644 char *keywords, int32_t keywordCapacity,
645 char *values, int32_t valuesCapacity, int32_t *valLen,
646 UBool valuesToo,
647 const char* addKeyword,
648 const char* addValue,
649 UErrorCode *status)
650 {
651 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
652
653 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
654 int32_t numKeywords = 0;
655 const char* pos = localeID;
656 const char* equalSign = NULL;
657 const char* semicolon = NULL;
658 int32_t i = 0, j, n;
659 int32_t keywordsLen = 0;
660 int32_t valuesLen = 0;
661
662 if(prev == '@') { /* start of keyword definition */
663 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
664 do {
665 UBool duplicate = FALSE;
666 /* skip leading spaces */
667 while(*pos == ' ') {
668 pos++;
669 }
670 if (!*pos) { /* handle trailing "; " */
671 break;
672 }
673 if(numKeywords == maxKeywords) {
674 *status = U_INTERNAL_PROGRAM_ERROR;
675 return 0;
676 }
677 equalSign = uprv_strchr(pos, '=');
678 semicolon = uprv_strchr(pos, ';');
679 /* lack of '=' [foo@currency] is illegal */
680 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
681 if(!equalSign || (semicolon && semicolon<equalSign)) {
682 *status = U_INVALID_FORMAT_ERROR;
683 return 0;
684 }
685 /* need to normalize both keyword and keyword name */
686 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
687 /* keyword name too long for internal buffer */
688 *status = U_INTERNAL_PROGRAM_ERROR;
689 return 0;
690 }
691 for(i = 0, n = 0; i < equalSign - pos; ++i) {
692 if (pos[i] != ' ') {
693 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
694 }
695 }
696
697 /* zero-length keyword is an error. */
698 if (n == 0) {
699 *status = U_INVALID_FORMAT_ERROR;
700 return 0;
701 }
702
703 keywordList[numKeywords].keyword[n] = 0;
704 keywordList[numKeywords].keywordLen = n;
705 /* now grab the value part. First we skip the '=' */
706 equalSign++;
707 /* then we leading spaces */
708 while(*equalSign == ' ') {
709 equalSign++;
710 }
711
712 /* Premature end or zero-length value */
713 if (!equalSign || equalSign == semicolon) {
714 *status = U_INVALID_FORMAT_ERROR;
715 return 0;
716 }
717
718 keywordList[numKeywords].valueStart = equalSign;
719
720 pos = semicolon;
721 i = 0;
722 if(pos) {
723 while(*(pos - i - 1) == ' ') {
724 i++;
725 }
726 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
727 pos++;
728 } else {
729 i = (int32_t)uprv_strlen(equalSign);
730 while(i && equalSign[i-1] == ' ') {
731 i--;
732 }
733 keywordList[numKeywords].valueLen = i;
734 }
735 /* If this is a duplicate keyword, then ignore it */
736 for (j=0; j<numKeywords; ++j) {
737 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
738 duplicate = TRUE;
739 break;
740 }
741 }
742 if (!duplicate) {
743 ++numKeywords;
744 }
745 } while(pos);
746
747 /* Handle addKeyword/addValue. */
748 if (addKeyword != NULL) {
749 UBool duplicate = FALSE;
750 U_ASSERT(addValue != NULL);
751 /* Search for duplicate; if found, do nothing. Explicit keyword
752 overrides addKeyword. */
753 for (j=0; j<numKeywords; ++j) {
754 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
755 duplicate = TRUE;
756 break;
757 }
758 }
759 if (!duplicate) {
760 if (numKeywords == maxKeywords) {
761 *status = U_INTERNAL_PROGRAM_ERROR;
762 return 0;
763 }
764 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
765 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
766 keywordList[numKeywords].valueStart = addValue;
767 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
768 ++numKeywords;
769 }
770 } else {
771 U_ASSERT(addValue == NULL);
772 }
773
774 /* now we have a list of keywords */
775 /* we need to sort it */
776 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
777
778 /* Now construct the keyword part */
779 for(i = 0; i < numKeywords; i++) {
780 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
781 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
782 if(valuesToo) {
783 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
784 } else {
785 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
786 }
787 }
788 keywordsLen += keywordList[i].keywordLen + 1;
789 if(valuesToo) {
790 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
791 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
792 }
793 keywordsLen += keywordList[i].valueLen;
794
795 if(i < numKeywords - 1) {
796 if(keywordsLen < keywordCapacity) {
797 keywords[keywordsLen] = ';';
798 }
799 keywordsLen++;
800 }
801 }
802 if(values) {
803 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
804 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
805 values[valuesLen + keywordList[i].valueLen] = 0;
806 }
807 valuesLen += keywordList[i].valueLen + 1;
808 }
809 }
810 if(values) {
811 values[valuesLen] = 0;
812 if(valLen) {
813 *valLen = valuesLen;
814 }
815 }
816 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
817 } else {
818 return 0;
819 }
820 }
821
822 U_CFUNC int32_t
locale_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,UErrorCode * status)823 locale_getKeywords(const char *localeID,
824 char prev,
825 char *keywords, int32_t keywordCapacity,
826 char *values, int32_t valuesCapacity, int32_t *valLen,
827 UBool valuesToo,
828 UErrorCode *status) {
829 return _getKeywords(localeID, prev, keywords, keywordCapacity,
830 values, valuesCapacity, valLen, valuesToo,
831 NULL, NULL, status);
832 }
833
834 U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char * localeID,const char * keywordName,char * buffer,int32_t bufferCapacity,UErrorCode * status)835 uloc_getKeywordValue(const char* localeID,
836 const char* keywordName,
837 char* buffer, int32_t bufferCapacity,
838 UErrorCode* status)
839 {
840 const char* startSearchHere = NULL;
841 const char* nextSeparator = NULL;
842 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
843 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
844 int32_t i = 0;
845 int32_t result = 0;
846
847 if(status && U_SUCCESS(*status) && localeID) {
848 char tempBuffer[ULOC_FULLNAME_CAPACITY];
849 const char* tmpLocaleID;
850
851 if (_hasBCP47Extension(localeID)) {
852 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
853 } else {
854 tmpLocaleID=localeID;
855 }
856
857 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
858 if(startSearchHere == NULL) {
859 /* no keywords, return at once */
860 return 0;
861 }
862
863 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
864 if(U_FAILURE(*status)) {
865 return 0;
866 }
867
868 /* find the first keyword */
869 while(startSearchHere) {
870 startSearchHere++;
871 /* skip leading spaces (allowed?) */
872 while(*startSearchHere == ' ') {
873 startSearchHere++;
874 }
875 nextSeparator = uprv_strchr(startSearchHere, '=');
876 /* need to normalize both keyword and keyword name */
877 if(!nextSeparator) {
878 break;
879 }
880 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
881 /* keyword name too long for internal buffer */
882 *status = U_INTERNAL_PROGRAM_ERROR;
883 return 0;
884 }
885 for(i = 0; i < nextSeparator - startSearchHere; i++) {
886 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
887 }
888 /* trim trailing spaces */
889 while(startSearchHere[i-1] == ' ') {
890 i--;
891 }
892 localeKeywordNameBuffer[i] = 0;
893
894 startSearchHere = uprv_strchr(nextSeparator, ';');
895
896 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
897 nextSeparator++;
898 while(*nextSeparator == ' ') {
899 nextSeparator++;
900 }
901 /* we actually found the keyword. Copy the value */
902 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
903 while(*(startSearchHere-1) == ' ') {
904 startSearchHere--;
905 }
906 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
907 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
908 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
909 i = (int32_t)uprv_strlen(nextSeparator);
910 while(nextSeparator[i - 1] == ' ') {
911 i--;
912 }
913 uprv_strncpy(buffer, nextSeparator, i);
914 result = u_terminateChars(buffer, bufferCapacity, i, status);
915 } else {
916 /* give a bigger buffer, please */
917 *status = U_BUFFER_OVERFLOW_ERROR;
918 if(startSearchHere) {
919 result = (int32_t)(startSearchHere - nextSeparator);
920 } else {
921 result = (int32_t)uprv_strlen(nextSeparator);
922 }
923 }
924 return result;
925 }
926 }
927 }
928 return 0;
929 }
930
931 U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char * keywordName,const char * keywordValue,char * buffer,int32_t bufferCapacity,UErrorCode * status)932 uloc_setKeywordValue(const char* keywordName,
933 const char* keywordValue,
934 char* buffer, int32_t bufferCapacity,
935 UErrorCode* status)
936 {
937 /* TODO: sorting. removal. */
938 int32_t keywordNameLen;
939 int32_t keywordValueLen;
940 int32_t bufLen;
941 int32_t needLen = 0;
942 int32_t foundValueLen;
943 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
944 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
945 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
946 int32_t i = 0;
947 int32_t rc;
948 char* nextSeparator = NULL;
949 char* nextEqualsign = NULL;
950 char* startSearchHere = NULL;
951 char* keywordStart = NULL;
952 char *insertHere = NULL;
953 if(U_FAILURE(*status)) {
954 return -1;
955 }
956 if(bufferCapacity>1) {
957 bufLen = (int32_t)uprv_strlen(buffer);
958 } else {
959 *status = U_ILLEGAL_ARGUMENT_ERROR;
960 return 0;
961 }
962 if(bufferCapacity<bufLen) {
963 /* The capacity is less than the length?! Is this NULL terminated? */
964 *status = U_ILLEGAL_ARGUMENT_ERROR;
965 return 0;
966 }
967 if(keywordValue && !*keywordValue) {
968 keywordValue = NULL;
969 }
970 if(keywordValue) {
971 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
972 } else {
973 keywordValueLen = 0;
974 }
975 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
976 if(U_FAILURE(*status)) {
977 return 0;
978 }
979 startSearchHere = (char*)locale_getKeywordsStart(buffer);
980 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
981 if(!keywordValue) { /* no keywords = nothing to remove */
982 return bufLen;
983 }
984
985 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
986 if(startSearchHere) { /* had a single @ */
987 needLen--; /* already had the @ */
988 /* startSearchHere points at the @ */
989 } else {
990 startSearchHere=buffer+bufLen;
991 }
992 if(needLen >= bufferCapacity) {
993 *status = U_BUFFER_OVERFLOW_ERROR;
994 return needLen; /* no change */
995 }
996 *startSearchHere = '@';
997 startSearchHere++;
998 uprv_strcpy(startSearchHere, keywordNameBuffer);
999 startSearchHere += keywordNameLen;
1000 *startSearchHere = '=';
1001 startSearchHere++;
1002 uprv_strcpy(startSearchHere, keywordValue);
1003 startSearchHere+=keywordValueLen;
1004 return needLen;
1005 } /* end shortcut - no @ */
1006
1007 keywordStart = startSearchHere;
1008 /* search for keyword */
1009 while(keywordStart) {
1010 keywordStart++;
1011 /* skip leading spaces (allowed?) */
1012 while(*keywordStart == ' ') {
1013 keywordStart++;
1014 }
1015 nextEqualsign = uprv_strchr(keywordStart, '=');
1016 /* need to normalize both keyword and keyword name */
1017 if(!nextEqualsign) {
1018 break;
1019 }
1020 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1021 /* keyword name too long for internal buffer */
1022 *status = U_INTERNAL_PROGRAM_ERROR;
1023 return 0;
1024 }
1025 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1026 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1027 }
1028 /* trim trailing spaces */
1029 while(keywordStart[i-1] == ' ') {
1030 i--;
1031 }
1032 localeKeywordNameBuffer[i] = 0;
1033
1034 nextSeparator = uprv_strchr(nextEqualsign, ';');
1035 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1036 if(rc == 0) {
1037 nextEqualsign++;
1038 while(*nextEqualsign == ' ') {
1039 nextEqualsign++;
1040 }
1041 /* we actually found the keyword. Change the value */
1042 if (nextSeparator) {
1043 keywordAtEnd = 0;
1044 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1045 } else {
1046 keywordAtEnd = 1;
1047 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1048 }
1049 if(keywordValue) { /* adding a value - not removing */
1050 if(foundValueLen == keywordValueLen) {
1051 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1052 return bufLen; /* no change in size */
1053 } else if(foundValueLen > keywordValueLen) {
1054 int32_t delta = foundValueLen - keywordValueLen;
1055 if(nextSeparator) { /* RH side */
1056 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1057 }
1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1059 bufLen -= delta;
1060 buffer[bufLen]=0;
1061 return bufLen;
1062 } else { /* FVL < KVL */
1063 int32_t delta = keywordValueLen - foundValueLen;
1064 if((bufLen+delta) >= bufferCapacity) {
1065 *status = U_BUFFER_OVERFLOW_ERROR;
1066 return bufLen+delta;
1067 }
1068 if(nextSeparator) { /* RH side */
1069 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1070 }
1071 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1072 bufLen += delta;
1073 buffer[bufLen]=0;
1074 return bufLen;
1075 }
1076 } else { /* removing a keyword */
1077 if(keywordAtEnd) {
1078 /* zero out the ';' or '@' just before startSearchhere */
1079 keywordStart[-1] = 0;
1080 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1081 } else {
1082 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1083 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1084 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1085 }
1086 }
1087 } else if(rc<0){ /* end match keyword */
1088 /* could insert at this location. */
1089 insertHere = keywordStart;
1090 }
1091 keywordStart = nextSeparator;
1092 } /* end loop searching */
1093
1094 if(!keywordValue) {
1095 return bufLen; /* removal of non-extant keyword - no change */
1096 }
1097
1098 /* we know there is at least one keyword. */
1099 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1100 if(needLen >= bufferCapacity) {
1101 *status = U_BUFFER_OVERFLOW_ERROR;
1102 return needLen; /* no change */
1103 }
1104
1105 if(insertHere) {
1106 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1107 keywordStart = insertHere;
1108 } else {
1109 keywordStart = buffer+bufLen;
1110 *keywordStart = ';';
1111 keywordStart++;
1112 }
1113 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1114 keywordStart += keywordNameLen;
1115 *keywordStart = '=';
1116 keywordStart++;
1117 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1118 keywordStart+=keywordValueLen;
1119 if(insertHere) {
1120 *keywordStart = ';';
1121 keywordStart++;
1122 }
1123 buffer[needLen]=0;
1124 return needLen;
1125 }
1126
1127 /* ### ID parsing implementation **************************************************/
1128
/* Arguments are fully parenthesized so that expression arguments expand
   correctly. They are still evaluated more than once: do not pass
   expressions with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1139
/*
 * Bounded character search: examines at most len characters of str and stops
 * early at a zero byte. Returns a pointer to the first occurrence of c, or
 * NULL when c was not seen. The candidate is compared with c before it is
 * tested for zero, so searching for 0 finds the terminator.
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    int32_t remaining;

    U_ASSERT(str != 0 && len >= 0);
    for (remaining = len; remaining != 0; --remaining, ++str) {
        if (*str == c) {
            return (char*) str;
        }
        if (*str == 0) {
            return NULL;
        }
    }
    return NULL;
}
1153
1154 /**
1155 * Lookup 'key' in the array 'list'. The array 'list' should contain
1156 * a NULL entry, followed by more entries, and a second NULL entry.
1157 *
1158 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1159 * COUNTRIES_3.
1160 */
_findIndex(const char * const * list,const char * key)1161 static int16_t _findIndex(const char* const* list, const char* key)
1162 {
1163 const char* const* anchor = list;
1164 int32_t pass = 0;
1165
1166 /* Make two passes through two NULL-terminated arrays at 'list' */
1167 while (pass++ < 2) {
1168 while (*list) {
1169 if (uprv_strcmp(key, *list) == 0) {
1170 return (int16_t)(list - anchor);
1171 }
1172 list++;
1173 }
1174 ++list; /* skip final NULL *CWB*/
1175 }
1176 return -1;
1177 }
1178
1179 /* count the length of src while copying it to dest; return strlen(src) */
1180 static U_INLINE int32_t
_copyCount(char * dest,int32_t destCapacity,const char * src)1181 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1182 const char *anchor;
1183 char c;
1184
1185 anchor=src;
1186 for(;;) {
1187 if((c=*src)==0) {
1188 return (int32_t)(src-anchor);
1189 }
1190 if(destCapacity<=0) {
1191 return (int32_t)((src-anchor)+uprv_strlen(src));
1192 }
1193 ++src;
1194 *dest++=c;
1195 --destCapacity;
1196 }
1197 }
1198
1199 U_CFUNC const char*
uloc_getCurrentCountryID(const char * oldID)1200 uloc_getCurrentCountryID(const char* oldID){
1201 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1202 if (offset >= 0) {
1203 return REPLACEMENT_COUNTRIES[offset];
1204 }
1205 return oldID;
1206 }
1207 U_CFUNC const char*
uloc_getCurrentLanguageID(const char * oldID)1208 uloc_getCurrentLanguageID(const char* oldID){
1209 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1210 if (offset >= 0) {
1211 return REPLACEMENT_LANGUAGES[offset];
1212 }
1213 return oldID;
1214 }
/*
 * The internal functions ulocimp_getLanguage(), ulocimp_getScript() and
 * ulocimp_getCountry() avoid duplicating the code that handles the earlier
 * locale ID pieces in the functions for the later ones: each of them sets
 * *pEnd to the position where it stopped parsing.
 *
 * TODO try to use this in Locale
 */
1223 U_CFUNC int32_t
ulocimp_getLanguage(const char * localeID,char * language,int32_t languageCapacity,const char ** pEnd)1224 ulocimp_getLanguage(const char *localeID,
1225 char *language, int32_t languageCapacity,
1226 const char **pEnd) {
1227 int32_t i=0;
1228 int32_t offset;
1229 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1230
1231 /* if it starts with i- or x- then copy that prefix */
1232 if(_isIDPrefix(localeID)) {
1233 if(i<languageCapacity) {
1234 language[i]=(char)uprv_tolower(*localeID);
1235 }
1236 if(i<languageCapacity) {
1237 language[i+1]='-';
1238 }
1239 i+=2;
1240 localeID+=2;
1241 }
1242
1243 /* copy the language as far as possible and count its length */
1244 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1245 if(i<languageCapacity) {
1246 language[i]=(char)uprv_tolower(*localeID);
1247 }
1248 if(i<3) {
1249 lang[i]=(char)uprv_tolower(*localeID);
1250 }
1251 i++;
1252 localeID++;
1253 }
1254
1255 if(i==3) {
1256 /* convert 3 character code to 2 character code if possible *CWB*/
1257 offset=_findIndex(LANGUAGES_3, lang);
1258 if(offset>=0) {
1259 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1260 }
1261 }
1262
1263 if(pEnd!=NULL) {
1264 *pEnd=localeID;
1265 }
1266 return i;
1267 }
1268
1269 U_CFUNC int32_t
ulocimp_getScript(const char * localeID,char * script,int32_t scriptCapacity,const char ** pEnd)1270 ulocimp_getScript(const char *localeID,
1271 char *script, int32_t scriptCapacity,
1272 const char **pEnd)
1273 {
1274 int32_t idLen = 0;
1275
1276 if (pEnd != NULL) {
1277 *pEnd = localeID;
1278 }
1279
1280 /* copy the second item as far as possible and count its length */
1281 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1282 idLen++;
1283 }
1284
1285 /* If it's exactly 4 characters long, then it's a script and not a country. */
1286 if (idLen == 4) {
1287 int32_t i;
1288 if (pEnd != NULL) {
1289 *pEnd = localeID+idLen;
1290 }
1291 if(idLen > scriptCapacity) {
1292 idLen = scriptCapacity;
1293 }
1294 if (idLen >= 1) {
1295 script[0]=(char)uprv_toupper(*(localeID++));
1296 }
1297 for (i = 1; i < idLen; i++) {
1298 script[i]=(char)uprv_tolower(*(localeID++));
1299 }
1300 }
1301 else {
1302 idLen = 0;
1303 }
1304 return idLen;
1305 }
1306
1307 U_CFUNC int32_t
ulocimp_getCountry(const char * localeID,char * country,int32_t countryCapacity,const char ** pEnd)1308 ulocimp_getCountry(const char *localeID,
1309 char *country, int32_t countryCapacity,
1310 const char **pEnd)
1311 {
1312 int32_t idLen=0;
1313 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1314 int32_t offset;
1315
1316 /* copy the country as far as possible and count its length */
1317 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1318 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1319 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1320 }
1321 idLen++;
1322 }
1323
1324 /* the country should be either length 2 or 3 */
1325 if (idLen == 2 || idLen == 3) {
1326 UBool gotCountry = FALSE;
1327 /* convert 3 character code to 2 character code if possible *CWB*/
1328 if(idLen==3) {
1329 offset=_findIndex(COUNTRIES_3, cnty);
1330 if(offset>=0) {
1331 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1332 gotCountry = TRUE;
1333 }
1334 }
1335 if (!gotCountry) {
1336 int32_t i = 0;
1337 for (i = 0; i < idLen; i++) {
1338 if (i < countryCapacity) {
1339 country[i]=(char)uprv_toupper(localeID[i]);
1340 }
1341 }
1342 }
1343 localeID+=idLen;
1344 } else {
1345 idLen = 0;
1346 }
1347
1348 if(pEnd!=NULL) {
1349 *pEnd=localeID;
1350 }
1351
1352 return idLen;
1353 }
1354
1355 /**
1356 * @param needSeparator if true, then add leading '_' if any variants
1357 * are added to 'variant'
1358 */
1359 static int32_t
_getVariantEx(const char * localeID,char prev,char * variant,int32_t variantCapacity,UBool needSeparator)1360 _getVariantEx(const char *localeID,
1361 char prev,
1362 char *variant, int32_t variantCapacity,
1363 UBool needSeparator) {
1364 int32_t i=0;
1365
1366 /* get one or more variant tags and separate them with '_' */
1367 if(_isIDSeparator(prev)) {
1368 /* get a variant string after a '-' or '_' */
1369 while(!_isTerminator(*localeID)) {
1370 if (needSeparator) {
1371 if (i<variantCapacity) {
1372 variant[i] = '_';
1373 }
1374 ++i;
1375 needSeparator = FALSE;
1376 }
1377 if(i<variantCapacity) {
1378 variant[i]=(char)uprv_toupper(*localeID);
1379 if(variant[i]=='-') {
1380 variant[i]='_';
1381 }
1382 }
1383 i++;
1384 localeID++;
1385 }
1386 }
1387
1388 /* if there is no variant tag after a '-' or '_' then look for '@' */
1389 if(i==0) {
1390 if(prev=='@') {
1391 /* keep localeID */
1392 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1393 ++localeID; /* point after the '@' */
1394 } else {
1395 return 0;
1396 }
1397 while(!_isTerminator(*localeID)) {
1398 if (needSeparator) {
1399 if (i<variantCapacity) {
1400 variant[i] = '_';
1401 }
1402 ++i;
1403 needSeparator = FALSE;
1404 }
1405 if(i<variantCapacity) {
1406 variant[i]=(char)uprv_toupper(*localeID);
1407 if(variant[i]=='-' || variant[i]==',') {
1408 variant[i]='_';
1409 }
1410 }
1411 i++;
1412 localeID++;
1413 }
1414 }
1415
1416 return i;
1417 }
1418
1419 static int32_t
_getVariant(const char * localeID,char prev,char * variant,int32_t variantCapacity)1420 _getVariant(const char *localeID,
1421 char prev,
1422 char *variant, int32_t variantCapacity) {
1423 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1424 }
1425
1426 /**
1427 * Delete ALL instances of a variant from the given list of one or
1428 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1429 * @param variants the source string of one or more variants,
1430 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1431 * terminated; if it is, trailing zero will NOT be maintained.
1432 * @param variantsLen length of variants
1433 * @param toDelete variant to delete, without separators, e.g. "EURO"
1434 * or "PREEURO"; not zero terminated
1435 * @param toDeleteLen length of toDelete
1436 * @return number of characters deleted from variants
1437 */
1438 static int32_t
_deleteVariant(char * variants,int32_t variantsLen,const char * toDelete,int32_t toDeleteLen)1439 _deleteVariant(char* variants, int32_t variantsLen,
1440 const char* toDelete, int32_t toDeleteLen)
1441 {
1442 int32_t delta = 0; /* number of chars deleted */
1443 for (;;) {
1444 UBool flag = FALSE;
1445 if (variantsLen < toDeleteLen) {
1446 return delta;
1447 }
1448 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1449 (variantsLen == toDeleteLen ||
1450 (flag=(variants[toDeleteLen] == '_'))))
1451 {
1452 int32_t d = toDeleteLen + (flag?1:0);
1453 variantsLen -= d;
1454 delta += d;
1455 if (variantsLen > 0) {
1456 uprv_memmove(variants, variants+d, variantsLen);
1457 }
1458 } else {
1459 char* p = _strnchr(variants, variantsLen, '_');
1460 if (p == NULL) {
1461 return delta;
1462 }
1463 ++p;
1464 variantsLen -= (int32_t)(p - variants);
1465 variants = p;
1466 }
1467 }
1468 }
1469
1470 /* Keyword enumeration */
1471
/*
 * Per-enumeration state of a keyword UEnumeration.
 */
typedef struct UKeywordsContext {
    /* list of zero-terminated keywords, ended by an empty string */
    char *keywords;
    /* keyword that the next call to uloc_kw_nextKeyword() returns */
    char *current;
} UKeywordsContext;
1476
1477 static void U_CALLCONV
uloc_kw_closeKeywords(UEnumeration * enumerator)1478 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1479 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1480 uprv_free(enumerator->context);
1481 uprv_free(enumerator);
1482 }
1483
1484 static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration * en,UErrorCode * status)1485 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1486 char *kw = ((UKeywordsContext *)en->context)->keywords;
1487 int32_t result = 0;
1488 while(*kw) {
1489 result++;
1490 kw += uprv_strlen(kw)+1;
1491 }
1492 return result;
1493 }
1494
1495 static const char* U_CALLCONV
uloc_kw_nextKeyword(UEnumeration * en,int32_t * resultLength,UErrorCode * status)1496 uloc_kw_nextKeyword(UEnumeration* en,
1497 int32_t* resultLength,
1498 UErrorCode* status) {
1499 const char* result = ((UKeywordsContext *)en->context)->current;
1500 int32_t len = 0;
1501 if(*result) {
1502 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1503 ((UKeywordsContext *)en->context)->current += len+1;
1504 } else {
1505 result = NULL;
1506 }
1507 if (resultLength) {
1508 *resultLength = len;
1509 }
1510 return result;
1511 }
1512
1513 static void U_CALLCONV
uloc_kw_resetKeywords(UEnumeration * en,UErrorCode * status)1514 uloc_kw_resetKeywords(UEnumeration* en,
1515 UErrorCode* status) {
1516 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1517 }
1518
1519 static const UEnumeration gKeywordsEnum = {
1520 NULL,
1521 NULL,
1522 uloc_kw_closeKeywords,
1523 uloc_kw_countKeywords,
1524 uenum_unextDefault,
1525 uloc_kw_nextKeyword,
1526 uloc_kw_resetKeywords
1527 };
1528
1529 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char * keywordList,int32_t keywordListSize,UErrorCode * status)1530 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1531 {
1532 UKeywordsContext *myContext = NULL;
1533 UEnumeration *result = NULL;
1534
1535 if(U_FAILURE(*status)) {
1536 return NULL;
1537 }
1538 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1539 /* Null pointer test */
1540 if (result == NULL) {
1541 *status = U_MEMORY_ALLOCATION_ERROR;
1542 return NULL;
1543 }
1544 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1545 myContext = uprv_malloc(sizeof(UKeywordsContext));
1546 if (myContext == NULL) {
1547 *status = U_MEMORY_ALLOCATION_ERROR;
1548 uprv_free(result);
1549 return NULL;
1550 }
1551 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1552 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1553 myContext->keywords[keywordListSize] = 0;
1554 myContext->current = myContext->keywords;
1555 result->context = myContext;
1556 return result;
1557 }
1558
1559 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char * localeID,UErrorCode * status)1560 uloc_openKeywords(const char* localeID,
1561 UErrorCode* status)
1562 {
1563 int32_t i=0;
1564 char keywords[256];
1565 int32_t keywordsCapacity = 256;
1566 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1567 const char* tmpLocaleID;
1568
1569 if(status==NULL || U_FAILURE(*status)) {
1570 return 0;
1571 }
1572
1573 if (_hasBCP47Extension(localeID)) {
1574 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1575 } else {
1576 if (localeID==NULL) {
1577 localeID=uloc_getDefault();
1578 }
1579 tmpLocaleID=localeID;
1580 }
1581
1582 /* Skip the language */
1583 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1584 if(_isIDSeparator(*tmpLocaleID)) {
1585 const char *scriptID;
1586 /* Skip the script if available */
1587 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1588 if(scriptID != tmpLocaleID+1) {
1589 /* Found optional script */
1590 tmpLocaleID = scriptID;
1591 }
1592 /* Skip the Country */
1593 if (_isIDSeparator(*tmpLocaleID)) {
1594 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1595 if(_isIDSeparator(*tmpLocaleID)) {
1596 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1597 }
1598 }
1599 }
1600
1601 /* keywords are located after '@' */
1602 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1603 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1604 }
1605
1606 if(i) {
1607 return uloc_openKeywordList(keywords, i, status);
1608 } else {
1609 return NULL;
1610 }
1611 }
1612
1613
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that expressions such as (a | b) expand correctly. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* "i-default" without a terminating zero; compare it by length only */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* cast to int32_t: the value is compared with int32_t lengths */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1622
1623 /**
1624 * Canonicalize the given localeID, to level 1 or to level 2,
1625 * depending on the options. To specify level 1, pass in options=0.
1626 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1627 *
1628 * This is the code underlying uloc_getName and uloc_canonicalize.
1629 */
1630 static int32_t
_canonicalize(const char * localeID,char * result,int32_t resultCapacity,uint32_t options,UErrorCode * err)1631 _canonicalize(const char* localeID,
1632 char* result,
1633 int32_t resultCapacity,
1634 uint32_t options,
1635 UErrorCode* err) {
1636 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1637 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1638 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1639 const char* origLocaleID;
1640 const char* tmpLocaleID;
1641 const char* keywordAssign = NULL;
1642 const char* separatorIndicator = NULL;
1643 const char* addKeyword = NULL;
1644 const char* addValue = NULL;
1645 char* name;
1646 char* variant = NULL; /* pointer into name, or NULL */
1647
1648 if (U_FAILURE(*err)) {
1649 return 0;
1650 }
1651
1652 if (_hasBCP47Extension(localeID)) {
1653 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1654 } else {
1655 if (localeID==NULL) {
1656 localeID=uloc_getDefault();
1657 }
1658 tmpLocaleID=localeID;
1659 }
1660
1661 origLocaleID=tmpLocaleID;
1662
1663 /* if we are doing a full canonicalization, then put results in
1664 localeBuffer, if necessary; otherwise send them to result. */
1665 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1666 (result == NULL || resultCapacity < sizeof(localeBuffer))) {
1667 name = localeBuffer;
1668 nameCapacity = sizeof(localeBuffer);
1669 } else {
1670 name = result;
1671 nameCapacity = resultCapacity;
1672 }
1673
1674 /* get all pieces, one after another, and separate with '_' */
1675 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1676
1677 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1678 const char *d = uloc_getDefault();
1679
1680 len = (int32_t)uprv_strlen(d);
1681
1682 if (name != NULL) {
1683 uprv_strncpy(name, d, len);
1684 }
1685 } else if(_isIDSeparator(*tmpLocaleID)) {
1686 const char *scriptID;
1687
1688 ++fieldCount;
1689 if(len<nameCapacity) {
1690 name[len]='_';
1691 }
1692 ++len;
1693
1694 scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);
1695 if(scriptSize > 0) {
1696 /* Found optional script */
1697 tmpLocaleID = scriptID;
1698 ++fieldCount;
1699 len+=scriptSize;
1700 if (_isIDSeparator(*tmpLocaleID)) {
1701 /* If there is something else, then we add the _ */
1702 if(len<nameCapacity) {
1703 name[len]='_';
1704 }
1705 ++len;
1706 }
1707 }
1708
1709 if (_isIDSeparator(*tmpLocaleID)) {
1710 const char *cntryID;
1711 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID);
1712 if (cntrySize > 0) {
1713 /* Found optional country */
1714 tmpLocaleID = cntryID;
1715 len+=cntrySize;
1716 }
1717 if(_isIDSeparator(*tmpLocaleID)) {
1718 /* If there is something else, then we add the _ if we found country before.*/
1719 if (cntrySize > 0) {
1720 ++fieldCount;
1721 if(len<nameCapacity) {
1722 name[len]='_';
1723 }
1724 ++len;
1725 }
1726
1727 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);
1728 if (variantSize > 0) {
1729 variant = name+len;
1730 len += variantSize;
1731 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1732 }
1733 }
1734 }
1735 }
1736
1737 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1738 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1739 UBool done = FALSE;
1740 do {
1741 char c = *tmpLocaleID;
1742 switch (c) {
1743 case 0:
1744 case '@':
1745 done = TRUE;
1746 break;
1747 default:
1748 if (len<nameCapacity) {
1749 name[len] = c;
1750 }
1751 ++len;
1752 ++tmpLocaleID;
1753 break;
1754 }
1755 } while (!done);
1756 }
1757
1758 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1759 After this, tmpLocaleID either points to '@' or is NULL */
1760 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1761 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1762 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1763 }
1764
1765 /* Copy POSIX-style variant, if any [mr@FOO] */
1766 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1767 tmpLocaleID != NULL && keywordAssign == NULL) {
1768 for (;;) {
1769 char c = *tmpLocaleID;
1770 if (c == 0) {
1771 break;
1772 }
1773 if (len<nameCapacity) {
1774 name[len] = c;
1775 }
1776 ++len;
1777 ++tmpLocaleID;
1778 }
1779 }
1780
1781 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1782 /* Handle @FOO variant if @ is present and not followed by = */
1783 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1784 int32_t posixVariantSize;
1785 /* Add missing '_' if needed */
1786 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1787 do {
1788 if(len<nameCapacity) {
1789 name[len]='_';
1790 }
1791 ++len;
1792 ++fieldCount;
1793 } while(fieldCount<2);
1794 }
1795 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1796 (UBool)(variantSize > 0));
1797 if (posixVariantSize > 0) {
1798 if (variant == NULL) {
1799 variant = name+len;
1800 }
1801 len += posixVariantSize;
1802 variantSize += posixVariantSize;
1803 }
1804 }
1805
1806 /* Handle generic variants first */
1807 if (variant) {
1808 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1809 const char* variantToCompare = VARIANT_MAP[j].variant;
1810 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1811 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1812 len -= variantLen;
1813 if (variantLen > 0) {
1814 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1815 --len;
1816 }
1817 addKeyword = VARIANT_MAP[j].keyword;
1818 addValue = VARIANT_MAP[j].value;
1819 break;
1820 }
1821 }
1822 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1823 --len;
1824 }
1825 }
1826
1827 /* Look up the ID in the canonicalization map */
1828 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1829 const char* id = CANONICALIZE_MAP[j].id;
1830 int32_t n = (int32_t)uprv_strlen(id);
1831 if (len == n && uprv_strncmp(name, id, n) == 0) {
1832 if (n == 0 && tmpLocaleID != NULL) {
1833 break; /* Don't remap "" if keywords present */
1834 }
1835 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1836 if (CANONICALIZE_MAP[j].keyword) {
1837 addKeyword = CANONICALIZE_MAP[j].keyword;
1838 addValue = CANONICALIZE_MAP[j].value;
1839 }
1840 break;
1841 }
1842 }
1843 }
1844
1845 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1846 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1847 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1848 if(len<nameCapacity) {
1849 name[len]='@';
1850 }
1851 ++len;
1852 ++fieldCount;
1853 len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1854 addKeyword, addValue, err);
1855 } else if (addKeyword != NULL) {
1856 U_ASSERT(addValue != NULL);
1857 /* inelegant but works -- later make _getKeywords do this? */
1858 len += _copyCount(name+len, nameCapacity-len, "@");
1859 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1860 len += _copyCount(name+len, nameCapacity-len, "=");
1861 len += _copyCount(name+len, nameCapacity-len, addValue);
1862 }
1863 }
1864
1865 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1866 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1867 }
1868
1869 return u_terminateChars(result, resultCapacity, len, err);
1870 }
1871
1872 /* ### ID parsing API **************************************************/
1873
1874 U_CAPI int32_t U_EXPORT2
uloc_getParent(const char * localeID,char * parent,int32_t parentCapacity,UErrorCode * err)1875 uloc_getParent(const char* localeID,
1876 char* parent,
1877 int32_t parentCapacity,
1878 UErrorCode* err)
1879 {
1880 const char *lastUnderscore;
1881 int32_t i;
1882
1883 if (U_FAILURE(*err))
1884 return 0;
1885
1886 if (localeID == NULL)
1887 localeID = uloc_getDefault();
1888
1889 lastUnderscore=uprv_strrchr(localeID, '_');
1890 if(lastUnderscore!=NULL) {
1891 i=(int32_t)(lastUnderscore-localeID);
1892 } else {
1893 i=0;
1894 }
1895
1896 if(i>0 && parent != localeID) {
1897 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1898 }
1899 return u_terminateChars(parent, parentCapacity, i, err);
1900 }
1901
1902 U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char * localeID,char * language,int32_t languageCapacity,UErrorCode * err)1903 uloc_getLanguage(const char* localeID,
1904 char* language,
1905 int32_t languageCapacity,
1906 UErrorCode* err)
1907 {
1908 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1909 int32_t i=0;
1910
1911 if (err==NULL || U_FAILURE(*err)) {
1912 return 0;
1913 }
1914
1915 if(localeID==NULL) {
1916 localeID=uloc_getDefault();
1917 }
1918
1919 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1920 return u_terminateChars(language, languageCapacity, i, err);
1921 }
1922
1923 U_CAPI int32_t U_EXPORT2
uloc_getScript(const char * localeID,char * script,int32_t scriptCapacity,UErrorCode * err)1924 uloc_getScript(const char* localeID,
1925 char* script,
1926 int32_t scriptCapacity,
1927 UErrorCode* err)
1928 {
1929 int32_t i=0;
1930
1931 if(err==NULL || U_FAILURE(*err)) {
1932 return 0;
1933 }
1934
1935 if(localeID==NULL) {
1936 localeID=uloc_getDefault();
1937 }
1938
1939 /* skip the language */
1940 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1941 if(_isIDSeparator(*localeID)) {
1942 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1943 }
1944 return u_terminateChars(script, scriptCapacity, i, err);
1945 }
1946
1947 U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char * localeID,char * country,int32_t countryCapacity,UErrorCode * err)1948 uloc_getCountry(const char* localeID,
1949 char* country,
1950 int32_t countryCapacity,
1951 UErrorCode* err)
1952 {
1953 int32_t i=0;
1954
1955 if(err==NULL || U_FAILURE(*err)) {
1956 return 0;
1957 }
1958
1959 if(localeID==NULL) {
1960 localeID=uloc_getDefault();
1961 }
1962
1963 /* Skip the language */
1964 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1965 if(_isIDSeparator(*localeID)) {
1966 const char *scriptID;
1967 /* Skip the script if available */
1968 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1969 if(scriptID != localeID+1) {
1970 /* Found optional script */
1971 localeID = scriptID;
1972 }
1973 if(_isIDSeparator(*localeID)) {
1974 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1975 }
1976 }
1977 return u_terminateChars(country, countryCapacity, i, err);
1978 }
1979
1980 U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char * localeID,char * variant,int32_t variantCapacity,UErrorCode * err)1981 uloc_getVariant(const char* localeID,
1982 char* variant,
1983 int32_t variantCapacity,
1984 UErrorCode* err)
1985 {
1986 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1987 const char* tmpLocaleID;
1988 int32_t i=0;
1989
1990 if(err==NULL || U_FAILURE(*err)) {
1991 return 0;
1992 }
1993
1994 if (_hasBCP47Extension(localeID)) {
1995 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1996 } else {
1997 if (localeID==NULL) {
1998 localeID=uloc_getDefault();
1999 }
2000 tmpLocaleID=localeID;
2001 }
2002
2003 /* Skip the language */
2004 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2005 if(_isIDSeparator(*tmpLocaleID)) {
2006 const char *scriptID;
2007 /* Skip the script if available */
2008 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2009 if(scriptID != tmpLocaleID+1) {
2010 /* Found optional script */
2011 tmpLocaleID = scriptID;
2012 }
2013 /* Skip the Country */
2014 if (_isIDSeparator(*tmpLocaleID)) {
2015 const char *cntryID;
2016 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2017 if (cntryID != tmpLocaleID+1) {
2018 /* Found optional country */
2019 tmpLocaleID = cntryID;
2020 }
2021 if(_isIDSeparator(*tmpLocaleID)) {
2022 /* If there was no country ID, skip a possible extra IDSeparator */
2023 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2024 tmpLocaleID++;
2025 }
2026 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2027 }
2028 }
2029 }
2030
2031 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2032 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2033 /*
2034 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2035 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2036 }
2037 */
2038 return u_terminateChars(variant, variantCapacity, i, err);
2039 }
2040
2041 U_CAPI int32_t U_EXPORT2
uloc_getName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2042 uloc_getName(const char* localeID,
2043 char* name,
2044 int32_t nameCapacity,
2045 UErrorCode* err)
2046 {
2047 return _canonicalize(localeID, name, nameCapacity, 0, err);
2048 }
2049
2050 U_CAPI int32_t U_EXPORT2
uloc_getBaseName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2051 uloc_getBaseName(const char* localeID,
2052 char* name,
2053 int32_t nameCapacity,
2054 UErrorCode* err)
2055 {
2056 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2057 }
2058
2059 U_CAPI int32_t U_EXPORT2
uloc_canonicalize(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2060 uloc_canonicalize(const char* localeID,
2061 char* name,
2062 int32_t nameCapacity,
2063 UErrorCode* err)
2064 {
2065 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2066 }
2067
2068 U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char * localeID)2069 uloc_getISO3Language(const char* localeID)
2070 {
2071 int16_t offset;
2072 char lang[ULOC_LANG_CAPACITY];
2073 UErrorCode err = U_ZERO_ERROR;
2074
2075 if (localeID == NULL)
2076 {
2077 localeID = uloc_getDefault();
2078 }
2079 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2080 if (U_FAILURE(err))
2081 return "";
2082 offset = _findIndex(LANGUAGES, lang);
2083 if (offset < 0)
2084 return "";
2085 return LANGUAGES_3[offset];
2086 }
2087
2088 U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char * localeID)2089 uloc_getISO3Country(const char* localeID)
2090 {
2091 int16_t offset;
2092 char cntry[ULOC_LANG_CAPACITY];
2093 UErrorCode err = U_ZERO_ERROR;
2094
2095 if (localeID == NULL)
2096 {
2097 localeID = uloc_getDefault();
2098 }
2099 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2100 if (U_FAILURE(err))
2101 return "";
2102 offset = _findIndex(COUNTRIES, cntry);
2103 if (offset < 0)
2104 return "";
2105
2106 return COUNTRIES_3[offset];
2107 }
2108
2109 U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char * localeID)2110 uloc_getLCID(const char* localeID)
2111 {
2112 UErrorCode status = U_ZERO_ERROR;
2113 char langID[ULOC_FULLNAME_CAPACITY];
2114
2115 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2116 if (U_FAILURE(status)) {
2117 return 0;
2118 }
2119
2120 return uprv_convertToLCID(langID, localeID, &status);
2121 }
2122
2123 U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid,char * locale,int32_t localeCapacity,UErrorCode * status)2124 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2125 UErrorCode *status)
2126 {
2127 int32_t length;
2128 const char *posix = uprv_convertToPosix(hostid, status);
2129 if (U_FAILURE(*status) || posix == NULL) {
2130 return 0;
2131 }
2132 length = (int32_t)uprv_strlen(posix);
2133 if (length+1 > localeCapacity) {
2134 *status = U_BUFFER_OVERFLOW_ERROR;
2135 }
2136 else {
2137 uprv_strcpy(locale, posix);
2138 }
2139 return length;
2140 }
2141
2142 /* ### Default locale **************************************************/
2143
2144 U_CAPI const char* U_EXPORT2
uloc_getDefault()2145 uloc_getDefault()
2146 {
2147 return locale_get_default();
2148 }
2149
2150 U_CAPI void U_EXPORT2
uloc_setDefault(const char * newDefaultLocale,UErrorCode * err)2151 uloc_setDefault(const char* newDefaultLocale,
2152 UErrorCode* err)
2153 {
2154 if (U_FAILURE(*err))
2155 return;
2156 /* the error code isn't currently used for anything by this function*/
2157
2158 /* propagate change to C++ */
2159 locale_set_default(newDefaultLocale);
2160 }
2161
2162 /**
2163 * Returns a list of all language codes defined in ISO 639. This is a pointer
2164 * to an array of pointers to arrays of char. All of these pointers are owned
2165 * by ICU-- do not delete them, and do not write through them. The array is
2166 * terminated with a null pointer.
2167 */
2168 U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages()2169 uloc_getISOLanguages()
2170 {
2171 return LANGUAGES;
2172 }
2173
2174 /**
2175 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2176 * pointer to an array of pointers to arrays of char. All of these pointers are
2177 * owned by ICU-- do not delete them, and do not write through them. The array is
2178 * terminated with a null pointer.
2179 */
2180 U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()2181 uloc_getISOCountries()
2182 {
2183 return COUNTRIES;
2184 }
2185
2186
/* this function to be moved into cstring.c later */
static char gDecimal = 0; /* decimal separator used by the C library; 0 until first use */

/*
 * strtod() wrapper for text that always uses '.' as the decimal point.
 *
 * On first use the separator the C library prints is detected and cached.
 * If it is '.', the call goes straight to uprv_strtod(). Otherwise at most
 * the first 29 characters of start are copied, the first '.' is replaced by
 * the detected separator, and the copy is parsed; *end (when end is not
 * NULL) is mapped back to the matching position inside start.
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
    char *decimal;
    char *myEnd;
    char buf[30];
    double rv;
    if (!gDecimal) {
        /* Large enough for "+1", the separator and "0" even when the
           separator is a multi-byte sequence; a 5-byte buffer only fits a
           single-byte separator. */
        char rep[16];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(rep, "%+1.1f", 1.0);
        gDecimal = rep[2];
    }

    if(gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    } else {
        uprv_strncpy(buf, start, 29);
        buf[29]=0;
        decimal = uprv_strchr(buf, '.');
        if(decimal) {
            *decimal = gDecimal;
        } else {
            return uprv_strtod(start, end); /* no decimal point */
        }
        rv = uprv_strtod(buf, &myEnd);
        if(end) {
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
        }
        return rv;
    }
}
2225
/* One entry of a parsed Accept-Language list. */
typedef struct {
    /* weight of the entry: the parsed ";q=" value, or 1.0 when none is given */
    float q;
    /* to avoid uninitialized memory copy from qsort */
    int32_t dummy;
    /* locale ID of the entry, stored as a separately allocated string */
    char *locale;
} _acceptLangItem;
2231
2232 static int32_t U_CALLCONV
uloc_acceptLanguageCompare(const void * context,const void * a,const void * b)2233 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2234 {
2235 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2236 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2237
2238 int32_t rc = 0;
2239 if(bb->q < aa->q) {
2240 rc = -1; /* A > B */
2241 } else if(bb->q > aa->q) {
2242 rc = 1; /* A < B */
2243 } else {
2244 rc = 0; /* A = B */
2245 }
2246
2247 if(rc==0) {
2248 rc = uprv_stricmp(aa->locale, bb->locale);
2249 }
2250
2251 #if defined(ULOC_DEBUG)
2252 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2253 aa->locale, aa->q,
2254 bb->locale, bb->q,
2255 rc);*/
2256 #endif
2257
2258 return rc;
2259 }
2260
2261 /*
2262 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2263 */
2264
2265 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)2266 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2267 const char *httpAcceptLanguage,
2268 UEnumeration* availableLocales,
2269 UErrorCode *status)
2270 {
2271 _acceptLangItem *j;
2272 _acceptLangItem smallBuffer[30];
2273 char **strs;
2274 char tmp[ULOC_FULLNAME_CAPACITY +1];
2275 int32_t n = 0;
2276 const char *itemEnd;
2277 const char *paramEnd;
2278 const char *s;
2279 const char *t;
2280 int32_t res;
2281 int32_t i;
2282 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2283 int32_t jSize;
2284 char *tempstr; /* Use for null pointer check */
2285
2286 j = smallBuffer;
2287 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2288 if(U_FAILURE(*status)) {
2289 return -1;
2290 }
2291
2292 for(s=httpAcceptLanguage;s&&*s;) {
2293 while(isspace(*s)) /* eat space at the beginning */
2294 s++;
2295 itemEnd=uprv_strchr(s,',');
2296 paramEnd=uprv_strchr(s,';');
2297 if(!itemEnd) {
2298 itemEnd = httpAcceptLanguage+l; /* end of string */
2299 }
2300 if(paramEnd && paramEnd<itemEnd) {
2301 /* semicolon (;) is closer than end (,) */
2302 t = paramEnd+1;
2303 if(*t=='q') {
2304 t++;
2305 }
2306 while(isspace(*t)) {
2307 t++;
2308 }
2309 if(*t=='=') {
2310 t++;
2311 }
2312 while(isspace(*t)) {
2313 t++;
2314 }
2315 j[n].q = (float)_uloc_strtod(t,NULL);
2316 } else {
2317 /* no semicolon - it's 1.0 */
2318 j[n].q = 1.0f;
2319 paramEnd = itemEnd;
2320 }
2321 j[n].dummy=0;
2322 /* eat spaces prior to semi */
2323 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2324 ;
2325 /* Check for null pointer from uprv_strndup */
2326 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2327 if (tempstr == NULL) {
2328 *status = U_MEMORY_ALLOCATION_ERROR;
2329 return -1;
2330 }
2331 j[n].locale = tempstr;
2332 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2333 if(strcmp(j[n].locale,tmp)) {
2334 uprv_free(j[n].locale);
2335 j[n].locale=uprv_strdup(tmp);
2336 }
2337 #if defined(ULOC_DEBUG)
2338 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2339 #endif
2340 n++;
2341 s = itemEnd;
2342 while(*s==',') { /* eat duplicate commas */
2343 s++;
2344 }
2345 if(n>=jSize) {
2346 if(j==smallBuffer) { /* overflowed the small buffer. */
2347 j = uprv_malloc(sizeof(j[0])*(jSize*2));
2348 if(j!=NULL) {
2349 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2350 }
2351 #if defined(ULOC_DEBUG)
2352 fprintf(stderr,"malloced at size %d\n", jSize);
2353 #endif
2354 } else {
2355 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2356 #if defined(ULOC_DEBUG)
2357 fprintf(stderr,"re-alloced at size %d\n", jSize);
2358 #endif
2359 }
2360 jSize *= 2;
2361 if(j==NULL) {
2362 *status = U_MEMORY_ALLOCATION_ERROR;
2363 return -1;
2364 }
2365 }
2366 }
2367 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2368 if(U_FAILURE(*status)) {
2369 if(j != smallBuffer) {
2370 #if defined(ULOC_DEBUG)
2371 fprintf(stderr,"freeing j %p\n", j);
2372 #endif
2373 uprv_free(j);
2374 }
2375 return -1;
2376 }
2377 strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2378 /* Check for null pointer */
2379 if (strs == NULL) {
2380 uprv_free(j); /* Free to avoid memory leak */
2381 *status = U_MEMORY_ALLOCATION_ERROR;
2382 return -1;
2383 }
2384 for(i=0;i<n;i++) {
2385 #if defined(ULOC_DEBUG)
2386 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2387 #endif
2388 strs[i]=j[i].locale;
2389 }
2390 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2391 (const char**)strs, n, availableLocales, status);
2392 for(i=0;i<n;i++) {
2393 uprv_free(strs[i]);
2394 }
2395 uprv_free(strs);
2396 if(j != smallBuffer) {
2397 #if defined(ULOC_DEBUG)
2398 fprintf(stderr,"freeing j %p\n", j);
2399 #endif
2400 uprv_free(j);
2401 }
2402 return res;
2403 }
2404
2405
2406 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)2407 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2408 UAcceptResult *outResult, const char **acceptList,
2409 int32_t acceptListCount,
2410 UEnumeration* availableLocales,
2411 UErrorCode *status)
2412 {
2413 int32_t i,j;
2414 int32_t len;
2415 int32_t maxLen=0;
2416 char tmp[ULOC_FULLNAME_CAPACITY+1];
2417 const char *l;
2418 char **fallbackList;
2419 if(U_FAILURE(*status)) {
2420 return -1;
2421 }
2422 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2423 if(fallbackList==NULL) {
2424 *status = U_MEMORY_ALLOCATION_ERROR;
2425 return -1;
2426 }
2427 for(i=0;i<acceptListCount;i++) {
2428 #if defined(ULOC_DEBUG)
2429 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2430 #endif
2431 while((l=uenum_next(availableLocales, NULL, status))) {
2432 #if defined(ULOC_DEBUG)
2433 fprintf(stderr," %s\n", l);
2434 #endif
2435 len = (int32_t)uprv_strlen(l);
2436 if(!uprv_strcmp(acceptList[i], l)) {
2437 if(outResult) {
2438 *outResult = ULOC_ACCEPT_VALID;
2439 }
2440 #if defined(ULOC_DEBUG)
2441 fprintf(stderr, "MATCH! %s\n", l);
2442 #endif
2443 if(len>0) {
2444 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2445 }
2446 for(j=0;j<i;j++) {
2447 uprv_free(fallbackList[j]);
2448 }
2449 uprv_free(fallbackList);
2450 return u_terminateChars(result, resultAvailable, len, status);
2451 }
2452 if(len>maxLen) {
2453 maxLen = len;
2454 }
2455 }
2456 uenum_reset(availableLocales, status);
2457 /* save off parent info */
2458 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2459 fallbackList[i] = uprv_strdup(tmp);
2460 } else {
2461 fallbackList[i]=0;
2462 }
2463 }
2464
2465 for(maxLen--;maxLen>0;maxLen--) {
2466 for(i=0;i<acceptListCount;i++) {
2467 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2468 #if defined(ULOC_DEBUG)
2469 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2470 #endif
2471 while((l=uenum_next(availableLocales, NULL, status))) {
2472 #if defined(ULOC_DEBUG)
2473 fprintf(stderr," %s\n", l);
2474 #endif
2475 len = (int32_t)uprv_strlen(l);
2476 if(!uprv_strcmp(fallbackList[i], l)) {
2477 if(outResult) {
2478 *outResult = ULOC_ACCEPT_FALLBACK;
2479 }
2480 #if defined(ULOC_DEBUG)
2481 fprintf(stderr, "fallback MATCH! %s\n", l);
2482 #endif
2483 if(len>0) {
2484 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2485 }
2486 for(j=0;j<acceptListCount;j++) {
2487 uprv_free(fallbackList[j]);
2488 }
2489 uprv_free(fallbackList);
2490 return u_terminateChars(result, resultAvailable, len, status);
2491 }
2492 }
2493 uenum_reset(availableLocales, status);
2494
2495 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2496 uprv_free(fallbackList[i]);
2497 fallbackList[i] = uprv_strdup(tmp);
2498 } else {
2499 uprv_free(fallbackList[i]);
2500 fallbackList[i]=0;
2501 }
2502 }
2503 }
2504 if(outResult) {
2505 *outResult = ULOC_ACCEPT_FAILED;
2506 }
2507 }
2508 for(i=0;i<acceptListCount;i++) {
2509 uprv_free(fallbackList[i]);
2510 }
2511 uprv_free(fallbackList);
2512 return -1;
2513 }
2514
2515 /*eof*/
2516