1 /*
2 *******************************************************************************
3 * Copyright (C) 2002-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
10
11 #include "unicode/resbund.h"
12 #include "cmemory.h"
13 #include "ustrfmt.h"
14 #include "locutil.h"
15 #include "charstr.h"
16 #include "ucln_cmn.h"
17 #include "uassert.h"
18 #include "umutex.h"
19
// see LocaleUtility::getAvailableLocaleNames
// One-time-initialization guard: getAvailableLocaleNames passes it to
// umtx_initOnce together with locale_utility_init, which creates the cache.
static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
// Top-level cache keyed by bundle ID. Its values are per-bundle Hashtables
// that the cache deletes through uhash_deleteHashtable. It is NULL until
// locale_utility_init succeeds and NULL again after service_cleanup runs.
static icu::Hashtable * LocaleUtility_cache = NULL;
23
// UTF-16 code units of the separator characters that appear in locale IDs,
// spelled as explicit code points so they do not depend on the compiler's
// execution character set.
#define UNDERSCORE_CHAR ((UChar)0x005F)  /* '_' */
#define AT_SIGN_CHAR    ((UChar)0x0040)  /* '@' */
#define PERIOD_CHAR     ((UChar)0x002E)  /* '.' */
27
28 /*
29 ******************************************************************
30 */
31
32 /**
33 * Release all static memory held by Locale Utility.
34 */
35 U_CDECL_BEGIN
service_cleanup(void)36 static UBool U_CALLCONV service_cleanup(void) {
37 if (LocaleUtility_cache) {
38 delete LocaleUtility_cache;
39 LocaleUtility_cache = NULL;
40 }
41 return TRUE;
42 }
43
44
locale_utility_init(UErrorCode & status)45 static void U_CALLCONV locale_utility_init(UErrorCode &status) {
46 using namespace icu;
47 U_ASSERT(LocaleUtility_cache == NULL);
48 ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
49 LocaleUtility_cache = new Hashtable(status);
50 if (U_FAILURE(status)) {
51 delete LocaleUtility_cache;
52 LocaleUtility_cache = NULL;
53 return;
54 }
55 if (LocaleUtility_cache == NULL) {
56 status = U_MEMORY_ALLOCATION_ERROR;
57 return;
58 }
59 LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
60 }
61
62 U_CDECL_END
63
64 U_NAMESPACE_BEGIN
65
66 UnicodeString&
canonicalLocaleString(const UnicodeString * id,UnicodeString & result)67 LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
68 {
69 if (id == NULL) {
70 result.setToBogus();
71 } else {
72 // Fix case only (no other changes) up to the first '@' or '.' or
73 // end of string, whichever comes first. In 3.0 I changed this to
74 // stop at first '@' or '.'. It used to run out to the end of
75 // string. My fix makes the tests pass but is probably
76 // structurally incorrect. See below. [alan 3.0]
77
78 // TODO: Doug, you might want to revise this...
79 result = *id;
80 int32_t i = 0;
81 int32_t end = result.indexOf(AT_SIGN_CHAR);
82 int32_t n = result.indexOf(PERIOD_CHAR);
83 if (n >= 0 && n < end) {
84 end = n;
85 }
86 if (end < 0) {
87 end = result.length();
88 }
89 n = result.indexOf(UNDERSCORE_CHAR);
90 if (n < 0) {
91 n = end;
92 }
93 for (; i < n; ++i) {
94 UChar c = result.charAt(i);
95 if (c >= 0x0041 && c <= 0x005a) {
96 c += 0x20;
97 result.setCharAt(i, c);
98 }
99 }
100 for (n = end; i < n; ++i) {
101 UChar c = result.charAt(i);
102 if (c >= 0x0061 && c <= 0x007a) {
103 c -= 0x20;
104 result.setCharAt(i, c);
105 }
106 }
107 }
108 return result;
109
110 #if 0
111 // This code does a proper full level 2 canonicalization of id.
112 // It's nasty to go from UChar to char to char to UChar -- but
113 // that's what you have to do to use the uloc_canonicalize
114 // function on UnicodeStrings.
115
116 // I ended up doing the alternate fix (see above) not for
117 // performance reasons, although performance will certainly be
118 // better, but because doing a full level 2 canonicalization
119 // causes some tests to fail. [alan 3.0]
120
121 // TODO: Doug, you might want to revisit this...
122 result.setToBogus();
123 if (id != 0) {
124 int32_t buflen = id->length() + 8; // space for NUL
125 char* buf = (char*) uprv_malloc(buflen);
126 char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen);
127 if (buf != 0 && canon != 0) {
128 U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen);
129 UErrorCode ec = U_ZERO_ERROR;
130 uloc_canonicalize(buf, canon, buflen, &ec);
131 if (U_SUCCESS(ec)) {
132 result = UnicodeString(canon);
133 }
134 }
135 uprv_free(buf);
136 uprv_free(canon);
137 }
138 return result;
139 #endif
140 }
141
142 Locale&
initLocaleFromName(const UnicodeString & id,Locale & result)143 LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
144 {
145 enum { BUFLEN = 128 }; // larger than ever needed
146
147 if (id.isBogus() || id.length() >= BUFLEN) {
148 result.setToBogus();
149 } else {
150 /*
151 * We need to convert from a UnicodeString to char * in order to
152 * create a Locale.
153 *
154 * Problem: Locale ID strings may contain '@' which is a variant
155 * character and cannot be handled by invariant-character conversion.
156 *
157 * Hack: Since ICU code can handle locale IDs with multiple encodings
158 * of '@' (at least for EBCDIC; it's not known to be a problem for
159 * ASCII-based systems),
160 * we use regular invariant-character conversion for everything else
161 * and manually convert U+0040 into a compiler-char-constant '@'.
162 * While this compilation-time constant may not match the runtime
163 * encoding of '@', it should be one of the encodings which ICU
164 * recognizes.
165 *
166 * There should be only at most one '@' in a locale ID.
167 */
168 char buffer[BUFLEN];
169 int32_t prev, i;
170 prev = 0;
171 for(;;) {
172 i = id.indexOf((UChar)0x40, prev);
173 if(i < 0) {
174 // no @ between prev and the rest of the string
175 id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
176 break; // done
177 } else {
178 // normal invariant-character conversion for text between @s
179 id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
180 // manually "convert" U+0040 at id[i] into '@' at buffer[i]
181 buffer[i] = '@';
182 prev = i + 1;
183 }
184 }
185 result = Locale::createFromName(buffer);
186 }
187 return result;
188 }
189
190 UnicodeString&
initNameFromLocale(const Locale & locale,UnicodeString & result)191 LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
192 {
193 if (locale.isBogus()) {
194 result.setToBogus();
195 } else {
196 result.append(UnicodeString(locale.getName(), -1, US_INV));
197 }
198 return result;
199 }
200
201 const Hashtable*
getAvailableLocaleNames(const UnicodeString & bundleID)202 LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
203 {
204 // LocaleUtility_cache is a hash-of-hashes. The top-level keys
205 // are path strings ('bundleID') passed to
206 // ures_openAvailableLocales. The top-level values are
207 // second-level hashes. The second-level keys are result strings
208 // from ures_openAvailableLocales. The second-level values are
209 // garbage ((void*)1 or other random pointer).
210
211 UErrorCode status = U_ZERO_ERROR;
212 umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
213 Hashtable *cache = LocaleUtility_cache;
214 if (cache == NULL) {
215 // Catastrophic failure.
216 return NULL;
217 }
218
219 Hashtable* htp;
220 umtx_lock(NULL);
221 htp = (Hashtable*) cache->get(bundleID);
222 umtx_unlock(NULL);
223
224 if (htp == NULL) {
225 htp = new Hashtable(status);
226 if (htp && U_SUCCESS(status)) {
227 CharString cbundleID;
228 cbundleID.appendInvariantChars(bundleID, status);
229 const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
230 UEnumeration *uenum = ures_openAvailableLocales(path, &status);
231 for (;;) {
232 const UChar* id = uenum_unext(uenum, NULL, &status);
233 if (id == NULL) {
234 break;
235 }
236 htp->put(UnicodeString(id), (void*)htp, status);
237 }
238 uenum_close(uenum);
239 if (U_FAILURE(status)) {
240 delete htp;
241 return NULL;
242 }
243 umtx_lock(NULL);
244 Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
245 if (t != NULL) {
246 // Another thread raced through this code, creating the cache entry first.
247 // Discard ours and return theirs.
248 umtx_unlock(NULL);
249 delete htp;
250 htp = t;
251 } else {
252 cache->put(bundleID, (void*)htp, status);
253 umtx_unlock(NULL);
254 }
255 }
256 }
257 return htp;
258 }
259
260 UBool
isFallbackOf(const UnicodeString & root,const UnicodeString & child)261 LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
262 {
263 return child.indexOf(root) == 0 &&
264 (child.length() == root.length() ||
265 child.charAt(root.length()) == UNDERSCORE_CHAR);
266 }
267
268 U_NAMESPACE_END
269
/* !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION */
271 #endif
272
273
274