1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9 #include <algorithm>
10
11 #include "unicode/utypes.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uobject.h"
14
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "uassert.h"
19 #include "ucln_cmn.h"
20 #include "uhash.h"
21 #include "umutex.h"
22 #include "uresimp.h"
23 #include "uvector.h"
24 #include "udataswp.h" /* for InvChar functions */
25
26 static UHashtable* gLocExtKeyMap = NULL;
27 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
28
// Bit flags recording which "special" (pattern-validated rather than
// enumerated) type syntaxes a key accepts. Combined with bitwise OR into
// LocExtKeyData::specialTypes.
enum SpecialType {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,  // 1
    SPECIALTYPE_REORDER_CODE = 1 << 1,  // 2
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2   // 4
};
36
37 struct LocExtKeyData : public icu::UMemory {
38 const char* legacyId;
39 const char* bcpId;
40 icu::LocalUHashtablePointer typeMap;
41 uint32_t specialTypes;
42 };
43
44 struct LocExtType : public icu::UMemory {
45 const char* legacyId;
46 const char* bcpId;
47 };
48
49 static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
50 static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
51 static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
52
53 U_CDECL_BEGIN
54
55 static UBool U_CALLCONV
uloc_key_type_cleanup(void)56 uloc_key_type_cleanup(void) {
57 if (gLocExtKeyMap != NULL) {
58 uhash_close(gLocExtKeyMap);
59 gLocExtKeyMap = NULL;
60 }
61
62 delete gLocExtKeyDataEntries;
63 gLocExtKeyDataEntries = NULL;
64
65 delete gLocExtTypeEntries;
66 gLocExtTypeEntries = NULL;
67
68 delete gKeyTypeStringPool;
69 gKeyTypeStringPool = NULL;
70
71 gLocExtKeyMapInitOnce.reset();
72 return TRUE;
73 }
74
75 U_CDECL_END
76
77
78 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)79 initFromResourceBundle(UErrorCode& sts) {
80 U_NAMESPACE_USE
81 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
82
83 gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
84
85 LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
86 LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
87 LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
88
89 if (U_FAILURE(sts)) {
90 return;
91 }
92
93 UErrorCode tmpSts = U_ZERO_ERROR;
94 LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
95 tmpSts = U_ZERO_ERROR;
96 LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
97
98 // initialize pools storing dynamically allocated objects
99 gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
100 if (gKeyTypeStringPool == NULL) {
101 sts = U_MEMORY_ALLOCATION_ERROR;
102 return;
103 }
104 gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
105 if (gLocExtKeyDataEntries == NULL) {
106 sts = U_MEMORY_ALLOCATION_ERROR;
107 return;
108 }
109 gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
110 if (gLocExtTypeEntries == NULL) {
111 sts = U_MEMORY_ALLOCATION_ERROR;
112 return;
113 }
114
115 // iterate through keyMap resource
116 LocalUResourceBundlePointer keyMapEntry;
117
118 while (ures_hasNext(keyMapRes.getAlias())) {
119 keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
120 if (U_FAILURE(sts)) {
121 break;
122 }
123 const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
124 UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
125 if (U_FAILURE(sts)) {
126 break;
127 }
128
129 // empty value indicates that BCP key is same with the legacy key.
130 const char* bcpKeyId = legacyKeyId;
131 if (!uBcpKeyId.isEmpty()) {
132 icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
133 if (bcpKeyIdBuf == NULL) {
134 sts = U_MEMORY_ALLOCATION_ERROR;
135 break;
136 }
137 bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
138 if (U_FAILURE(sts)) {
139 break;
140 }
141 bcpKeyId = bcpKeyIdBuf->data();
142 }
143
144 UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
145
146 UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
147 if (U_FAILURE(sts)) {
148 break;
149 }
150 uint32_t specialTypes = SPECIALTYPE_NONE;
151
152 LocalUResourceBundlePointer typeAliasResByKey;
153 LocalUResourceBundlePointer bcpTypeAliasResByKey;
154
155 if (typeAliasRes.isValid()) {
156 tmpSts = U_ZERO_ERROR;
157 typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
158 if (U_FAILURE(tmpSts)) {
159 typeAliasResByKey.orphan();
160 }
161 }
162 if (bcpTypeAliasRes.isValid()) {
163 tmpSts = U_ZERO_ERROR;
164 bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
165 if (U_FAILURE(tmpSts)) {
166 bcpTypeAliasResByKey.orphan();
167 }
168 }
169
170 // look up type map for the key, and walk through the mapping data
171 tmpSts = U_ZERO_ERROR;
172 LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
173 if (U_FAILURE(tmpSts)) {
174 // type map for each key must exist
175 UPRV_UNREACHABLE;
176 } else {
177 LocalUResourceBundlePointer typeMapEntry;
178
179 while (ures_hasNext(typeMapResByKey.getAlias())) {
180 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
181 if (U_FAILURE(sts)) {
182 break;
183 }
184 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
185
186 // special types
187 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
188 specialTypes |= SPECIALTYPE_CODEPOINTS;
189 continue;
190 }
191 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
192 specialTypes |= SPECIALTYPE_REORDER_CODE;
193 continue;
194 }
195 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
196 specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
197 continue;
198 }
199
200 if (isTZ) {
201 // a timezone key uses a colon instead of a slash in the resource.
202 // e.g. America:Los_Angeles
203 if (uprv_strchr(legacyTypeId, ':') != NULL) {
204 icu::CharString* legacyTypeIdBuf =
205 gKeyTypeStringPool->create(legacyTypeId, sts);
206 if (legacyTypeIdBuf == NULL) {
207 sts = U_MEMORY_ALLOCATION_ERROR;
208 break;
209 }
210 if (U_FAILURE(sts)) {
211 break;
212 }
213 std::replace(
214 legacyTypeIdBuf->data(),
215 legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
216 ':', '/');
217 legacyTypeId = legacyTypeIdBuf->data();
218 }
219 }
220
221 UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
222 if (U_FAILURE(sts)) {
223 break;
224 }
225
226 // empty value indicates that BCP type is same with the legacy type.
227 const char* bcpTypeId = legacyTypeId;
228 if (!uBcpTypeId.isEmpty()) {
229 icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
230 if (bcpTypeIdBuf == NULL) {
231 sts = U_MEMORY_ALLOCATION_ERROR;
232 break;
233 }
234 bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
235 if (U_FAILURE(sts)) {
236 break;
237 }
238 bcpTypeId = bcpTypeIdBuf->data();
239 }
240
241 // Note: legacy type value should never be
242 // equivalent to bcp type value of a different
243 // type under the same key. So we use a single
244 // map for lookup.
245 LocExtType* t = gLocExtTypeEntries->create();
246 if (t == NULL) {
247 sts = U_MEMORY_ALLOCATION_ERROR;
248 break;
249 }
250 t->bcpId = bcpTypeId;
251 t->legacyId = legacyTypeId;
252
253 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
254 if (bcpTypeId != legacyTypeId) {
255 // different type value
256 uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
257 }
258 if (U_FAILURE(sts)) {
259 break;
260 }
261
262 // also put aliases in the map
263 if (typeAliasResByKey.isValid()) {
264 LocalUResourceBundlePointer typeAliasDataEntry;
265
266 ures_resetIterator(typeAliasResByKey.getAlias());
267 while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
268 int32_t toLen;
269 typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
270 const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
271 if (U_FAILURE(sts)) {
272 break;
273 }
274 // check if this is an alias of canoncal legacy type
275 if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
276 const char* from = ures_getKey(typeAliasDataEntry.getAlias());
277 if (isTZ) {
278 // replace colon with slash if necessary
279 if (uprv_strchr(from, ':') != NULL) {
280 icu::CharString* fromBuf =
281 gKeyTypeStringPool->create(from, sts);
282 if (fromBuf == NULL) {
283 sts = U_MEMORY_ALLOCATION_ERROR;
284 break;
285 }
286 if (U_FAILURE(sts)) {
287 break;
288 }
289 std::replace(
290 fromBuf->data(),
291 fromBuf->data() + fromBuf->length(),
292 ':', '/');
293 from = fromBuf->data();
294 }
295 }
296 uhash_put(typeDataMap, (void*)from, t, &sts);
297 }
298 }
299 if (U_FAILURE(sts)) {
300 break;
301 }
302 }
303
304 if (bcpTypeAliasResByKey.isValid()) {
305 LocalUResourceBundlePointer bcpTypeAliasDataEntry;
306
307 ures_resetIterator(bcpTypeAliasResByKey.getAlias());
308 while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
309 int32_t toLen;
310 bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
311 const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
312 if (U_FAILURE(sts)) {
313 break;
314 }
315 // check if this is an alias of bcp type
316 if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
317 const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
318 uhash_put(typeDataMap, (void*)from, t, &sts);
319 }
320 }
321 if (U_FAILURE(sts)) {
322 break;
323 }
324 }
325 }
326 }
327 if (U_FAILURE(sts)) {
328 break;
329 }
330
331 LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
332 if (keyData == NULL) {
333 sts = U_MEMORY_ALLOCATION_ERROR;
334 break;
335 }
336 keyData->bcpId = bcpKeyId;
337 keyData->legacyId = legacyKeyId;
338 keyData->specialTypes = specialTypes;
339 keyData->typeMap.adoptInstead(typeDataMap);
340
341 uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
342 if (legacyKeyId != bcpKeyId) {
343 // different key value
344 uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
345 }
346 if (U_FAILURE(sts)) {
347 break;
348 }
349 }
350 }
351
352 static UBool
init()353 init() {
354 UErrorCode sts = U_ZERO_ERROR;
355 umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
356 if (U_FAILURE(sts)) {
357 return FALSE;
358 }
359 return TRUE;
360 }
361
362 static UBool
isSpecialTypeCodepoints(const char * val)363 isSpecialTypeCodepoints(const char* val) {
364 int32_t subtagLen = 0;
365 const char* p = val;
366 while (*p) {
367 if (*p == '-') {
368 if (subtagLen < 4 || subtagLen > 6) {
369 return FALSE;
370 }
371 subtagLen = 0;
372 } else if ((*p >= '0' && *p <= '9') ||
373 (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
374 (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
375 subtagLen++;
376 } else {
377 return FALSE;
378 }
379 p++;
380 }
381 return (subtagLen >= 4 && subtagLen <= 6);
382 }
383
384 static UBool
isSpecialTypeReorderCode(const char * val)385 isSpecialTypeReorderCode(const char* val) {
386 int32_t subtagLen = 0;
387 const char* p = val;
388 while (*p) {
389 if (*p == '-') {
390 if (subtagLen < 3 || subtagLen > 8) {
391 return FALSE;
392 }
393 subtagLen = 0;
394 } else if (uprv_isASCIILetter(*p)) {
395 subtagLen++;
396 } else {
397 return FALSE;
398 }
399 p++;
400 }
401 return (subtagLen >=3 && subtagLen <=8);
402 }
403
404 static UBool
isSpecialTypeRgKeyValue(const char * val)405 isSpecialTypeRgKeyValue(const char* val) {
406 int32_t subtagLen = 0;
407 const char* p = val;
408 while (*p) {
409 if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
410 (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
411 subtagLen++;
412 } else {
413 return FALSE;
414 }
415 p++;
416 }
417 return (subtagLen == 6);
418 }
419
420 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)421 ulocimp_toBcpKey(const char* key) {
422 if (!init()) {
423 return NULL;
424 }
425
426 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
427 if (keyData != NULL) {
428 return keyData->bcpId;
429 }
430 return NULL;
431 }
432
433 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)434 ulocimp_toLegacyKey(const char* key) {
435 if (!init()) {
436 return NULL;
437 }
438
439 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
440 if (keyData != NULL) {
441 return keyData->legacyId;
442 }
443 return NULL;
444 }
445
446 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)447 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
448 if (isKnownKey != NULL) {
449 *isKnownKey = FALSE;
450 }
451 if (isSpecialType != NULL) {
452 *isSpecialType = FALSE;
453 }
454
455 if (!init()) {
456 return NULL;
457 }
458
459 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
460 if (keyData != NULL) {
461 if (isKnownKey != NULL) {
462 *isKnownKey = TRUE;
463 }
464 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
465 if (t != NULL) {
466 return t->bcpId;
467 }
468 if (keyData->specialTypes != SPECIALTYPE_NONE) {
469 UBool matched = FALSE;
470 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
471 matched = isSpecialTypeCodepoints(type);
472 }
473 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
474 matched = isSpecialTypeReorderCode(type);
475 }
476 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
477 matched = isSpecialTypeRgKeyValue(type);
478 }
479 if (matched) {
480 if (isSpecialType != NULL) {
481 *isSpecialType = TRUE;
482 }
483 return type;
484 }
485 }
486 }
487 return NULL;
488 }
489
490
491 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)492 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
493 if (isKnownKey != NULL) {
494 *isKnownKey = FALSE;
495 }
496 if (isSpecialType != NULL) {
497 *isSpecialType = FALSE;
498 }
499
500 if (!init()) {
501 return NULL;
502 }
503
504 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
505 if (keyData != NULL) {
506 if (isKnownKey != NULL) {
507 *isKnownKey = TRUE;
508 }
509 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
510 if (t != NULL) {
511 return t->legacyId;
512 }
513 if (keyData->specialTypes != SPECIALTYPE_NONE) {
514 UBool matched = FALSE;
515 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
516 matched = isSpecialTypeCodepoints(type);
517 }
518 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
519 matched = isSpecialTypeReorderCode(type);
520 }
521 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
522 matched = isSpecialTypeRgKeyValue(type);
523 }
524 if (matched) {
525 if (isSpecialType != NULL) {
526 *isSpecialType = TRUE;
527 }
528 return type;
529 }
530 }
531 }
532 return NULL;
533 }
534
535