1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9 #include "unicode/utypes.h"
10
11 #include "cstring.h"
12 #include "uassert.h"
13 #include "ucln_cmn.h"
14 #include "uhash.h"
15 #include "umutex.h"
16 #include "uresimp.h"
17 #include "uvector.h"
18 #include "udataswp.h" /* for InvChar functions */
19
// Lookup table from key id (legacy or BCP 47 form) to its LocExtKeyData.
// Created in initFromResourceBundle() and closed in uloc_key_type_cleanup().
static UHashtable* gLocExtKeyMap = NULL;
// Guards the one-time execution of initFromResourceBundle(); reset by the cleanup function.
static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
// Owns the char strings allocated during initialization (elements are released with uprv_free).
static icu::UVector* gKeyTypeStringPool = NULL;
// Owns every LocExtKeyData allocated during initialization.
static icu::UVector* gLocExtKeyDataEntries = NULL;
// Owns every LocExtType allocated during initialization.
static icu::UVector* gLocExtTypeEntries = NULL;
25
// Bit flags marking the "special" type patterns a key may accept.
// Flags are OR-ed together into LocExtKeyData::specialTypes.
typedef enum {
    SPECIALTYPE_NONE = 0,
    SPECIALTYPE_CODEPOINTS = 1 << 0,    // type map contains "CODEPOINTS"
    SPECIALTYPE_REORDER_CODE = 1 << 1,  // type map contains "REORDER_CODE"
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2   // type map contains "RG_KEY_VALUE"
} SpecialType;
33
// Per-key record built by initFromResourceBundle() and stored in gLocExtKeyMap.
// The strings are not owned by this struct; typeMap is closed by uloc_deleteKeyDataEntry().
typedef struct LocExtKeyData {
    const char* legacyId;   // key id as it appears as a resource key in "keyMap"
    const char* bcpId;      // BCP 47 key id; same pointer as legacyId when the resource value is empty
    UHashtable* typeMap;    // maps type ids (legacy, BCP 47 and aliases) to LocExtType
    uint32_t specialTypes;  // bitwise OR of SpecialType flags
} LocExtKeyData;
40
// Per-type record built by initFromResourceBundle() and stored in a key's typeMap.
// The strings are not owned by this struct.
typedef struct LocExtType {
    const char* legacyId;  // legacy type id (for the "timezone" key, ':' is replaced by '/')
    const char* bcpId;     // BCP 47 type id; same pointer as legacyId when the resource value is empty
} LocExtType;
45
46 U_CDECL_BEGIN
47
48 static UBool U_CALLCONV
uloc_key_type_cleanup(void)49 uloc_key_type_cleanup(void) {
50 if (gLocExtKeyMap != NULL) {
51 uhash_close(gLocExtKeyMap);
52 gLocExtKeyMap = NULL;
53 }
54
55 delete gLocExtKeyDataEntries;
56 gLocExtKeyDataEntries = NULL;
57
58 delete gLocExtTypeEntries;
59 gLocExtTypeEntries = NULL;
60
61 delete gKeyTypeStringPool;
62 gKeyTypeStringPool = NULL;
63
64 gLocExtKeyMapInitOnce.reset();
65 return TRUE;
66 }
67
68 static void U_CALLCONV
uloc_deleteKeyTypeStringPoolEntry(void * obj)69 uloc_deleteKeyTypeStringPoolEntry(void* obj) {
70 uprv_free(obj);
71 }
72
73 static void U_CALLCONV
uloc_deleteKeyDataEntry(void * obj)74 uloc_deleteKeyDataEntry(void* obj) {
75 LocExtKeyData* keyData = (LocExtKeyData*)obj;
76 if (keyData->typeMap != NULL) {
77 uhash_close(keyData->typeMap);
78 }
79 uprv_free(keyData);
80 }
81
82 static void U_CALLCONV
uloc_deleteTypeEntry(void * obj)83 uloc_deleteTypeEntry(void* obj) {
84 uprv_free(obj);
85 }
86
87 U_CDECL_END
88
89
90 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)91 initFromResourceBundle(UErrorCode& sts) {
92 U_NAMESPACE_USE
93 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
94
95 gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
96
97 LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
98 LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
99 LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
100
101 if (U_FAILURE(sts)) {
102 return;
103 }
104
105 UErrorCode tmpSts = U_ZERO_ERROR;
106 LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
107 tmpSts = U_ZERO_ERROR;
108 LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
109
110 // initialize vectors storing dynamically allocated objects
111 gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
112 if (gKeyTypeStringPool == NULL) {
113 if (U_SUCCESS(sts)) {
114 sts = U_MEMORY_ALLOCATION_ERROR;
115 }
116 }
117 if (U_FAILURE(sts)) {
118 return;
119 }
120 gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);
121 if (gLocExtKeyDataEntries == NULL) {
122 if (U_SUCCESS(sts)) {
123 sts = U_MEMORY_ALLOCATION_ERROR;
124 }
125 }
126 if (U_FAILURE(sts)) {
127 return;
128 }
129 gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);
130 if (gLocExtTypeEntries == NULL) {
131 if (U_SUCCESS(sts)) {
132 sts = U_MEMORY_ALLOCATION_ERROR;
133 }
134 }
135 if (U_FAILURE(sts)) {
136 return;
137 }
138
139 // iterate through keyMap resource
140 LocalUResourceBundlePointer keyMapEntry;
141
142 while (ures_hasNext(keyMapRes.getAlias())) {
143 keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
144 if (U_FAILURE(sts)) {
145 break;
146 }
147 const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
148 int32_t bcpKeyIdLen = 0;
149 const UChar* uBcpKeyId = ures_getString(keyMapEntry.getAlias(), &bcpKeyIdLen, &sts);
150 if (U_FAILURE(sts)) {
151 break;
152 }
153
154 // empty value indicates that BCP key is same with the legacy key.
155 const char* bcpKeyId = legacyKeyId;
156 if (bcpKeyIdLen > 0) {
157 char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);
158 if (bcpKeyIdBuf == NULL) {
159 sts = U_MEMORY_ALLOCATION_ERROR;
160 break;
161 }
162 u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);
163 bcpKeyIdBuf[bcpKeyIdLen] = 0;
164 gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);
165 if (U_FAILURE(sts)) {
166 break;
167 }
168 bcpKeyId = bcpKeyIdBuf;
169 }
170
171 UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
172
173 UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
174 if (U_FAILURE(sts)) {
175 break;
176 }
177 uint32_t specialTypes = SPECIALTYPE_NONE;
178
179 LocalUResourceBundlePointer typeAliasResByKey;
180 LocalUResourceBundlePointer bcpTypeAliasResByKey;
181
182 if (typeAliasRes.isValid()) {
183 tmpSts = U_ZERO_ERROR;
184 typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
185 if (U_FAILURE(tmpSts)) {
186 typeAliasResByKey.orphan();
187 }
188 }
189 if (bcpTypeAliasRes.isValid()) {
190 tmpSts = U_ZERO_ERROR;
191 bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
192 if (U_FAILURE(tmpSts)) {
193 bcpTypeAliasResByKey.orphan();
194 }
195 }
196
197 // look up type map for the key, and walk through the mapping data
198 tmpSts = U_ZERO_ERROR;
199 LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
200 if (U_FAILURE(tmpSts)) {
201 // type map for each key must exist
202 U_ASSERT(FALSE);
203 } else {
204 LocalUResourceBundlePointer typeMapEntry;
205
206 while (ures_hasNext(typeMapResByKey.getAlias())) {
207 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
208 if (U_FAILURE(sts)) {
209 break;
210 }
211 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
212
213 // special types
214 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
215 specialTypes |= SPECIALTYPE_CODEPOINTS;
216 continue;
217 }
218 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
219 specialTypes |= SPECIALTYPE_REORDER_CODE;
220 continue;
221 }
222 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
223 specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
224 continue;
225 }
226
227 if (isTZ) {
228 // a timezone key uses a colon instead of a slash in the resource.
229 // e.g. America:Los_Angeles
230 if (uprv_strchr(legacyTypeId, ':') != NULL) {
231 int32_t legacyTypeIdLen = static_cast<int32_t>(uprv_strlen(legacyTypeId));
232 char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
233 if (legacyTypeIdBuf == NULL) {
234 sts = U_MEMORY_ALLOCATION_ERROR;
235 break;
236 }
237 const char* p = legacyTypeId;
238 char* q = legacyTypeIdBuf;
239 while (*p) {
240 if (*p == ':') {
241 *q++ = '/';
242 } else {
243 *q++ = *p;
244 }
245 p++;
246 }
247 *q = 0;
248
249 gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);
250 if (U_FAILURE(sts)) {
251 break;
252 }
253 legacyTypeId = legacyTypeIdBuf;
254 }
255 }
256
257 int32_t bcpTypeIdLen = 0;
258 const UChar* uBcpTypeId = ures_getString(typeMapEntry.getAlias(), &bcpTypeIdLen, &sts);
259 if (U_FAILURE(sts)) {
260 break;
261 }
262
263 // empty value indicates that BCP type is same with the legacy type.
264 const char* bcpTypeId = legacyTypeId;
265 if (bcpTypeIdLen > 0) {
266 char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);
267 if (bcpTypeIdBuf == NULL) {
268 sts = U_MEMORY_ALLOCATION_ERROR;
269 break;
270 }
271 u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);
272 bcpTypeIdBuf[bcpTypeIdLen] = 0;
273 gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);
274 if (U_FAILURE(sts)) {
275 break;
276 }
277 bcpTypeId = bcpTypeIdBuf;
278 }
279
280 // Note: legacy type value should never be
281 // equivalent to bcp type value of a different
282 // type under the same key. So we use a single
283 // map for lookup.
284 LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
285 if (t == NULL) {
286 sts = U_MEMORY_ALLOCATION_ERROR;
287 break;
288 }
289 t->bcpId = bcpTypeId;
290 t->legacyId = legacyTypeId;
291 gLocExtTypeEntries->addElement((void*)t, sts);
292 if (U_FAILURE(sts)) {
293 break;
294 }
295
296 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
297 if (bcpTypeId != legacyTypeId) {
298 // different type value
299 uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
300 }
301 if (U_FAILURE(sts)) {
302 break;
303 }
304
305 // also put aliases in the map
306 if (typeAliasResByKey.isValid()) {
307 LocalUResourceBundlePointer typeAliasDataEntry;
308
309 ures_resetIterator(typeAliasResByKey.getAlias());
310 while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
311 int32_t toLen;
312 typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
313 const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
314 if (U_FAILURE(sts)) {
315 break;
316 }
317 // check if this is an alias of canoncal legacy type
318 if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
319 const char* from = ures_getKey(typeAliasDataEntry.getAlias());
320 if (isTZ) {
321 // replace colon with slash if necessary
322 if (uprv_strchr(from, ':') != NULL) {
323 int32_t fromLen = static_cast<int32_t>(uprv_strlen(from));
324 char* fromBuf = (char*)uprv_malloc(fromLen + 1);
325 if (fromBuf == NULL) {
326 sts = U_MEMORY_ALLOCATION_ERROR;
327 break;
328 }
329 const char* p = from;
330 char* q = fromBuf;
331 while (*p) {
332 if (*p == ':') {
333 *q++ = '/';
334 } else {
335 *q++ = *p;
336 }
337 p++;
338 }
339 *q = 0;
340
341 gKeyTypeStringPool->addElement(fromBuf, sts);
342 if (U_FAILURE(sts)) {
343 break;
344 }
345 from = fromBuf;
346 }
347 }
348 uhash_put(typeDataMap, (void*)from, t, &sts);
349 }
350 }
351 if (U_FAILURE(sts)) {
352 break;
353 }
354 }
355
356 if (bcpTypeAliasResByKey.isValid()) {
357 LocalUResourceBundlePointer bcpTypeAliasDataEntry;
358
359 ures_resetIterator(bcpTypeAliasResByKey.getAlias());
360 while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
361 int32_t toLen;
362 bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
363 const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
364 if (U_FAILURE(sts)) {
365 break;
366 }
367 // check if this is an alias of bcp type
368 if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
369 const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
370 uhash_put(typeDataMap, (void*)from, t, &sts);
371 }
372 }
373 if (U_FAILURE(sts)) {
374 break;
375 }
376 }
377 }
378 }
379 if (U_FAILURE(sts)) {
380 break;
381 }
382
383 LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
384 if (keyData == NULL) {
385 sts = U_MEMORY_ALLOCATION_ERROR;
386 break;
387 }
388 keyData->bcpId = bcpKeyId;
389 keyData->legacyId = legacyKeyId;
390 keyData->specialTypes = specialTypes;
391 keyData->typeMap = typeDataMap;
392
393 gLocExtKeyDataEntries->addElement((void*)keyData, sts);
394 if (U_FAILURE(sts)) {
395 break;
396 }
397
398 uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
399 if (legacyKeyId != bcpKeyId) {
400 // different key value
401 uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
402 }
403 if (U_FAILURE(sts)) {
404 break;
405 }
406 }
407 }
408
409 static UBool
init()410 init() {
411 UErrorCode sts = U_ZERO_ERROR;
412 umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
413 if (U_FAILURE(sts)) {
414 return FALSE;
415 }
416 return TRUE;
417 }
418
419 static UBool
isSpecialTypeCodepoints(const char * val)420 isSpecialTypeCodepoints(const char* val) {
421 int32_t subtagLen = 0;
422 const char* p = val;
423 while (*p) {
424 if (*p == '-') {
425 if (subtagLen < 4 || subtagLen > 6) {
426 return FALSE;
427 }
428 subtagLen = 0;
429 } else if ((*p >= '0' && *p <= '9') ||
430 (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
431 (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
432 subtagLen++;
433 } else {
434 return FALSE;
435 }
436 p++;
437 }
438 return (subtagLen >= 4 && subtagLen <= 6);
439 }
440
441 static UBool
isSpecialTypeReorderCode(const char * val)442 isSpecialTypeReorderCode(const char* val) {
443 int32_t subtagLen = 0;
444 const char* p = val;
445 while (*p) {
446 if (*p == '-') {
447 if (subtagLen < 3 || subtagLen > 8) {
448 return FALSE;
449 }
450 subtagLen = 0;
451 } else if (uprv_isASCIILetter(*p)) {
452 subtagLen++;
453 } else {
454 return FALSE;
455 }
456 p++;
457 }
458 return (subtagLen >=3 && subtagLen <=8);
459 }
460
461 static UBool
isSpecialTypeRgKeyValue(const char * val)462 isSpecialTypeRgKeyValue(const char* val) {
463 int32_t subtagLen = 0;
464 const char* p = val;
465 while (*p) {
466 if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
467 (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
468 subtagLen++;
469 } else {
470 return FALSE;
471 }
472 p++;
473 }
474 return (subtagLen == 6);
475 }
476
477 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)478 ulocimp_toBcpKey(const char* key) {
479 if (!init()) {
480 return NULL;
481 }
482
483 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
484 if (keyData != NULL) {
485 return keyData->bcpId;
486 }
487 return NULL;
488 }
489
490 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)491 ulocimp_toLegacyKey(const char* key) {
492 if (!init()) {
493 return NULL;
494 }
495
496 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
497 if (keyData != NULL) {
498 return keyData->legacyId;
499 }
500 return NULL;
501 }
502
503 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)504 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
505 if (isKnownKey != NULL) {
506 *isKnownKey = FALSE;
507 }
508 if (isSpecialType != NULL) {
509 *isSpecialType = FALSE;
510 }
511
512 if (!init()) {
513 return NULL;
514 }
515
516 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
517 if (keyData != NULL) {
518 if (isKnownKey != NULL) {
519 *isKnownKey = TRUE;
520 }
521 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
522 if (t != NULL) {
523 return t->bcpId;
524 }
525 if (keyData->specialTypes != SPECIALTYPE_NONE) {
526 UBool matched = FALSE;
527 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
528 matched = isSpecialTypeCodepoints(type);
529 }
530 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
531 matched = isSpecialTypeReorderCode(type);
532 }
533 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
534 matched = isSpecialTypeRgKeyValue(type);
535 }
536 if (matched) {
537 if (isSpecialType != NULL) {
538 *isSpecialType = TRUE;
539 }
540 return type;
541 }
542 }
543 }
544 return NULL;
545 }
546
547
548 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)549 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
550 if (isKnownKey != NULL) {
551 *isKnownKey = FALSE;
552 }
553 if (isSpecialType != NULL) {
554 *isSpecialType = FALSE;
555 }
556
557 if (!init()) {
558 return NULL;
559 }
560
561 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
562 if (keyData != NULL) {
563 if (isKnownKey != NULL) {
564 *isKnownKey = TRUE;
565 }
566 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);
567 if (t != NULL) {
568 return t->legacyId;
569 }
570 if (keyData->specialTypes != SPECIALTYPE_NONE) {
571 UBool matched = FALSE;
572 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
573 matched = isSpecialTypeCodepoints(type);
574 }
575 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
576 matched = isSpecialTypeReorderCode(type);
577 }
578 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
579 matched = isSpecialTypeRgKeyValue(type);
580 }
581 if (matched) {
582 if (isSpecialType != NULL) {
583 *isSpecialType = TRUE;
584 }
585 return type;
586 }
587 }
588 }
589 return NULL;
590 }
591
592