• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*   Copyright (C) 1996-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_res.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
* Description:
* This file contains dependencies that the collation run-time doesn't normally
* need. This mainly contains resource bundle usage and collation meta information.
*
* Modification history
* Date        Name      Comments
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
* 2012-2014   markus    Rewritten in C++ again.
*/
24 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_COLLATION
28 
29 #include "unicode/coll.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/locid.h"
32 #include "unicode/tblcoll.h"
33 #include "unicode/ucol.h"
34 #include "unicode/uloc.h"
35 #include "unicode/unistr.h"
36 #include "unicode/ures.h"
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "collationdatareader.h"
40 #include "collationroot.h"
41 #include "collationtailoring.h"
42 #include "putilimp.h"
43 #include "uassert.h"
44 #include "ucln_in.h"
45 #include "ucol_imp.h"
46 #include "uenumimp.h"
47 #include "ulist.h"
48 #include "umutex.h"
49 #include "uresimp.h"
50 #include "ustrenum.h"
51 #include "utracimp.h"
52 
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that it behaves as a single operand
// next to any operator (for example `sizeof LENGTHOF(a)`).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
54 
55 U_NAMESPACE_BEGIN
56 
57 namespace {
58 
59 static const UChar *rootRules = NULL;
60 static int32_t rootRulesLength = 0;
61 static UResourceBundle *rootBundle = NULL;
62 static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
63 
64 }  // namespace
65 
66 U_CDECL_BEGIN
67 
68 static UBool U_CALLCONV
ucol_res_cleanup()69 ucol_res_cleanup() {
70     rootRules = NULL;
71     rootRulesLength = 0;
72     ures_close(rootBundle);
73     rootBundle = NULL;
74     gInitOnce.reset();
75     return TRUE;
76 }
77 
78 U_CDECL_END
79 
80 void
loadRootRules(UErrorCode & errorCode)81 CollationLoader::loadRootRules(UErrorCode &errorCode) {
82     if(U_FAILURE(errorCode)) { return; }
83     rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode);
84     if(U_FAILURE(errorCode)) { return; }
85     rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode);
86     if(U_FAILURE(errorCode)) {
87         ures_close(rootBundle);
88         rootBundle = NULL;
89         return;
90     }
91     ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup);
92 }
93 
94 void
appendRootRules(UnicodeString & s)95 CollationLoader::appendRootRules(UnicodeString &s) {
96     UErrorCode errorCode = U_ZERO_ERROR;
97     umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode);
98     if(U_SUCCESS(errorCode)) {
99         s.append(rootRules, rootRulesLength);
100     }
101 }
102 
103 UnicodeString *
loadRules(const char * localeID,const char * collationType,UErrorCode & errorCode)104 CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) {
105     if(U_FAILURE(errorCode)) { return NULL; }
106     U_ASSERT(collationType != NULL && *collationType != 0);
107     // Copy the type for lowercasing.
108     char type[16];
109     int32_t typeLength = uprv_strlen(collationType);
110     if(typeLength >= LENGTHOF(type)) {
111         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
112         return NULL;
113     }
114     uprv_memcpy(type, collationType, typeLength + 1);
115     T_CString_toLowerCase(type);
116 
117     LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode));
118     LocalUResourceBundlePointer collations(
119             ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
120     LocalUResourceBundlePointer data(
121             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
122     int32_t length;
123     const UChar *s =  ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode);
124     if(U_FAILURE(errorCode)) { return NULL; }
125 
126     // No string pointer aliasing so that we need not hold onto the resource bundle.
127     UnicodeString *rules = new UnicodeString(s, length);
128     if(rules == NULL) {
129         errorCode = U_MEMORY_ALLOCATION_ERROR;
130         return NULL;
131     }
132     return rules;
133 }
134 
135 const CollationTailoring *
loadTailoring(const Locale & locale,Locale & validLocale,UErrorCode & errorCode)136 CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) {
137     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
138     if(U_FAILURE(errorCode)) { return NULL; }
139     const char *name = locale.getName();
140     if(*name == 0 || uprv_strcmp(name, "root") == 0) {
141         validLocale = Locale::getRoot();
142         return root;
143     }
144 
145     LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode));
146     if(errorCode == U_MISSING_RESOURCE_ERROR) {
147         errorCode = U_USING_DEFAULT_WARNING;
148         validLocale = Locale::getRoot();
149         return root;
150     }
151     const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
152     if(U_FAILURE(errorCode)) { return NULL; }
153     validLocale = Locale(vLocale);
154 
155     // There are zero or more tailorings in the collations table.
156     LocalUResourceBundlePointer collations(
157             ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode));
158     if(errorCode == U_MISSING_RESOURCE_ERROR) {
159         errorCode = U_USING_DEFAULT_WARNING;
160         return root;
161     }
162     if(U_FAILURE(errorCode)) { return NULL; }
163 
164     // Fetch the collation type from the locale ID and the default type from the data.
165     char type[16];
166     int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode);
167     if(U_FAILURE(errorCode)) {
168         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
169         return NULL;
170     }
171     type[typeLength] = 0;  // in case of U_NOT_TERMINATED_WARNING
172     char defaultType[16];
173     {
174         UErrorCode internalErrorCode = U_ZERO_ERROR;
175         LocalUResourceBundlePointer def(
176                 ures_getByKeyWithFallback(collations.getAlias(), "default", NULL,
177                                           &internalErrorCode));
178         int32_t length;
179         const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
180         if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
181             u_UCharsToChars(s, defaultType, length + 1);
182         } else {
183             uprv_strcpy(defaultType, "standard");
184         }
185     }
186     if(typeLength == 0 || uprv_strcmp(type, "default") == 0) {
187         uprv_strcpy(type, defaultType);
188     } else {
189         T_CString_toLowerCase(type);
190     }
191 
192     // Load the collations/type tailoring, with type fallback.
193     UBool typeFallback = FALSE;
194     LocalUResourceBundlePointer data(
195             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
196     if(errorCode == U_MISSING_RESOURCE_ERROR &&
197             typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) {
198         // fall back from something like "searchjl" to "search"
199         typeFallback = TRUE;
200         type[6] = 0;
201         errorCode = U_ZERO_ERROR;
202         data.adoptInstead(
203             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
204     }
205     if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) {
206         // fall back to the default type
207         typeFallback = TRUE;
208         uprv_strcpy(type, defaultType);
209         errorCode = U_ZERO_ERROR;
210         data.adoptInstead(
211             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
212     }
213     if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) {
214         // fall back to the "standard" type
215         typeFallback = TRUE;
216         uprv_strcpy(type, "standard");
217         errorCode = U_ZERO_ERROR;
218         data.adoptInstead(
219             ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode));
220     }
221     if(errorCode == U_MISSING_RESOURCE_ERROR) {
222         errorCode = U_USING_DEFAULT_WARNING;
223         return root;
224     }
225     if(U_FAILURE(errorCode)) { return NULL; }
226 
227     LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings));
228     if(t.isNull() || t->isBogus()) {
229         errorCode = U_MEMORY_ALLOCATION_ERROR;
230         return NULL;
231     }
232 
233     // Is this the same as the root collator? If so, then use that instead.
234     const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode);
235     if(U_FAILURE(errorCode)) { return NULL; }
236     if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) &&
237             uprv_strcmp(type, "standard") == 0) {
238         if(typeFallback) {
239             errorCode = U_USING_DEFAULT_WARNING;
240         }
241         return root;
242     }
243     t->actualLocale = Locale(actualLocale);
244 
245     // deserialize
246     LocalUResourceBundlePointer binary(
247             ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode));
248     // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available
249     // but that created undesirable dependencies.
250     int32_t length;
251     const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode);
252     if(U_FAILURE(errorCode)) { return NULL; }
253     CollationDataReader::read(root, inBytes, length, *t, errorCode);
254     // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available
255     // but that created undesirable dependencies.
256     if(U_FAILURE(errorCode)) { return NULL; }
257 
258     // Try to fetch the optional rules string.
259     {
260         UErrorCode internalErrorCode = U_ZERO_ERROR;
261         int32_t length;
262         const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length,
263                                              &internalErrorCode);
264         if(U_SUCCESS(errorCode)) {
265             t->rules.setTo(TRUE, s, length);
266         }
267     }
268 
269     // Set the collation types on the informational locales,
270     // except when they match the default types (for brevity and backwards compatibility).
271     // For the valid locale, suppress the default type.
272     if(uprv_strcmp(type, defaultType) != 0) {
273         validLocale.setKeywordValue("collation", type, errorCode);
274         if(U_FAILURE(errorCode)) { return NULL; }
275     }
276 
277     // For the actual locale, suppress the default type *according to the actual locale*.
278     // For example, zh has default=pinyin and contains all of the Chinese tailorings.
279     // zh_Hant has default=stroke but has no other data.
280     // For the valid locale "zh_Hant" we need to suppress stroke.
281     // For the actual locale "zh" we need to suppress pinyin instead.
282     if(uprv_strcmp(actualLocale, vLocale) != 0) {
283         // Opening a bundle for the actual locale should always succeed.
284         LocalUResourceBundlePointer actualBundle(
285                 ures_open(U_ICUDATA_COLL, actualLocale, &errorCode));
286         if(U_FAILURE(errorCode)) { return NULL; }
287         UErrorCode internalErrorCode = U_ZERO_ERROR;
288         LocalUResourceBundlePointer def(
289                 ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL,
290                                           &internalErrorCode));
291         int32_t length;
292         const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode);
293         if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) {
294             u_UCharsToChars(s, defaultType, length + 1);
295         } else {
296             uprv_strcpy(defaultType, "standard");
297         }
298     }
299     if(uprv_strcmp(type, defaultType) != 0) {
300         t->actualLocale.setKeywordValue("collation", type, errorCode);
301         if(U_FAILURE(errorCode)) { return NULL; }
302     }
303 
304     if(typeFallback) {
305         errorCode = U_USING_DEFAULT_WARNING;
306     }
307     t->bundle = bundle.orphan();
308     return t.orphan();
309 }
310 
311 U_NAMESPACE_END
312 
313 U_NAMESPACE_USE
314 
315 U_CAPI UCollator*
ucol_open(const char * loc,UErrorCode * status)316 ucol_open(const char *loc,
317           UErrorCode *status)
318 {
319     U_NAMESPACE_USE
320 
321     UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
322     UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
323     UCollator *result = NULL;
324 
325     Collator *coll = Collator::createInstance(loc, *status);
326     if(U_SUCCESS(*status)) {
327         result = coll->toUCollator();
328     }
329     UTRACE_EXIT_PTR_STATUS(result, *status);
330     return result;
331 }
332 
333 
334 U_CAPI int32_t U_EXPORT2
ucol_getDisplayName(const char * objLoc,const char * dispLoc,UChar * result,int32_t resultLength,UErrorCode * status)335 ucol_getDisplayName(    const    char        *objLoc,
336                     const    char        *dispLoc,
337                     UChar             *result,
338                     int32_t         resultLength,
339                     UErrorCode        *status)
340 {
341     U_NAMESPACE_USE
342 
343     if(U_FAILURE(*status)) return -1;
344     UnicodeString dst;
345     if(!(result==NULL && resultLength==0)) {
346         // NULL destination for pure preflighting: empty dummy string
347         // otherwise, alias the destination buffer
348         dst.setTo(result, 0, resultLength);
349     }
350     Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
351     return dst.extract(result, resultLength, *status);
352 }
353 
354 U_CAPI const char* U_EXPORT2
ucol_getAvailable(int32_t index)355 ucol_getAvailable(int32_t index)
356 {
357     int32_t count = 0;
358     const Locale *loc = Collator::getAvailableLocales(count);
359     if (loc != NULL && index < count) {
360         return loc[index].getName();
361     }
362     return NULL;
363 }
364 
365 U_CAPI int32_t U_EXPORT2
ucol_countAvailable()366 ucol_countAvailable()
367 {
368     int32_t count = 0;
369     Collator::getAvailableLocales(count);
370     return count;
371 }
372 
373 #if !UCONFIG_NO_SERVICE
374 U_CAPI UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode * status)375 ucol_openAvailableLocales(UErrorCode *status) {
376     U_NAMESPACE_USE
377 
378     // This is a wrapper over Collator::getAvailableLocales()
379     if (U_FAILURE(*status)) {
380         return NULL;
381     }
382     StringEnumeration *s = icu::Collator::getAvailableLocales();
383     if (s == NULL) {
384         *status = U_MEMORY_ALLOCATION_ERROR;
385         return NULL;
386     }
387     return uenum_openFromStringEnumeration(s, status);
388 }
389 #endif
390 
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is named "collations" while the locale keyword is "collation".

// Name of the resource table that holds the collation data.
static const char RESOURCE_NAME[] = "collations";

// Locale keywords returned by ucol_getKeywords().
static const char* const KEYWORDS[] = { "collation" };

#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
398 
399 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode * status)400 ucol_getKeywords(UErrorCode *status) {
401     UEnumeration *result = NULL;
402     if (U_SUCCESS(*status)) {
403         return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
404     }
405     return result;
406 }
407 
408 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char * keyword,UErrorCode * status)409 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
410     if (U_FAILURE(*status)) {
411         return NULL;
412     }
413     // hard-coded to accept exactly one collation keyword
414     // modify if additional collation keyword is added later
415     if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
416     {
417         *status = U_ILLEGAL_ARGUMENT_ERROR;
418         return NULL;
419     }
420     return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
421 }
422 
423 static const UEnumeration defaultKeywordValues = {
424     NULL,
425     NULL,
426     ulist_close_keyword_values_iterator,
427     ulist_count_keyword_values,
428     uenum_unextDefault,
429     ulist_next_keyword_value,
430     ulist_reset_keyword_values_iterator
431 };
432 
433 #include <stdio.h>
434 
435 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValuesForLocale(const char *,const char * locale,UBool,UErrorCode * status)436 ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale,
437                                UBool /*commonlyUsed*/, UErrorCode* status) {
438     /* Get the locale base name. */
439     char localeBuffer[ULOC_FULLNAME_CAPACITY] = "";
440     uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status);
441 
442     /* Create the 2 lists
443      * -values is the temp location for the keyword values
444      * -results hold the actual list used by the UEnumeration object
445      */
446     UList *values = ulist_createEmptyList(status);
447     UList *results = ulist_createEmptyList(status);
448     UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
449     if (U_FAILURE(*status) || en == NULL) {
450         if (en == NULL) {
451             *status = U_MEMORY_ALLOCATION_ERROR;
452         } else {
453             uprv_free(en);
454         }
455         ulist_deleteList(values);
456         ulist_deleteList(results);
457         return NULL;
458     }
459 
460     memcpy(en, &defaultKeywordValues, sizeof(UEnumeration));
461     en->context = results;
462 
463     /* Open the resource bundle for collation with the given locale. */
464     UResourceBundle bundle, collations, collres, defres;
465     ures_initStackObject(&bundle);
466     ures_initStackObject(&collations);
467     ures_initStackObject(&collres);
468     ures_initStackObject(&defres);
469 
470     ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
471 
472     while (U_SUCCESS(*status)) {
473         ures_getByKey(&bundle, RESOURCE_NAME, &collations, status);
474         ures_resetIterator(&collations);
475         while (U_SUCCESS(*status) && ures_hasNext(&collations)) {
476             ures_getNextResource(&collations, &collres, status);
477             const char *key = ures_getKey(&collres);
478             /* If the key is default, get the string and store it in results list only
479              * if results list is empty.
480              */
481             if (uprv_strcmp(key, "default") == 0) {
482                 if (ulist_getListSize(results) == 0) {
483                     char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY);
484                     int32_t defcollLength = ULOC_KEYWORDS_CAPACITY;
485 
486                     ures_getNextResource(&collres, &defres, status);
487 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
488 			/* optimize - use the utf-8 string */
489                     ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status);
490 #else
491                     {
492                        const UChar* defString = ures_getString(&defres, &defcollLength, status);
493                        if(U_SUCCESS(*status)) {
494 			   if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) {
495 				*status = U_BUFFER_OVERFLOW_ERROR;
496 			   } else {
497                            	u_UCharsToChars(defString, defcoll, defcollLength+1);
498 			   }
499                        }
500                     }
501 #endif
502 
503                     ulist_addItemBeginList(results, defcoll, TRUE, status);
504                 }
505             } else {
506                 ulist_addItemEndList(values, key, FALSE, status);
507             }
508         }
509 
510         /* If the locale is "" this is root so exit. */
511         if (uprv_strlen(localeBuffer) == 0) {
512             break;
513         }
514         /* Get the parent locale and open a new resource bundle. */
515         uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status);
516         ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status);
517     }
518 
519     ures_close(&defres);
520     ures_close(&collres);
521     ures_close(&collations);
522     ures_close(&bundle);
523 
524     if (U_SUCCESS(*status)) {
525         char *value = NULL;
526         ulist_resetList(values);
527         while ((value = (char *)ulist_getNext(values)) != NULL) {
528             if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) {
529                 ulist_addItemEndList(results, value, FALSE, status);
530                 if (U_FAILURE(*status)) {
531                     break;
532                 }
533             }
534         }
535     }
536 
537     ulist_deleteList(values);
538 
539     if (U_FAILURE(*status)){
540         uenum_close(en);
541         en = NULL;
542     } else {
543         ulist_resetList(results);
544     }
545 
546     return en;
547 }
548 
549 U_CAPI int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char * result,int32_t resultCapacity,const char * keyword,const char * locale,UBool * isAvailable,UErrorCode * status)550 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
551                              const char* keyword, const char* locale,
552                              UBool* isAvailable, UErrorCode* status)
553 {
554     // N.B.: Resource name is "collations" but keyword is "collation"
555     return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
556         "collations", keyword, locale,
557         isAvailable, TRUE, status);
558 }
559 
560 #endif /* #if !UCONFIG_NO_COLLATION */
561