• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*   Copyright (C) 1996-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_res.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
* Description:
* This file contains dependencies that the collation run-time doesn't normally
* need. This mainly contains resource bundle usage and collation meta information
*
* Modification history
* Date        Name      Comments
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
*/
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/ustring.h"
32 
33 #include "ucol_bld.h"
34 #include "ucol_imp.h"
35 #include "ucol_tok.h"
36 #include "ucol_elm.h"
37 #include "uresimp.h"
38 #include "ustr_imp.h"
39 #include "cstring.h"
40 #include "umutex.h"
41 #include "ustrenum.h"
42 #include "putilimp.h"
43 #include "utracimp.h"
44 #include "cmemory.h"
45 
46 U_NAMESPACE_USE
47 
48 U_CDECL_BEGIN
49 static void U_CALLCONV
ucol_prv_closeResources(UCollator * coll)50 ucol_prv_closeResources(UCollator *coll) {
51     if(coll->rb != NULL) { /* pointing to read-only memory */
52         ures_close(coll->rb);
53     }
54     if(coll->elements != NULL) {
55         ures_close(coll->elements);
56     }
57 }
58 U_CDECL_END
59 
/****************************************************************************/
/* Following are the open/close functions                                   */
/*                                                                          */
/****************************************************************************/
64 static UCollator*
tryOpeningFromRules(UResourceBundle * collElem,UErrorCode * status)65 tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
66     int32_t rulesLen = 0;
67     const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
68     return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
69 
70 }
71 
72 
73 // API in ucol_imp.h
74 
75 U_CFUNC UCollator*
ucol_open_internal(const char * loc,UErrorCode * status)76 ucol_open_internal(const char *loc,
77                    UErrorCode *status)
78 {
79     const UCollator* UCA = ucol_initUCA(status);
80 
81     /* New version */
82     if(U_FAILURE(*status)) return 0;
83 
84 
85 
86     UCollator *result = NULL;
87     UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
88 
89     /* we try to find stuff from keyword */
90     UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
91     UResourceBundle *collElem = NULL;
92     char keyBuffer[256];
93     // if there is a keyword, we pick it up and try to get elements
94     if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
95         // no keyword. we try to find the default setting, which will give us the keyword value
96         UErrorCode intStatus = U_ZERO_ERROR;
97         // finding default value does not affect collation fallback status
98         UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
99         if(U_SUCCESS(intStatus)) {
100             int32_t defaultKeyLen = 0;
101             const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
102             u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
103             keyBuffer[defaultKeyLen] = 0;
104         } else {
105             *status = U_INTERNAL_PROGRAM_ERROR;
106             return NULL;
107         }
108         ures_close(defaultColl);
109     }
110     collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
111 
112     UResourceBundle *binary = NULL;
113 
114     if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
115         *status = U_USING_DEFAULT_WARNING;
116         result = ucol_initCollator(UCA->image, result, UCA, status);
117         // if we use UCA, real locale is root
118         result->rb = ures_open(U_ICUDATA_COLL, "", status);
119         result->elements = ures_open(U_ICUDATA_COLL, "", status);
120         if(U_FAILURE(*status)) {
121             goto clean;
122         }
123         ures_close(b);
124         result->hasRealData = FALSE;
125     } else if(U_SUCCESS(*status)) {
126         int32_t len = 0;
127         UErrorCode binaryStatus = U_ZERO_ERROR;
128 
129         binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
130 
131         if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
132             binary = NULL;
133             result = tryOpeningFromRules(collElem, status);
134             if(U_FAILURE(*status)) {
135                 goto clean;
136             }
137         } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
138             const uint8_t *inData = ures_getBinary(binary, &len, status);
139             UCATableHeader *colData = (UCATableHeader *)inData;
140             if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
141                 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
142                 colData->version[0] != UCOL_BUILDER_VERSION)
143             {
144                 *status = U_DIFFERENT_UCA_VERSION;
145                 result = tryOpeningFromRules(collElem, status);
146             } else {
147                 if(U_FAILURE(*status)){
148                     goto clean;
149                 }
150                 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
151                     result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
152                     if(U_FAILURE(*status)){
153                         goto clean;
154                     }
155                     result->hasRealData = TRUE;
156                 } else {
157                     result = ucol_initCollator(UCA->image, result, UCA, status);
158                     ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
159                     if(U_FAILURE(*status)){
160                         goto clean;
161                     }
162                     result->hasRealData = FALSE;
163                 }
164                 result->freeImageOnClose = FALSE;
165             }
166         }
167         result->rb = b;
168         result->elements = collElem;
169         len = 0;
170         binaryStatus = U_ZERO_ERROR;
171         result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus);
172         result->rulesLength = len;
173         result->freeRulesOnClose = FALSE;
174     } else { /* There is another error, and we're just gonna clean up */
175         goto clean;
176     }
177 
178     result->validLocale = NULL; // default is to use rb info
179 
180     if(loc == NULL) {
181         loc = ures_getLocale(result->rb, status);
182     }
183     result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
184     /* test for NULL */
185     if (result->requestedLocale == NULL) {
186         *status = U_MEMORY_ALLOCATION_ERROR;
187         goto clean;
188     }
189     uprv_strcpy(result->requestedLocale, loc);
190 
191     ures_close(binary);
192     ures_close(collations); //??? we have to decide on that. Probably affects something :)
193     result->resCleaner = ucol_prv_closeResources;
194     return result;
195 
196 clean:
197     ures_close(b);
198     ures_close(collElem);
199     ures_close(collations);
200     ures_close(binary);
201     return NULL;
202 }
203 
204 U_CAPI UCollator*
ucol_open(const char * loc,UErrorCode * status)205 ucol_open(const char *loc,
206           UErrorCode *status)
207 {
208     U_NAMESPACE_USE
209 
210     UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
211     UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
212     UCollator *result = NULL;
213 
214     u_init(status);
215 #if !UCONFIG_NO_SERVICE
216     result = Collator::createUCollator(loc, status);
217     if (result == NULL)
218 #endif
219     {
220         result = ucol_open_internal(loc, status);
221     }
222     UTRACE_EXIT_PTR_STATUS(result, *status);
223     return result;
224 }
225 
226 U_CAPI UCollator* U_EXPORT2
ucol_openRules(const UChar * rules,int32_t rulesLength,UColAttributeValue normalizationMode,UCollationStrength strength,UParseError * parseError,UErrorCode * status)227 ucol_openRules( const UChar        *rules,
228                int32_t            rulesLength,
229                UColAttributeValue normalizationMode,
230                UCollationStrength strength,
231                UParseError        *parseError,
232                UErrorCode         *status)
233 {
234     UColTokenParser src;
235     UColAttributeValue norm;
236     UParseError tErr;
237 
238     if(status == NULL || U_FAILURE(*status)){
239         return 0;
240     }
241 
242     u_init(status);
243     if (U_FAILURE(*status)) {
244         return NULL;
245     }
246 
247     if(rules == NULL || rulesLength < -1) {
248         *status = U_ILLEGAL_ARGUMENT_ERROR;
249         return 0;
250     }
251 
252     if(rulesLength == -1) {
253         rulesLength = u_strlen(rules);
254     }
255 
256     if(parseError == NULL){
257         parseError = &tErr;
258     }
259 
260     switch(normalizationMode) {
261     case UCOL_OFF:
262     case UCOL_ON:
263     case UCOL_DEFAULT:
264         norm = normalizationMode;
265         break;
266     default:
267         *status = U_ILLEGAL_ARGUMENT_ERROR;
268         return 0;
269     }
270 
271     UCollator *UCA = ucol_initUCA(status);
272 
273     if(U_FAILURE(*status)){
274         return NULL;
275     }
276 
277     ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
278     ucol_tok_assembleTokenList(&src,parseError, status);
279 
280     if(U_FAILURE(*status)) {
281         /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
282         /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
283         /* so something might be done here... or on lower level */
284 #ifdef UCOL_DEBUG
285         if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
286             fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
287         } else {
288             fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
289         }
290 #endif
291         ucol_tok_closeTokenList(&src);
292         return NULL;
293     }
294     UCollator *result = NULL;
295     UCATableHeader *table = NULL;
296 
297     if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
298         /* also, if we wanted to remove some contractions, we should make a tailoring */
299         table = ucol_assembleTailoringTable(&src, status);
300         if(U_SUCCESS(*status)) {
301             // builder version
302             table->version[0] = UCOL_BUILDER_VERSION;
303             // no tailoring information on this level
304             table->version[1] = table->version[2] = table->version[3] = 0;
305             // set UCD version
306             u_getUnicodeVersion(table->UCDVersion);
307             // set UCA version
308             uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
309             result = ucol_initCollator(table, 0, UCA, status);
310             result->hasRealData = TRUE;
311             result->freeImageOnClose = TRUE;
312         }
313     } else { /* no rules, but no error either */
314         // must be only options
315         // We will init the collator from UCA
316         result = ucol_initCollator(UCA->image, 0, UCA, status);
317         // And set only the options
318         UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
319         /* test for NULL */
320         if (opts == NULL) {
321             *status = U_MEMORY_ALLOCATION_ERROR;
322             goto cleanup;
323         }
324         uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
325         ucol_setOptionsFromHeader(result, opts, status);
326         result->freeOptionsOnClose = TRUE;
327         result->hasRealData = FALSE;
328         result->freeImageOnClose = FALSE;
329     }
330 
331     if(U_SUCCESS(*status)) {
332         UChar *newRules;
333         result->dataVersion[0] = UCOL_BUILDER_VERSION;
334         if(rulesLength > 0) {
335             newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
336             /* test for NULL */
337             if (newRules == NULL) {
338                 *status = U_MEMORY_ALLOCATION_ERROR;
339                 goto cleanup;
340             }
341             uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
342             newRules[rulesLength]=0;
343             result->rules = newRules;
344             result->rulesLength = rulesLength;
345             result->freeRulesOnClose = TRUE;
346         }
347         result->rb = NULL;
348         result->elements = NULL;
349         result->validLocale = NULL;
350         result->requestedLocale = NULL;
351         ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
352         ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
353     } else {
354 cleanup:
355         if(result != NULL) {
356             ucol_close(result);
357         } else {
358             if(table != NULL) {
359                 uprv_free(table);
360             }
361         }
362         result = NULL;
363     }
364 
365     ucol_tok_closeTokenList(&src);
366 
367     return result;
368 }
369 
370 U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator * coll,UColRuleOption delta,UChar * buffer,int32_t bufferLen)371 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
372     UErrorCode status = U_ZERO_ERROR;
373     int32_t len = 0;
374     int32_t UCAlen = 0;
375     const UChar* ucaRules = 0;
376     const UChar *rules = ucol_getRules(coll, &len);
377     if(delta == UCOL_FULL_RULES) {
378         /* take the UCA rules and append real rules at the end */
379         /* UCA rules will be probably coming from the root RB */
380         ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
381         /*
382         UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
383         UResourceBundle*  uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
384         ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
385         ures_close(uca);
386         ures_close(cresb);
387         */
388     }
389     if(U_FAILURE(status)) {
390         return 0;
391     }
392     if(buffer!=0 && bufferLen>0){
393         *buffer=0;
394         if(UCAlen > 0) {
395             u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
396         }
397         if(len > 0 && bufferLen > UCAlen) {
398             u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
399         }
400     }
401     return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
402 }
403 
404 static const UChar _NUL = 0;
405 
406 U_CAPI const UChar* U_EXPORT2
ucol_getRules(const UCollator * coll,int32_t * length)407 ucol_getRules(    const    UCollator       *coll,
408               int32_t            *length)
409 {
410     if(coll->rules != NULL) {
411         *length = coll->rulesLength;
412         return coll->rules;
413     }
414     else {
415         *length = 0;
416         return &_NUL;
417     }
418 }
419 
420 U_CAPI UBool U_EXPORT2
ucol_equals(const UCollator * source,const UCollator * target)421 ucol_equals(const UCollator *source, const UCollator *target) {
422     UErrorCode status = U_ZERO_ERROR;
423     // if pointers are equal, collators are equal
424     if(source == target) {
425         return TRUE;
426     }
427     int32_t i = 0, j = 0;
428     // if any of attributes are different, collators are not equal
429     for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
430         if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
431             return FALSE;
432         }
433     }
434 
435     int32_t sourceRulesLen = 0, targetRulesLen = 0;
436     const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
437     const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
438 
439     if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
440         // all the attributes are equal and the rules are equal - collators are equal
441         return(TRUE);
442     }
443     // hard part, need to construct tree from rules and see if they yield the same tailoring
444     UBool result = TRUE;
445     UParseError parseError;
446     UColTokenParser sourceParser, targetParser;
447     int32_t sourceListLen = 0, targetListLen = 0;
448     ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
449     ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
450     sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
451     targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
452 
453     if(sourceListLen != targetListLen) {
454         // different number of resets
455         result = FALSE;
456     } else {
457         UColToken *sourceReset = NULL, *targetReset = NULL;
458         UChar *sourceResetString = NULL, *targetResetString = NULL;
459         int32_t sourceStringLen = 0, targetStringLen = 0;
460         for(i = 0; i < sourceListLen; i++) {
461             sourceReset = sourceParser.lh[i].reset;
462             sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
463             sourceStringLen = sourceReset->source >> 24;
464             for(j = 0; j < sourceListLen; j++) {
465                 targetReset = targetParser.lh[j].reset;
466                 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
467                 targetStringLen = targetReset->source >> 24;
468                 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
469                     sourceReset = sourceParser.lh[i].first;
470                     targetReset = targetParser.lh[j].first;
471                     while(sourceReset != NULL && targetReset != NULL) {
472                         sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
473                         sourceStringLen = sourceReset->source >> 24;
474                         targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
475                         targetStringLen = targetReset->source >> 24;
476                         if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
477                             result = FALSE;
478                             goto returnResult;
479                         }
480                         // probably also need to check the expansions
481                         if(sourceReset->expansion) {
482                             if(!targetReset->expansion) {
483                                 result = FALSE;
484                                 goto returnResult;
485                             } else {
486                                 // compare expansions
487                                 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
488                                 sourceStringLen = sourceReset->expansion >> 24;
489                                 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
490                                 targetStringLen = targetReset->expansion >> 24;
491                                 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
492                                     result = FALSE;
493                                     goto returnResult;
494                                 }
495                             }
496                         } else {
497                             if(targetReset->expansion) {
498                                 result = FALSE;
499                                 goto returnResult;
500                             }
501                         }
502                         sourceReset = sourceReset->next;
503                         targetReset = targetReset->next;
504                     }
505                     if(sourceReset != targetReset) { // at least one is not NULL
506                         // there are more tailored elements in one list
507                         result = FALSE;
508                         goto returnResult;
509                     }
510 
511 
512                     break;
513                 }
514             }
515             // couldn't find the reset anchor, so the collators are not equal
516             if(j == sourceListLen) {
517                 result = FALSE;
518                 goto returnResult;
519             }
520         }
521     }
522 
523 returnResult:
524     ucol_tok_closeTokenList(&sourceParser);
525     ucol_tok_closeTokenList(&targetParser);
526     return result;
527 
528 }
529 
530 U_CAPI int32_t U_EXPORT2
ucol_getDisplayName(const char * objLoc,const char * dispLoc,UChar * result,int32_t resultLength,UErrorCode * status)531 ucol_getDisplayName(    const    char        *objLoc,
532                     const    char        *dispLoc,
533                     UChar             *result,
534                     int32_t         resultLength,
535                     UErrorCode        *status)
536 {
537     U_NAMESPACE_USE
538 
539     if(U_FAILURE(*status)) return -1;
540     UnicodeString dst;
541     if(!(result==NULL && resultLength==0)) {
542         // NULL destination for pure preflighting: empty dummy string
543         // otherwise, alias the destination buffer
544         dst.setTo(result, 0, resultLength);
545     }
546     Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
547     return dst.extract(result, resultLength, *status);
548 }
549 
550 U_CAPI const char* U_EXPORT2
ucol_getAvailable(int32_t index)551 ucol_getAvailable(int32_t index)
552 {
553     int32_t count = 0;
554     const Locale *loc = Collator::getAvailableLocales(count);
555     if (loc != NULL && index < count) {
556         return loc[index].getName();
557     }
558     return NULL;
559 }
560 
561 U_CAPI int32_t U_EXPORT2
ucol_countAvailable()562 ucol_countAvailable()
563 {
564     int32_t count = 0;
565     Collator::getAvailableLocales(count);
566     return count;
567 }
568 
569 #if !UCONFIG_NO_SERVICE
570 U_CAPI UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode * status)571 ucol_openAvailableLocales(UErrorCode *status) {
572     U_NAMESPACE_USE
573 
574     // This is a wrapper over Collator::getAvailableLocales()
575     if (U_FAILURE(*status)) {
576         return NULL;
577     }
578     StringEnumeration *s = Collator::getAvailableLocales();
579     if (s == NULL) {
580         *status = U_MEMORY_ALLOCATION_ERROR;
581         return NULL;
582     }
583     return uenum_openStringEnumeration(s, status);
584 }
585 #endif
586 
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is named "collations" while the locale keyword is "collation".

// Name of the resource table passed to ures_getKeywordValues().
static const char *RESOURCE_NAME = "collations";

// Locale keywords understood by the collation service.
static const char *KEYWORDS[] = {
    "collation"
};

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT (sizeof(KEYWORDS) / sizeof(KEYWORDS[0]))
594 
595 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode * status)596 ucol_getKeywords(UErrorCode *status) {
597     UEnumeration *result = NULL;
598     if (U_SUCCESS(*status)) {
599         return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
600     }
601     return result;
602 }
603 
604 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char * keyword,UErrorCode * status)605 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
606     if (U_FAILURE(*status)) {
607         return NULL;
608     }
609     // hard-coded to accept exactly one collation keyword
610     // modify if additional collation keyword is added later
611     if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
612     {
613         *status = U_ILLEGAL_ARGUMENT_ERROR;
614         return NULL;
615     }
616     return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
617 }
618 
619 U_CAPI int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char * result,int32_t resultCapacity,const char * keyword,const char * locale,UBool * isAvailable,UErrorCode * status)620 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
621                              const char* keyword, const char* locale,
622                              UBool* isAvailable, UErrorCode* status)
623 {
624     // N.B.: Resource name is "collations" but keyword is "collation"
625     return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
626         "collations", keyword, locale,
627         isAvailable, TRUE, status);
628 }
629 
630 /* returns the locale name the collation data comes from */
631 U_CAPI const char * U_EXPORT2
ucol_getLocale(const UCollator * coll,ULocDataLocaleType type,UErrorCode * status)632 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
633     return ucol_getLocaleByType(coll, type, status);
634 }
635 
636 U_CAPI const char * U_EXPORT2
ucol_getLocaleByType(const UCollator * coll,ULocDataLocaleType type,UErrorCode * status)637 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
638     const char *result = NULL;
639     if(status == NULL || U_FAILURE(*status)) {
640         return NULL;
641     }
642     UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
643     UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
644 
645     switch(type) {
646   case ULOC_ACTUAL_LOCALE:
647       // validLocale is set only if service registration has explicitly set the
648       // requested and valid locales.  if this is the case, the actual locale
649       // is considered to be the valid locale.
650       if (coll->validLocale != NULL) {
651           result = coll->validLocale;
652       } else if(coll->elements != NULL) {
653           result = ures_getLocale(coll->elements, status);
654       }
655       break;
656   case ULOC_VALID_LOCALE:
657       if (coll->validLocale != NULL) {
658           result = coll->validLocale;
659       } else if(coll->rb != NULL) {
660           result = ures_getLocale(coll->rb, status);
661       }
662       break;
663   case ULOC_REQUESTED_LOCALE:
664       result = coll->requestedLocale;
665       break;
666   default:
667       *status = U_ILLEGAL_ARGUMENT_ERROR;
668     }
669     UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
670     UTRACE_EXIT_STATUS(*status);
671     return result;
672 }
673 
674 U_CFUNC void U_EXPORT2
ucol_setReqValidLocales(UCollator * coll,char * requestedLocaleToAdopt,char * validLocaleToAdopt)675 ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
676 {
677     if (coll) {
678         if (coll->validLocale) {
679             uprv_free(coll->validLocale);
680         }
681         coll->validLocale = validLocaleToAdopt;
682         if (coll->requestedLocale) { // should always have
683             uprv_free(coll->requestedLocale);
684         }
685         coll->requestedLocale = requestedLocaleToAdopt;
686     }
687 }
688 
689 U_CAPI USet * U_EXPORT2
ucol_getTailoredSet(const UCollator * coll,UErrorCode * status)690 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
691 {
692     U_NAMESPACE_USE
693 
694     if(status == NULL || U_FAILURE(*status)) {
695         return NULL;
696     }
697     if(coll == NULL || coll->UCA == NULL) {
698         *status = U_ILLEGAL_ARGUMENT_ERROR;
699         return NULL;
700     }
701     UParseError parseError;
702     UColTokenParser src;
703     int32_t rulesLen = 0;
704     const UChar *rules = ucol_getRules(coll, &rulesLen);
705     UBool startOfRules = TRUE;
706     // we internally use the C++ class, for the following reasons:
707     // 1. we need to utilize canonical iterator, which is a C++ only class
708     // 2. canonical iterator returns UnicodeStrings - USet cannot take them
709     // 3. USet is internally really UnicodeSet, C is just a wrapper
710     UnicodeSet *tailored = new UnicodeSet();
711     UnicodeString pattern;
712     UnicodeString empty;
713     CanonicalIterator it(empty, *status);
714 
715 
716     // The idea is to tokenize the rule set. For each non-reset token,
717     // we add all the canonicaly equivalent FCD sequences
718     ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
719     while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
720         startOfRules = FALSE;
721         if(src.parsedToken.strength != UCOL_TOK_RESET) {
722             const UChar *stuff = src.source+(src.parsedToken.charsOffset);
723             it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
724             pattern = it.next();
725             while(!pattern.isBogus()) {
726                 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
727                     tailored->add(pattern);
728                 }
729                 pattern = it.next();
730             }
731         }
732     }
733     ucol_tok_closeTokenList(&src);
734     return (USet *)tailored;
735 }
736 
737 #endif /* #if !UCONFIG_NO_COLLATION */
738