1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucol_res.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Description:
12 * This file contains dependencies that the collation run-time doesn't normally
13 * need. This mainly contains resource bundle usage and collation meta information
14 *
15 * Modification history
16 * Date Name Comments
17 * 1996-1999 various members of ICU team maintained C API for collation framework
18 * 02/16/2001 synwee Added internal method getPrevSpecialCE
19 * 03/01/2001 synwee Added maxexpansion functionality.
20 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
21 * 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp
22 */
23
24 #include "unicode/utypes.h"
25
26 #if !UCONFIG_NO_COLLATION
27 #include "unicode/uloc.h"
28 #include "unicode/coll.h"
29 #include "unicode/tblcoll.h"
30 #include "unicode/caniter.h"
31 #include "unicode/ustring.h"
32
33 #include "ucol_bld.h"
34 #include "ucol_imp.h"
35 #include "ucol_tok.h"
36 #include "ucol_elm.h"
37 #include "uresimp.h"
38 #include "ustr_imp.h"
39 #include "cstring.h"
40 #include "umutex.h"
41 #include "ustrenum.h"
42 #include "putilimp.h"
43 #include "utracimp.h"
44 #include "cmemory.h"
45
46 U_NAMESPACE_USE
47
48 U_CDECL_BEGIN
49 static void U_CALLCONV
ucol_prv_closeResources(UCollator * coll)50 ucol_prv_closeResources(UCollator *coll) {
51 if(coll->rb != NULL) { /* pointing to read-only memory */
52 ures_close(coll->rb);
53 }
54 if(coll->elements != NULL) {
55 ures_close(coll->elements);
56 }
57 }
58 U_CDECL_END
59
60 /****************************************************************************/
61 /* Following are the open/close functions */
62 /* */
63 /****************************************************************************/
64 static UCollator*
tryOpeningFromRules(UResourceBundle * collElem,UErrorCode * status)65 tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) {
66 int32_t rulesLen = 0;
67 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status);
68 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status);
69
70 }
71
72
73 // API in ucol_imp.h
74
75 U_CFUNC UCollator*
ucol_open_internal(const char * loc,UErrorCode * status)76 ucol_open_internal(const char *loc,
77 UErrorCode *status)
78 {
79 const UCollator* UCA = ucol_initUCA(status);
80
81 /* New version */
82 if(U_FAILURE(*status)) return 0;
83
84
85
86 UCollator *result = NULL;
87 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status);
88
89 /* we try to find stuff from keyword */
90 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status);
91 UResourceBundle *collElem = NULL;
92 char keyBuffer[256];
93 // if there is a keyword, we pick it up and try to get elements
94 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status)) {
95 // no keyword. we try to find the default setting, which will give us the keyword value
96 UErrorCode intStatus = U_ZERO_ERROR;
97 // finding default value does not affect collation fallback status
98 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus);
99 if(U_SUCCESS(intStatus)) {
100 int32_t defaultKeyLen = 0;
101 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus);
102 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen);
103 keyBuffer[defaultKeyLen] = 0;
104 } else {
105 *status = U_INTERNAL_PROGRAM_ERROR;
106 return NULL;
107 }
108 ures_close(defaultColl);
109 }
110 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status);
111
112 UResourceBundle *binary = NULL;
113
114 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */
115 *status = U_USING_DEFAULT_WARNING;
116 result = ucol_initCollator(UCA->image, result, UCA, status);
117 // if we use UCA, real locale is root
118 result->rb = ures_open(U_ICUDATA_COLL, "", status);
119 result->elements = ures_open(U_ICUDATA_COLL, "", status);
120 if(U_FAILURE(*status)) {
121 goto clean;
122 }
123 ures_close(b);
124 result->hasRealData = FALSE;
125 } else if(U_SUCCESS(*status)) {
126 int32_t len = 0;
127 UErrorCode binaryStatus = U_ZERO_ERROR;
128
129 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &binaryStatus);
130
131 if(binaryStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */
132 binary = NULL;
133 result = tryOpeningFromRules(collElem, status);
134 if(U_FAILURE(*status)) {
135 goto clean;
136 }
137 } else if(U_SUCCESS(*status)) { /* otherwise, we'll pick a collation data that exists */
138 const uint8_t *inData = ures_getBinary(binary, &len, status);
139 UCATableHeader *colData = (UCATableHeader *)inData;
140 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
141 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 ||
142 colData->version[0] != UCOL_BUILDER_VERSION)
143 {
144 *status = U_DIFFERENT_UCA_VERSION;
145 result = tryOpeningFromRules(collElem, status);
146 } else {
147 if(U_FAILURE(*status)){
148 goto clean;
149 }
150 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
151 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status);
152 if(U_FAILURE(*status)){
153 goto clean;
154 }
155 result->hasRealData = TRUE;
156 } else {
157 result = ucol_initCollator(UCA->image, result, UCA, status);
158 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status);
159 if(U_FAILURE(*status)){
160 goto clean;
161 }
162 result->hasRealData = FALSE;
163 }
164 result->freeImageOnClose = FALSE;
165 }
166 }
167 result->rb = b;
168 result->elements = collElem;
169 len = 0;
170 binaryStatus = U_ZERO_ERROR;
171 result->rules = ures_getStringByKey(result->elements, "Sequence", &len, &binaryStatus);
172 result->rulesLength = len;
173 result->freeRulesOnClose = FALSE;
174 } else { /* There is another error, and we're just gonna clean up */
175 goto clean;
176 }
177
178 result->validLocale = NULL; // default is to use rb info
179
180 if(loc == NULL) {
181 loc = ures_getLocale(result->rb, status);
182 }
183 result->requestedLocale = (char *)uprv_malloc((uprv_strlen(loc)+1)*sizeof(char));
184 /* test for NULL */
185 if (result->requestedLocale == NULL) {
186 *status = U_MEMORY_ALLOCATION_ERROR;
187 goto clean;
188 }
189 uprv_strcpy(result->requestedLocale, loc);
190
191 ures_close(binary);
192 ures_close(collations); //??? we have to decide on that. Probably affects something :)
193 result->resCleaner = ucol_prv_closeResources;
194 return result;
195
196 clean:
197 ures_close(b);
198 ures_close(collElem);
199 ures_close(collations);
200 ures_close(binary);
201 return NULL;
202 }
203
204 U_CAPI UCollator*
ucol_open(const char * loc,UErrorCode * status)205 ucol_open(const char *loc,
206 UErrorCode *status)
207 {
208 U_NAMESPACE_USE
209
210 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN);
211 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc);
212 UCollator *result = NULL;
213
214 u_init(status);
215 #if !UCONFIG_NO_SERVICE
216 result = Collator::createUCollator(loc, status);
217 if (result == NULL)
218 #endif
219 {
220 result = ucol_open_internal(loc, status);
221 }
222 UTRACE_EXIT_PTR_STATUS(result, *status);
223 return result;
224 }
225
226 U_CAPI UCollator* U_EXPORT2
ucol_openRules(const UChar * rules,int32_t rulesLength,UColAttributeValue normalizationMode,UCollationStrength strength,UParseError * parseError,UErrorCode * status)227 ucol_openRules( const UChar *rules,
228 int32_t rulesLength,
229 UColAttributeValue normalizationMode,
230 UCollationStrength strength,
231 UParseError *parseError,
232 UErrorCode *status)
233 {
234 UColTokenParser src;
235 UColAttributeValue norm;
236 UParseError tErr;
237
238 if(status == NULL || U_FAILURE(*status)){
239 return 0;
240 }
241
242 u_init(status);
243 if (U_FAILURE(*status)) {
244 return NULL;
245 }
246
247 if(rules == NULL || rulesLength < -1) {
248 *status = U_ILLEGAL_ARGUMENT_ERROR;
249 return 0;
250 }
251
252 if(rulesLength == -1) {
253 rulesLength = u_strlen(rules);
254 }
255
256 if(parseError == NULL){
257 parseError = &tErr;
258 }
259
260 switch(normalizationMode) {
261 case UCOL_OFF:
262 case UCOL_ON:
263 case UCOL_DEFAULT:
264 norm = normalizationMode;
265 break;
266 default:
267 *status = U_ILLEGAL_ARGUMENT_ERROR;
268 return 0;
269 }
270
271 UCollator *UCA = ucol_initUCA(status);
272
273 if(U_FAILURE(*status)){
274 return NULL;
275 }
276
277 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, status);
278 ucol_tok_assembleTokenList(&src,parseError, status);
279
280 if(U_FAILURE(*status)) {
281 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */
282 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */
283 /* so something might be done here... or on lower level */
284 #ifdef UCOL_DEBUG
285 if(*status == U_ILLEGAL_ARGUMENT_ERROR) {
286 fprintf(stderr, "bad option starting at offset %i\n", src.current-src.source);
287 } else {
288 fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
289 }
290 #endif
291 ucol_tok_closeTokenList(&src);
292 return NULL;
293 }
294 UCollator *result = NULL;
295 UCATableHeader *table = NULL;
296
297 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
298 /* also, if we wanted to remove some contractions, we should make a tailoring */
299 table = ucol_assembleTailoringTable(&src, status);
300 if(U_SUCCESS(*status)) {
301 // builder version
302 table->version[0] = UCOL_BUILDER_VERSION;
303 // no tailoring information on this level
304 table->version[1] = table->version[2] = table->version[3] = 0;
305 // set UCD version
306 u_getUnicodeVersion(table->UCDVersion);
307 // set UCA version
308 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
309 result = ucol_initCollator(table, 0, UCA, status);
310 result->hasRealData = TRUE;
311 result->freeImageOnClose = TRUE;
312 }
313 } else { /* no rules, but no error either */
314 // must be only options
315 // We will init the collator from UCA
316 result = ucol_initCollator(UCA->image, 0, UCA, status);
317 // And set only the options
318 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
319 /* test for NULL */
320 if (opts == NULL) {
321 *status = U_MEMORY_ALLOCATION_ERROR;
322 goto cleanup;
323 }
324 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet));
325 ucol_setOptionsFromHeader(result, opts, status);
326 result->freeOptionsOnClose = TRUE;
327 result->hasRealData = FALSE;
328 result->freeImageOnClose = FALSE;
329 }
330
331 if(U_SUCCESS(*status)) {
332 UChar *newRules;
333 result->dataVersion[0] = UCOL_BUILDER_VERSION;
334 if(rulesLength > 0) {
335 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR);
336 /* test for NULL */
337 if (newRules == NULL) {
338 *status = U_MEMORY_ALLOCATION_ERROR;
339 goto cleanup;
340 }
341 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR);
342 newRules[rulesLength]=0;
343 result->rules = newRules;
344 result->rulesLength = rulesLength;
345 result->freeRulesOnClose = TRUE;
346 }
347 result->rb = NULL;
348 result->elements = NULL;
349 result->validLocale = NULL;
350 result->requestedLocale = NULL;
351 ucol_setAttribute(result, UCOL_STRENGTH, strength, status);
352 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status);
353 } else {
354 cleanup:
355 if(result != NULL) {
356 ucol_close(result);
357 } else {
358 if(table != NULL) {
359 uprv_free(table);
360 }
361 }
362 result = NULL;
363 }
364
365 ucol_tok_closeTokenList(&src);
366
367 return result;
368 }
369
370 U_CAPI int32_t U_EXPORT2
ucol_getRulesEx(const UCollator * coll,UColRuleOption delta,UChar * buffer,int32_t bufferLen)371 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
372 UErrorCode status = U_ZERO_ERROR;
373 int32_t len = 0;
374 int32_t UCAlen = 0;
375 const UChar* ucaRules = 0;
376 const UChar *rules = ucol_getRules(coll, &len);
377 if(delta == UCOL_FULL_RULES) {
378 /* take the UCA rules and append real rules at the end */
379 /* UCA rules will be probably coming from the root RB */
380 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status);
381 /*
382 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status);
383 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status);
384 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status);
385 ures_close(uca);
386 ures_close(cresb);
387 */
388 }
389 if(U_FAILURE(status)) {
390 return 0;
391 }
392 if(buffer!=0 && bufferLen>0){
393 *buffer=0;
394 if(UCAlen > 0) {
395 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen));
396 }
397 if(len > 0 && bufferLen > UCAlen) {
398 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen));
399 }
400 }
401 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status);
402 }
403
404 static const UChar _NUL = 0;
405
406 U_CAPI const UChar* U_EXPORT2
ucol_getRules(const UCollator * coll,int32_t * length)407 ucol_getRules( const UCollator *coll,
408 int32_t *length)
409 {
410 if(coll->rules != NULL) {
411 *length = coll->rulesLength;
412 return coll->rules;
413 }
414 else {
415 *length = 0;
416 return &_NUL;
417 }
418 }
419
420 U_CAPI UBool U_EXPORT2
ucol_equals(const UCollator * source,const UCollator * target)421 ucol_equals(const UCollator *source, const UCollator *target) {
422 UErrorCode status = U_ZERO_ERROR;
423 // if pointers are equal, collators are equal
424 if(source == target) {
425 return TRUE;
426 }
427 int32_t i = 0, j = 0;
428 // if any of attributes are different, collators are not equal
429 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
430 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) {
431 return FALSE;
432 }
433 }
434
435 int32_t sourceRulesLen = 0, targetRulesLen = 0;
436 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen);
437 const UChar *targetRules = ucol_getRules(target, &targetRulesLen);
438
439 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) {
440 // all the attributes are equal and the rules are equal - collators are equal
441 return(TRUE);
442 }
443 // hard part, need to construct tree from rules and see if they yield the same tailoring
444 UBool result = TRUE;
445 UParseError parseError;
446 UColTokenParser sourceParser, targetParser;
447 int32_t sourceListLen = 0, targetListLen = 0;
448 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, &status);
449 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, &status);
450 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status);
451 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status);
452
453 if(sourceListLen != targetListLen) {
454 // different number of resets
455 result = FALSE;
456 } else {
457 UColToken *sourceReset = NULL, *targetReset = NULL;
458 UChar *sourceResetString = NULL, *targetResetString = NULL;
459 int32_t sourceStringLen = 0, targetStringLen = 0;
460 for(i = 0; i < sourceListLen; i++) {
461 sourceReset = sourceParser.lh[i].reset;
462 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
463 sourceStringLen = sourceReset->source >> 24;
464 for(j = 0; j < sourceListLen; j++) {
465 targetReset = targetParser.lh[j].reset;
466 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
467 targetStringLen = targetReset->source >> 24;
468 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) {
469 sourceReset = sourceParser.lh[i].first;
470 targetReset = targetParser.lh[j].first;
471 while(sourceReset != NULL && targetReset != NULL) {
472 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF);
473 sourceStringLen = sourceReset->source >> 24;
474 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF);
475 targetStringLen = targetReset->source >> 24;
476 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
477 result = FALSE;
478 goto returnResult;
479 }
480 // probably also need to check the expansions
481 if(sourceReset->expansion) {
482 if(!targetReset->expansion) {
483 result = FALSE;
484 goto returnResult;
485 } else {
486 // compare expansions
487 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF);
488 sourceStringLen = sourceReset->expansion >> 24;
489 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF);
490 targetStringLen = targetReset->expansion >> 24;
491 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) {
492 result = FALSE;
493 goto returnResult;
494 }
495 }
496 } else {
497 if(targetReset->expansion) {
498 result = FALSE;
499 goto returnResult;
500 }
501 }
502 sourceReset = sourceReset->next;
503 targetReset = targetReset->next;
504 }
505 if(sourceReset != targetReset) { // at least one is not NULL
506 // there are more tailored elements in one list
507 result = FALSE;
508 goto returnResult;
509 }
510
511
512 break;
513 }
514 }
515 // couldn't find the reset anchor, so the collators are not equal
516 if(j == sourceListLen) {
517 result = FALSE;
518 goto returnResult;
519 }
520 }
521 }
522
523 returnResult:
524 ucol_tok_closeTokenList(&sourceParser);
525 ucol_tok_closeTokenList(&targetParser);
526 return result;
527
528 }
529
530 U_CAPI int32_t U_EXPORT2
ucol_getDisplayName(const char * objLoc,const char * dispLoc,UChar * result,int32_t resultLength,UErrorCode * status)531 ucol_getDisplayName( const char *objLoc,
532 const char *dispLoc,
533 UChar *result,
534 int32_t resultLength,
535 UErrorCode *status)
536 {
537 U_NAMESPACE_USE
538
539 if(U_FAILURE(*status)) return -1;
540 UnicodeString dst;
541 if(!(result==NULL && resultLength==0)) {
542 // NULL destination for pure preflighting: empty dummy string
543 // otherwise, alias the destination buffer
544 dst.setTo(result, 0, resultLength);
545 }
546 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst);
547 return dst.extract(result, resultLength, *status);
548 }
549
550 U_CAPI const char* U_EXPORT2
ucol_getAvailable(int32_t index)551 ucol_getAvailable(int32_t index)
552 {
553 int32_t count = 0;
554 const Locale *loc = Collator::getAvailableLocales(count);
555 if (loc != NULL && index < count) {
556 return loc[index].getName();
557 }
558 return NULL;
559 }
560
561 U_CAPI int32_t U_EXPORT2
ucol_countAvailable()562 ucol_countAvailable()
563 {
564 int32_t count = 0;
565 Collator::getAvailableLocales(count);
566 return count;
567 }
568
569 #if !UCONFIG_NO_SERVICE
570 U_CAPI UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode * status)571 ucol_openAvailableLocales(UErrorCode *status) {
572 U_NAMESPACE_USE
573
574 // This is a wrapper over Collator::getAvailableLocales()
575 if (U_FAILURE(*status)) {
576 return NULL;
577 }
578 StringEnumeration *s = Collator::getAvailableLocales();
579 if (s == NULL) {
580 *status = U_MEMORY_ALLOCATION_ERROR;
581 return NULL;
582 }
583 return uenum_openStringEnumeration(s, status);
584 }
585 #endif
586
587 // Note: KEYWORDS[0] != RESOURCE_NAME - alan
588
589 static const char* RESOURCE_NAME = "collations";
590
591 static const char* KEYWORDS[] = { "collation" };
592
593 #define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0]))
594
595 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywords(UErrorCode * status)596 ucol_getKeywords(UErrorCode *status) {
597 UEnumeration *result = NULL;
598 if (U_SUCCESS(*status)) {
599 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status);
600 }
601 return result;
602 }
603
604 U_CAPI UEnumeration* U_EXPORT2
ucol_getKeywordValues(const char * keyword,UErrorCode * status)605 ucol_getKeywordValues(const char *keyword, UErrorCode *status) {
606 if (U_FAILURE(*status)) {
607 return NULL;
608 }
609 // hard-coded to accept exactly one collation keyword
610 // modify if additional collation keyword is added later
611 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0)
612 {
613 *status = U_ILLEGAL_ARGUMENT_ERROR;
614 return NULL;
615 }
616 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status);
617 }
618
619 U_CAPI int32_t U_EXPORT2
ucol_getFunctionalEquivalent(char * result,int32_t resultCapacity,const char * keyword,const char * locale,UBool * isAvailable,UErrorCode * status)620 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
621 const char* keyword, const char* locale,
622 UBool* isAvailable, UErrorCode* status)
623 {
624 // N.B.: Resource name is "collations" but keyword is "collation"
625 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL,
626 "collations", keyword, locale,
627 isAvailable, TRUE, status);
628 }
629
630 /* returns the locale name the collation data comes from */
631 U_CAPI const char * U_EXPORT2
ucol_getLocale(const UCollator * coll,ULocDataLocaleType type,UErrorCode * status)632 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
633 return ucol_getLocaleByType(coll, type, status);
634 }
635
636 U_CAPI const char * U_EXPORT2
ucol_getLocaleByType(const UCollator * coll,ULocDataLocaleType type,UErrorCode * status)637 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
638 const char *result = NULL;
639 if(status == NULL || U_FAILURE(*status)) {
640 return NULL;
641 }
642 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
643 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
644
645 switch(type) {
646 case ULOC_ACTUAL_LOCALE:
647 // validLocale is set only if service registration has explicitly set the
648 // requested and valid locales. if this is the case, the actual locale
649 // is considered to be the valid locale.
650 if (coll->validLocale != NULL) {
651 result = coll->validLocale;
652 } else if(coll->elements != NULL) {
653 result = ures_getLocale(coll->elements, status);
654 }
655 break;
656 case ULOC_VALID_LOCALE:
657 if (coll->validLocale != NULL) {
658 result = coll->validLocale;
659 } else if(coll->rb != NULL) {
660 result = ures_getLocale(coll->rb, status);
661 }
662 break;
663 case ULOC_REQUESTED_LOCALE:
664 result = coll->requestedLocale;
665 break;
666 default:
667 *status = U_ILLEGAL_ARGUMENT_ERROR;
668 }
669 UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
670 UTRACE_EXIT_STATUS(*status);
671 return result;
672 }
673
674 U_CFUNC void U_EXPORT2
ucol_setReqValidLocales(UCollator * coll,char * requestedLocaleToAdopt,char * validLocaleToAdopt)675 ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt)
676 {
677 if (coll) {
678 if (coll->validLocale) {
679 uprv_free(coll->validLocale);
680 }
681 coll->validLocale = validLocaleToAdopt;
682 if (coll->requestedLocale) { // should always have
683 uprv_free(coll->requestedLocale);
684 }
685 coll->requestedLocale = requestedLocaleToAdopt;
686 }
687 }
688
689 U_CAPI USet * U_EXPORT2
ucol_getTailoredSet(const UCollator * coll,UErrorCode * status)690 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status)
691 {
692 U_NAMESPACE_USE
693
694 if(status == NULL || U_FAILURE(*status)) {
695 return NULL;
696 }
697 if(coll == NULL || coll->UCA == NULL) {
698 *status = U_ILLEGAL_ARGUMENT_ERROR;
699 return NULL;
700 }
701 UParseError parseError;
702 UColTokenParser src;
703 int32_t rulesLen = 0;
704 const UChar *rules = ucol_getRules(coll, &rulesLen);
705 UBool startOfRules = TRUE;
706 // we internally use the C++ class, for the following reasons:
707 // 1. we need to utilize canonical iterator, which is a C++ only class
708 // 2. canonical iterator returns UnicodeStrings - USet cannot take them
709 // 3. USet is internally really UnicodeSet, C is just a wrapper
710 UnicodeSet *tailored = new UnicodeSet();
711 UnicodeString pattern;
712 UnicodeString empty;
713 CanonicalIterator it(empty, *status);
714
715
716 // The idea is to tokenize the rule set. For each non-reset token,
717 // we add all the canonicaly equivalent FCD sequences
718 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, status);
719 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
720 startOfRules = FALSE;
721 if(src.parsedToken.strength != UCOL_TOK_RESET) {
722 const UChar *stuff = src.source+(src.parsedToken.charsOffset);
723 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status);
724 pattern = it.next();
725 while(!pattern.isBogus()) {
726 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) {
727 tailored->add(pattern);
728 }
729 pattern = it.next();
730 }
731 }
732 }
733 ucol_tok_closeTokenList(&src);
734 return (USet *)tailored;
735 }
736
737 #endif /* #if !UCONFIG_NO_COLLATION */
738