1 /*
2 ******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 */
7
8 /**
9 * File tblcoll.cpp
10 *
11 * Created by: Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 2/5/97 aliu Added streamIn and streamOut methods. Added
17 * constructor which reads RuleBasedCollator object from
18 * a binary file. Added writeToFile method which streams
19 * RuleBasedCollator out to a binary file. The streamIn
20 * and streamOut methods use istream and ostream objects
21 * in binary mode.
22 * 2/11/97 aliu Moved declarations out of for loop initializer.
23 * Added Mac compatibility #ifdef for ios::nocreate.
24 * 2/12/97 aliu Modified to use TableCollationData sub-object to
25 * hold invariant data.
26 * 2/13/97 aliu Moved several methods into this class from Collation.
27 * Added a private RuleBasedCollator(Locale&) constructor,
28 * to be used by Collator::getInstance(). General
29 * clean up. Made use of UErrorCode variables consistent.
30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy
31 * constructor and getDynamicClassID.
32 * 3/5/97 aliu Changed compaction cycle to improve performance. We
33 * use the maximum allowable value which is kBlockCount.
34 * Modified getRules() to load rules dynamically. Changed
 * constructFromFile() call to accommodate this (added
36 * parameter to specify whether binary loading is to
37 * take place).
38 * 05/06/97 helena Added memory allocation error check.
39 * 6/20/97 helena Java class name change.
40 * 6/23/97 helena Adding comments to make code more readable.
41 * 09/03/97 helena Added createCollationKeyValues().
42 * 06/26/98 erm Changes for CollationKeys using byte arrays.
43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java
44 * 04/23/99 stephen Removed EDecompositionMode, merged with
45 * Normalizer::EMode
46 * 06/14/99 stephen Removed kResourceBundleSuffix
47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx
48 * files are no longer used.
49 * 11/02/99 helena Collator performance enhancements. Special case
50 * for NO_OP situations.
51 * 11/17/99 srl More performance enhancements. Inlined some internal functions.
52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator
53 * to implementation file.
54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h)
55 */
56
57 #include <typeinfo> // for 'typeid' to work
58
59 #include "unicode/utypes.h"
60
61 #if !UCONFIG_NO_COLLATION
62
63 #include "unicode/tblcoll.h"
64 #include "unicode/coleitr.h"
65 #include "unicode/ures.h"
66 #include "unicode/uset.h"
67 #include "ucol_imp.h"
68 #include "uresimp.h"
69 #include "uhash.h"
70 #include "cmemory.h"
71 #include "cstring.h"
72 #include "putilimp.h"
73
74 /* public RuleBasedCollator constructor ---------------------------------- */
75
76 U_NAMESPACE_BEGIN
77
78 /**
79 * Copy constructor, aliasing, not write-through
80 */
RuleBasedCollator(const RuleBasedCollator & that)81 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
82 : Collator(that)
83 , dataIsOwned(FALSE)
84 , isWriteThroughAlias(FALSE)
85 , ucollator(NULL)
86 {
87 RuleBasedCollator::operator=(that);
88 }
89
RuleBasedCollator(const UnicodeString & rules,UErrorCode & status)90 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
91 UErrorCode& status) :
92 dataIsOwned(FALSE)
93 {
94 construct(rules,
95 UCOL_DEFAULT_STRENGTH,
96 UCOL_DEFAULT,
97 status);
98 }
99
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UErrorCode & status)100 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
101 ECollationStrength collationStrength,
102 UErrorCode& status) : dataIsOwned(FALSE)
103 {
104 construct(rules,
105 getUCollationStrength(collationStrength),
106 UCOL_DEFAULT,
107 status);
108 }
109
RuleBasedCollator(const UnicodeString & rules,UColAttributeValue decompositionMode,UErrorCode & status)110 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
111 UColAttributeValue decompositionMode,
112 UErrorCode& status) :
113 dataIsOwned(FALSE)
114 {
115 construct(rules,
116 UCOL_DEFAULT_STRENGTH,
117 decompositionMode,
118 status);
119 }
120
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)121 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
122 ECollationStrength collationStrength,
123 UColAttributeValue decompositionMode,
124 UErrorCode& status) : dataIsOwned(FALSE)
125 {
126 construct(rules,
127 getUCollationStrength(collationStrength),
128 decompositionMode,
129 status);
130 }
/**
 * Opens a collator from a binary image via ucol_openBinary().
 * The resulting handle is marked as owned, so the destructor closes it.
 * @param bin    the binary collation data
 * @param length length of `bin`
 * @param base   collator whose underlying handle is passed as the base to
 *               ucol_openBinary(); must not be NULL
 * @param status error code; set to U_ILLEGAL_ARGUMENT_ERROR when `base` is
 *               NULL, otherwise filled in by ucol_openBinary()
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base,
                                     UErrorCode &status)
    : dataIsOwned(TRUE),
      isWriteThroughAlias(FALSE),
      ucollator(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }
    // base is dereferenced below; reject NULL instead of crashing.
    if (base == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
}
139
140 void
setRuleStringFromCollator()141 RuleBasedCollator::setRuleStringFromCollator()
142 {
143 int32_t length;
144 const UChar *r = ucol_getRules(ucollator, &length);
145
146 if (r && length > 0) {
147 // alias the rules string
148 urulestring.setTo(TRUE, r, length);
149 }
150 else {
151 urulestring.truncate(0); // Clear string.
152 }
153 }
154
155 // not aliasing, not write-through
156 void
construct(const UnicodeString & rules,UColAttributeValue collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)157 RuleBasedCollator::construct(const UnicodeString& rules,
158 UColAttributeValue collationStrength,
159 UColAttributeValue decompositionMode,
160 UErrorCode& status)
161 {
162 ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
163 decompositionMode, collationStrength,
164 NULL, &status);
165
166 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
167 isWriteThroughAlias = FALSE;
168
169 if(ucollator == NULL) {
170 if(U_SUCCESS(status)) {
171 status = U_MEMORY_ALLOCATION_ERROR;
172 }
173 return; // Failure
174 }
175
176 setRuleStringFromCollator();
177 }
178
179 /* RuleBasedCollator public destructor ----------------------------------- */
180
~RuleBasedCollator()181 RuleBasedCollator::~RuleBasedCollator()
182 {
183 if (dataIsOwned)
184 {
185 ucol_close(ucollator);
186 }
187 ucollator = 0;
188 }
189
190 /* RuleBaseCollator public methods --------------------------------------- */
191
operator ==(const Collator & that) const192 UBool RuleBasedCollator::operator==(const Collator& that) const
193 {
194 /* only checks for address equals here */
195 if (Collator::operator==(that))
196 return TRUE;
197
198 if (typeid(*this) != typeid(that))
199 return FALSE; /* not the same class */
200
201 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
202
203 // weiv: use C function, commented code below is wrong
204 return ucol_equals(this->ucollator, thatAlias.ucollator);
205 /*
206 synwee : orginal code does not check for data compatibility
207 */
208 /*
209 if (ucollator != thatAlias.ucollator)
210 return FALSE;
211
212 return TRUE;
213 */
214 }
215
operator !=(const Collator & other) const216 UBool RuleBasedCollator::operator!=(const Collator& other) const
217 {
218 return !(*this == other);
219 }
220
221 // aliasing, not write-through
operator =(const RuleBasedCollator & that)222 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
223 {
224 if (this != &that)
225 {
226 if (dataIsOwned)
227 {
228 ucol_close(ucollator);
229 }
230
231 urulestring.truncate(0); // empty the rule string
232 dataIsOwned = TRUE;
233 isWriteThroughAlias = FALSE;
234
235 UErrorCode intStatus = U_ZERO_ERROR;
236 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
237 ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
238 &intStatus);
239 if (U_SUCCESS(intStatus)) {
240 setRuleStringFromCollator();
241 }
242 }
243 return *this;
244 }
245
246 // aliasing, not write-through
clone() const247 Collator* RuleBasedCollator::clone() const
248 {
249 return new RuleBasedCollator(*this);
250 }
251
252
createCollationElementIterator(const UnicodeString & source) const253 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
254 (const UnicodeString& source) const
255 {
256 UErrorCode status = U_ZERO_ERROR;
257 CollationElementIterator *result = new CollationElementIterator(source, this,
258 status);
259 if (U_FAILURE(status)) {
260 delete result;
261 return NULL;
262 }
263
264 return result;
265 }
266
267 /**
268 * Create a CollationElementIterator object that will iterate over the
269 * elements in a string, using the collation rules defined in this
270 * RuleBasedCollator
271 */
createCollationElementIterator(const CharacterIterator & source) const272 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
273 (const CharacterIterator& source) const
274 {
275 UErrorCode status = U_ZERO_ERROR;
276 CollationElementIterator *result = new CollationElementIterator(source, this,
277 status);
278
279 if (U_FAILURE(status)) {
280 delete result;
281 return NULL;
282 }
283
284 return result;
285 }
286
287 /**
288 * Return a string representation of this collator's rules. The string can
289 * later be passed to the constructor that takes a UnicodeString argument,
290 * which will construct a collator that's functionally identical to this one.
291 * You can also allow users to edit the string in order to change the collation
292 * data, or you can print it out for inspection, or whatever.
293 */
getRules() const294 const UnicodeString& RuleBasedCollator::getRules() const
295 {
296 return urulestring;
297 }
298
getRules(UColRuleOption delta,UnicodeString & buffer)299 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
300 {
301 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
302
303 if (rulesize > 0) {
304 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
305 if(rules != NULL) {
306 ucol_getRulesEx(ucollator, delta, rules, rulesize);
307 buffer.setTo(rules, rulesize);
308 uprv_free(rules);
309 } else { // couldn't allocate
310 buffer.remove();
311 }
312 }
313 else {
314 buffer.remove();
315 }
316 }
317
318 UnicodeSet *
getTailoredSet(UErrorCode & status) const319 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
320 {
321 if(U_FAILURE(status)) {
322 return NULL;
323 }
324 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
325 }
326
327
getVersion(UVersionInfo versionInfo) const328 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
329 {
330 if (versionInfo!=NULL){
331 ucol_getVersion(ucollator, versionInfo);
332 }
333 }
334
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const335 Collator::EComparisonResult RuleBasedCollator::compare(
336 const UnicodeString& source,
337 const UnicodeString& target,
338 int32_t length) const
339 {
340 UErrorCode status = U_ZERO_ERROR;
341 return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
342 }
343
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const344 UCollationResult RuleBasedCollator::compare(
345 const UnicodeString& source,
346 const UnicodeString& target,
347 int32_t length,
348 UErrorCode &status) const
349 {
350 return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
351 }
352
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const353 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
354 int32_t sourceLength,
355 const UChar* target,
356 int32_t targetLength)
357 const
358 {
359 return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
360 target, targetLength));
361 }
362
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const363 UCollationResult RuleBasedCollator::compare(const UChar* source,
364 int32_t sourceLength,
365 const UChar* target,
366 int32_t targetLength,
367 UErrorCode &status) const
368 {
369 if(U_SUCCESS(status)) {
370 return ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
371 } else {
372 return UCOL_EQUAL;
373 }
374 }
375
376 /**
377 * Compare two strings using this collator
378 */
compare(const UnicodeString & source,const UnicodeString & target) const379 Collator::EComparisonResult RuleBasedCollator::compare(
380 const UnicodeString& source,
381 const UnicodeString& target) const
382 {
383 return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
384 target.getBuffer(), target.length()));
385 }
386
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const387 UCollationResult RuleBasedCollator::compare(
388 const UnicodeString& source,
389 const UnicodeString& target,
390 UErrorCode &status) const
391 {
392 if(U_SUCCESS(status)) {
393 return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
394 target.getBuffer(), target.length());
395 } else {
396 return UCOL_EQUAL;
397 }
398 }
399
compare(UCharIterator & sIter,UCharIterator & tIter,UErrorCode & status) const400 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
401 UCharIterator &tIter,
402 UErrorCode &status) const {
403 if(U_SUCCESS(status)) {
404 return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
405 } else {
406 return UCOL_EQUAL;
407 }
408 }
409
410 /**
411 * Retrieve a collation key for the specified string. The key can be compared
412 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
413 * the ordering of their respective source strings. This is handy when doing a
414 * sort, where each sort key must be compared many times.
415 *
416 * The basic algorithm here is to find all of the collation elements for each
417 * character in the source string, convert them to an ASCII representation, and
418 * put them into the collation key. But it's trickier than that. Each
419 * collation element in a string has three components: primary ('A' vs 'B'),
420 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
421 * at the end of a string takes precedence over a secondary or tertiary
422 * difference earlier in the string.
423 *
424 * To account for this, we put all of the primary orders at the beginning of
425 * the string, followed by the secondary and tertiary orders. Each set of
 * orders is terminated by nulls so that a key for a string which is an initial
427 * substring of another key will compare less without any special case.
428 *
429 * Here's a hypothetical example, with the collation element represented as a
430 * three-digit number, one digit for primary, one for secondary, etc.
431 *
432 * String: A a B \u00C9
433 * Collation Elements: 101 100 201 511
434 * Collation Key: 1125<null>0001<null>1011<null>
435 *
436 * To make things even trickier, secondary differences (accent marks) are
437 * compared starting at the *end* of the string in languages with French
438 * secondary ordering. But when comparing the accent marks on a single base
439 * character, they are compared from the beginning. To handle this, we reverse
440 * all of the accents that belong to each base character, then we reverse the
441 * entire string of secondary orderings at the end.
442 */
getCollationKey(const UnicodeString & source,CollationKey & sortkey,UErrorCode & status) const443 CollationKey& RuleBasedCollator::getCollationKey(
444 const UnicodeString& source,
445 CollationKey& sortkey,
446 UErrorCode& status) const
447 {
448 return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
449 }
450
getCollationKey(const UChar * source,int32_t sourceLen,CollationKey & sortkey,UErrorCode & status) const451 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
452 int32_t sourceLen,
453 CollationKey& sortkey,
454 UErrorCode& status) const
455 {
456 if (U_FAILURE(status)) {
457 return sortkey.setToBogus();
458 }
459 if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
460 status = U_ILLEGAL_ARGUMENT_ERROR;
461 return sortkey.setToBogus();
462 }
463
464 if (sourceLen < 0) {
465 sourceLen = u_strlen(source);
466 }
467 if (sourceLen == 0) {
468 return sortkey.reset();
469 }
470
471 uint8_t *result;
472 int32_t resultCapacity;
473 if (sortkey.fCapacity >= (sourceLen * 3)) {
474 // Try to reuse the CollationKey.fBytes.
475 result = sortkey.fBytes;
476 resultCapacity = sortkey.fCapacity;
477 } else {
478 result = NULL;
479 resultCapacity = 0;
480 }
481 int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
482 result, resultCapacity, &status);
483
484 if (U_SUCCESS(status)) {
485 if (result == sortkey.fBytes) {
486 sortkey.setLength(resultLen);
487 } else {
488 sortkey.adopt(result, resultCapacity, resultLen);
489 }
490 } else {
491 if (result != sortkey.fBytes) {
492 uprv_free(result);
493 }
494 sortkey.setToBogus();
495 }
496 return sortkey;
497 }
498
499 /**
500 * Return the maximum length of any expansion sequences that end with the
501 * specified comparison order.
502 * @param order a collation order returned by previous or next.
503 * @return the maximum length of any expansion seuences ending with the
504 * specified order or 1 if collation order does not occur at the end of any
505 * expansion sequence.
506 * @see CollationElementIterator#getMaxExpansion
507 */
getMaxExpansion(int32_t order) const508 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
509 {
510 uint8_t result;
511 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
512 return result;
513 }
514
cloneRuleData(int32_t & length,UErrorCode & status)515 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
516 UErrorCode &status)
517 {
518 return ucol_cloneRuleData(ucollator, &length, &status);
519 }
520
521
cloneBinary(uint8_t * buffer,int32_t capacity,UErrorCode & status)522 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
523 {
524 return ucol_cloneBinary(ucollator, buffer, capacity, &status);
525 }
526
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & status)527 void RuleBasedCollator::setAttribute(UColAttribute attr,
528 UColAttributeValue value,
529 UErrorCode &status)
530 {
531 if (U_FAILURE(status))
532 return;
533 checkOwned();
534 ucol_setAttribute(ucollator, attr, value, &status);
535 }
536
getAttribute(UColAttribute attr,UErrorCode & status)537 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
538 UErrorCode &status)
539 {
540 if (U_FAILURE(status))
541 return UCOL_DEFAULT;
542 return ucol_getAttribute(ucollator, attr, &status);
543 }
544
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)545 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
546 checkOwned();
547 return ucol_setVariableTop(ucollator, varTop, len, &status);
548 }
549
setVariableTop(const UnicodeString varTop,UErrorCode & status)550 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
551 checkOwned();
552 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
553 }
554
setVariableTop(const uint32_t varTop,UErrorCode & status)555 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
556 checkOwned();
557 ucol_restoreVariableTop(ucollator, varTop, &status);
558 }
559
getVariableTop(UErrorCode & status) const560 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
561 return ucol_getVariableTop(ucollator, &status);
562 }
563
safeClone(void)564 Collator* RuleBasedCollator::safeClone(void)
565 {
566 UErrorCode intStatus = U_ZERO_ERROR;
567 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
568 UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
569 &intStatus);
570 if (U_FAILURE(intStatus)) {
571 return NULL;
572 }
573
574 RuleBasedCollator *result = new RuleBasedCollator();
575 // Null pointer check
576 if (result != NULL) {
577 result->ucollator = ucol;
578 result->dataIsOwned = TRUE;
579 result->isWriteThroughAlias = FALSE;
580 setRuleStringFromCollator();
581 }
582
583 return result;
584 }
585
586
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const587 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
588 uint8_t *result, int32_t resultLength)
589 const
590 {
591 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
592 }
593
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const594 int32_t RuleBasedCollator::getSortKey(const UChar *source,
595 int32_t sourceLength, uint8_t *result,
596 int32_t resultLength) const
597 {
598 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
599 }
600
getStrength(void) const601 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
602 {
603 UErrorCode intStatus = U_ZERO_ERROR;
604 return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
605 &intStatus));
606 }
607
setStrength(ECollationStrength newStrength)608 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
609 {
610 checkOwned();
611 UErrorCode intStatus = U_ZERO_ERROR;
612 UCollationStrength strength = getUCollationStrength(newStrength);
613 ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
614 }
615
getReorderCodes(int32_t * dest,int32_t destCapacity,UErrorCode & status) const616 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
617 int32_t destCapacity,
618 UErrorCode& status) const
619 {
620 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
621 }
622
setReorderCodes(const int32_t * reorderCodes,int32_t reorderCodesLength,UErrorCode & status)623 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
624 int32_t reorderCodesLength,
625 UErrorCode& status)
626 {
627 checkOwned();
628 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
629 }
630
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t destCapacity,UErrorCode & status)631 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
632 int32_t* dest,
633 int32_t destCapacity,
634 UErrorCode& status)
635 {
636 return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
637 }
638
639 /**
640 * Create a hash code for this collation. Just hash the main rule table -- that
641 * should be good enough for almost any use.
642 */
hashCode() const643 int32_t RuleBasedCollator::hashCode() const
644 {
645 int32_t length;
646 const UChar *rules = ucol_getRules(ucollator, &length);
647 return uhash_hashUCharsN(rules, length);
648 }
649
650 /**
651 * return the locale of this collator
652 */
getLocale(ULocDataLocaleType type,UErrorCode & status) const653 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
654 const char *result = ucol_getLocaleByType(ucollator, type, &status);
655 if(result == NULL) {
656 Locale res("");
657 res.setToBogus();
658 return res;
659 } else {
660 return Locale(result);
661 }
662 }
663
664 void
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)665 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
666 checkOwned();
667 char* rloc = uprv_strdup(requestedLocale.getName());
668 if (rloc) {
669 char* vloc = uprv_strdup(validLocale.getName());
670 if (vloc) {
671 char* aloc = uprv_strdup(actualLocale.getName());
672 if (aloc) {
673 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
674 return;
675 }
676 uprv_free(vloc);
677 }
678 uprv_free(rloc);
679 }
680 }
681
682 // RuleBaseCollatorNew private constructor ----------------------------------
683
RuleBasedCollator()684 RuleBasedCollator::RuleBasedCollator()
685 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
686 {
687 }
688
RuleBasedCollator(const Locale & desiredLocale,UErrorCode & status)689 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
690 UErrorCode& status)
691 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
692 {
693 if (U_FAILURE(status))
694 return;
695
696 /*
697 Try to load, in order:
698 1. The desired locale's collation.
699 2. A fallback of the desired locale.
700 3. The default locale's collation.
701 4. A fallback of the default locale.
702 5. The default collation rules, which contains en_US collation rules.
703
704 To reiterate, we try:
705 Specific:
706 language+country+variant
707 language+country
708 language
709 Default:
710 language+country+variant
711 language+country
712 language
713 Root: (aka DEFAULTRULES)
714 steps 1-5 are handled by resource bundle fallback mechanism.
715 however, in a very unprobable situation that no resource bundle
716 data exists, step 5 is repeated with hardcoded default rules.
717 */
718
719 setUCollator(desiredLocale, status);
720
721 if (U_FAILURE(status))
722 {
723 status = U_ZERO_ERROR;
724
725 setUCollator(kRootLocaleName, status);
726 if (status == U_ZERO_ERROR) {
727 status = U_USING_DEFAULT_WARNING;
728 }
729 }
730
731 if (U_SUCCESS(status))
732 {
733 setRuleStringFromCollator();
734 }
735 }
736
737 void
setUCollator(const char * locale,UErrorCode & status)738 RuleBasedCollator::setUCollator(const char *locale,
739 UErrorCode &status)
740 {
741 if (U_FAILURE(status)) {
742 return;
743 }
744 if (ucollator && dataIsOwned)
745 ucol_close(ucollator);
746 ucollator = ucol_open_internal(locale, &status);
747 dataIsOwned = TRUE;
748 isWriteThroughAlias = FALSE;
749 }
750
751
752 void
checkOwned()753 RuleBasedCollator::checkOwned() {
754 if (!(dataIsOwned || isWriteThroughAlias)) {
755 UErrorCode status = U_ZERO_ERROR;
756 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
757 setRuleStringFromCollator();
758 dataIsOwned = TRUE;
759 isWriteThroughAlias = FALSE;
760 }
761 }
762
763 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
764
765 U_NAMESPACE_END
766
767 #endif /* #if !UCONFIG_NO_COLLATION */
768