• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 1996-2011, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ******************************************************************************
6  */
7 
8 /**
9  * File tblcoll.cpp
10  *
11  * Created by: Helena Shih
12  *
13  * Modification History:
14  *
15  *  Date        Name        Description
16  *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
17  *                          constructor which reads RuleBasedCollator object from
18  *                          a binary file.  Added writeToFile method which streams
19  *                          RuleBasedCollator out to a binary file.  The streamIn
20  *                          and streamOut methods use istream and ostream objects
21  *                          in binary mode.
22  *  2/11/97     aliu        Moved declarations out of for loop initializer.
23  *                          Added Mac compatibility #ifdef for ios::nocreate.
24  *  2/12/97     aliu        Modified to use TableCollationData sub-object to
25  *                          hold invariant data.
26  *  2/13/97     aliu        Moved several methods into this class from Collation.
27  *                          Added a private RuleBasedCollator(Locale&) constructor,
28  *                          to be used by Collator::getInstance().  General
29  *                          clean up.  Made use of UErrorCode variables consistent.
30  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
31  *                          constructor and getDynamicClassID.
32  *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
33  *                          use the maximum allowable value which is kBlockCount.
34  *                          Modified getRules() to load rules dynamically.  Changed
35  *                          constructFromFile() call to accommodate this (added
36  *                          parameter to specify whether binary loading is to
37  *                          take place).
38  * 05/06/97     helena      Added memory allocation error check.
39  *  6/20/97     helena      Java class name change.
40  *  6/23/97     helena      Adding comments to make code more readable.
41  * 09/03/97     helena      Added createCollationKeyValues().
42  * 06/26/98     erm         Changes for CollationKeys using byte arrays.
43  * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
44  * 04/23/99     stephen     Removed EDecompositionMode, merged with
45  *                          Normalizer::EMode
46  * 06/14/99     stephen     Removed kResourceBundleSuffix
47  * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
48  *                          files are no longer used.
49  * 11/02/99     helena      Collator performance enhancements.  Special case
50  *                          for NO_OP situations.
51  * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
52  * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
53  *                          to implementation file.
54  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
55  */
56 
57 #include <typeinfo>  // for 'typeid' to work
58 
59 #include "unicode/utypes.h"
60 
61 #if !UCONFIG_NO_COLLATION
62 
63 #include "unicode/tblcoll.h"
64 #include "unicode/coleitr.h"
65 #include "unicode/ures.h"
66 #include "unicode/uset.h"
67 #include "ucol_imp.h"
68 #include "uresimp.h"
69 #include "uhash.h"
70 #include "cmemory.h"
71 #include "cstring.h"
72 #include "putilimp.h"
73 
74 /* public RuleBasedCollator constructor ---------------------------------- */
75 
76 U_NAMESPACE_BEGIN
77 
78 /**
79 * Copy constructor, aliasing, not write-through
80 */
RuleBasedCollator(const RuleBasedCollator & that)81 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
82 : Collator(that)
83 , dataIsOwned(FALSE)
84 , isWriteThroughAlias(FALSE)
85 , ucollator(NULL)
86 {
87     RuleBasedCollator::operator=(that);
88 }
89 
RuleBasedCollator(const UnicodeString & rules,UErrorCode & status)90 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
91                                      UErrorCode& status) :
92 dataIsOwned(FALSE)
93 {
94     construct(rules,
95         UCOL_DEFAULT_STRENGTH,
96         UCOL_DEFAULT,
97         status);
98 }
99 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UErrorCode & status)100 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
101                                      ECollationStrength collationStrength,
102                                      UErrorCode& status) : dataIsOwned(FALSE)
103 {
104     construct(rules,
105         getUCollationStrength(collationStrength),
106         UCOL_DEFAULT,
107         status);
108 }
109 
RuleBasedCollator(const UnicodeString & rules,UColAttributeValue decompositionMode,UErrorCode & status)110 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
111                                      UColAttributeValue decompositionMode,
112                                      UErrorCode& status) :
113 dataIsOwned(FALSE)
114 {
115     construct(rules,
116         UCOL_DEFAULT_STRENGTH,
117         decompositionMode,
118         status);
119 }
120 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)121 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
122                                      ECollationStrength collationStrength,
123                                      UColAttributeValue decompositionMode,
124                                      UErrorCode& status) : dataIsOwned(FALSE)
125 {
126     construct(rules,
127         getUCollationStrength(collationStrength),
128         decompositionMode,
129         status);
130 }
/**
 * Opens a collator from a binary image via ucol_openBinary().
 *
 * The resulting collator is marked as owned (it is closed by the
 * destructor) and is not a write-through alias.
 *
 * @param bin    the binary collation image
 * @param length length of the image, passed to ucol_openBinary()
 * @param base   base collator whose underlying UCollator is handed to
 *               ucol_openBinary(); must not be NULL
 * @param status in/out error code. Nothing is opened if it already
 *               indicates a failure; set to U_ILLEGAL_ARGUMENT_ERROR when
 *               base is NULL.
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                    const RuleBasedCollator *base,
                    UErrorCode &status) :
dataIsOwned(TRUE),
isWriteThroughAlias(FALSE),
ucollator(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }
    // base is dereferenced below; reject NULL instead of crashing.
    if (base == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
}
139 
140 void
setRuleStringFromCollator()141 RuleBasedCollator::setRuleStringFromCollator()
142 {
143     int32_t length;
144     const UChar *r = ucol_getRules(ucollator, &length);
145 
146     if (r && length > 0) {
147         // alias the rules string
148         urulestring.setTo(TRUE, r, length);
149     }
150     else {
151         urulestring.truncate(0); // Clear string.
152     }
153 }
154 
155 // not aliasing, not write-through
156 void
construct(const UnicodeString & rules,UColAttributeValue collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)157 RuleBasedCollator::construct(const UnicodeString& rules,
158                              UColAttributeValue collationStrength,
159                              UColAttributeValue decompositionMode,
160                              UErrorCode& status)
161 {
162     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
163         decompositionMode, collationStrength,
164         NULL, &status);
165 
166     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
167     isWriteThroughAlias = FALSE;
168 
169     if(ucollator == NULL) {
170         if(U_SUCCESS(status)) {
171             status = U_MEMORY_ALLOCATION_ERROR;
172         }
173         return; // Failure
174     }
175 
176     setRuleStringFromCollator();
177 }
178 
179 /* RuleBasedCollator public destructor ----------------------------------- */
180 
~RuleBasedCollator()181 RuleBasedCollator::~RuleBasedCollator()
182 {
183     if (dataIsOwned)
184     {
185         ucol_close(ucollator);
186     }
187     ucollator = 0;
188 }
189 
190 /* RuleBasedCollator public methods -------------------------------------- */
191 
operator ==(const Collator & that) const192 UBool RuleBasedCollator::operator==(const Collator& that) const
193 {
194   /* only checks for address equals here */
195   if (Collator::operator==(that))
196     return TRUE;
197 
198   if (typeid(*this) != typeid(that))
199     return FALSE;  /* not the same class */
200 
201   RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
202 
203   // weiv: use C function, commented code below is wrong
204   return ucol_equals(this->ucollator, thatAlias.ucollator);
205   /*
206   synwee : orginal code does not check for data compatibility
207   */
208   /*
209   if (ucollator != thatAlias.ucollator)
210     return FALSE;
211 
212   return TRUE;
213   */
214 }
215 
operator !=(const Collator & other) const216 UBool RuleBasedCollator::operator!=(const Collator& other) const
217 {
218     return !(*this == other);
219 }
220 
221 // aliasing, not write-through
operator =(const RuleBasedCollator & that)222 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
223 {
224     if (this != &that)
225     {
226         if (dataIsOwned)
227         {
228             ucol_close(ucollator);
229         }
230 
231         urulestring.truncate(0); // empty the rule string
232         dataIsOwned = TRUE;
233         isWriteThroughAlias = FALSE;
234 
235         UErrorCode intStatus = U_ZERO_ERROR;
236         int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
237         ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
238                                         &intStatus);
239         if (U_SUCCESS(intStatus)) {
240             setRuleStringFromCollator();
241         }
242     }
243     return *this;
244 }
245 
246 // aliasing, not write-through
clone() const247 Collator* RuleBasedCollator::clone() const
248 {
249     return new RuleBasedCollator(*this);
250 }
251 
252 
createCollationElementIterator(const UnicodeString & source) const253 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
254                                            (const UnicodeString& source) const
255 {
256     UErrorCode status = U_ZERO_ERROR;
257     CollationElementIterator *result = new CollationElementIterator(source, this,
258                                                                     status);
259     if (U_FAILURE(status)) {
260         delete result;
261         return NULL;
262     }
263 
264     return result;
265 }
266 
267 /**
268 * Create a CollationElementIterator object that will iterate over the
269 * elements in a string, using the collation rules defined in this
270 * RuleBasedCollator
271 */
createCollationElementIterator(const CharacterIterator & source) const272 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
273                                        (const CharacterIterator& source) const
274 {
275     UErrorCode status = U_ZERO_ERROR;
276     CollationElementIterator *result = new CollationElementIterator(source, this,
277                                                                     status);
278 
279     if (U_FAILURE(status)) {
280         delete result;
281         return NULL;
282     }
283 
284     return result;
285 }
286 
287 /**
288 * Return a string representation of this collator's rules. The string can
289 * later be passed to the constructor that takes a UnicodeString argument,
290 * which will construct a collator that's functionally identical to this one.
291 * You can also allow users to edit the string in order to change the collation
292 * data, or you can print it out for inspection, or whatever.
293 */
getRules() const294 const UnicodeString& RuleBasedCollator::getRules() const
295 {
296     return urulestring;
297 }
298 
getRules(UColRuleOption delta,UnicodeString & buffer)299 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
300 {
301     int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
302 
303     if (rulesize > 0) {
304         UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
305         if(rules != NULL) {
306             ucol_getRulesEx(ucollator, delta, rules, rulesize);
307             buffer.setTo(rules, rulesize);
308             uprv_free(rules);
309         } else { // couldn't allocate
310             buffer.remove();
311         }
312     }
313     else {
314         buffer.remove();
315     }
316 }
317 
318 UnicodeSet *
getTailoredSet(UErrorCode & status) const319 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
320 {
321     if(U_FAILURE(status)) {
322         return NULL;
323     }
324     return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
325 }
326 
327 
getVersion(UVersionInfo versionInfo) const328 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
329 {
330     if (versionInfo!=NULL){
331         ucol_getVersion(ucollator, versionInfo);
332     }
333 }
334 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const335 Collator::EComparisonResult RuleBasedCollator::compare(
336                                                const UnicodeString& source,
337                                                const UnicodeString& target,
338                                                int32_t length) const
339 {
340     UErrorCode status = U_ZERO_ERROR;
341     return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
342 }
343 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const344 UCollationResult RuleBasedCollator::compare(
345                                                const UnicodeString& source,
346                                                const UnicodeString& target,
347                                                int32_t length,
348                                                UErrorCode &status) const
349 {
350     return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
351 }
352 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const353 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
354                                                        int32_t sourceLength,
355                                                        const UChar* target,
356                                                        int32_t targetLength)
357                                                        const
358 {
359     return  getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
360                                                          target, targetLength));
361 }
362 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const363 UCollationResult RuleBasedCollator::compare(const UChar* source,
364                                                        int32_t sourceLength,
365                                                        const UChar* target,
366                                                        int32_t targetLength,
367                                                        UErrorCode &status) const
368 {
369     if(U_SUCCESS(status)) {
370         return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
371     } else {
372         return UCOL_EQUAL;
373     }
374 }
375 
376 /**
377 * Compare two strings using this collator
378 */
compare(const UnicodeString & source,const UnicodeString & target) const379 Collator::EComparisonResult RuleBasedCollator::compare(
380                                              const UnicodeString& source,
381                                              const UnicodeString& target) const
382 {
383     return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
384                                                         target.getBuffer(), target.length()));
385 }
386 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const387 UCollationResult RuleBasedCollator::compare(
388                                              const UnicodeString& source,
389                                              const UnicodeString& target,
390                                              UErrorCode &status) const
391 {
392     if(U_SUCCESS(status)) {
393         return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
394                                        target.getBuffer(), target.length());
395     } else {
396         return UCOL_EQUAL;
397     }
398 }
399 
compare(UCharIterator & sIter,UCharIterator & tIter,UErrorCode & status) const400 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
401                                             UCharIterator &tIter,
402                                             UErrorCode &status) const {
403     if(U_SUCCESS(status)) {
404         return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
405     } else {
406         return UCOL_EQUAL;
407     }
408 }
409 
410 /**
411 * Retrieve a collation key for the specified string. The key can be compared
412 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
413 * the ordering of their respective source strings. This is handy when doing a
414 * sort, where each sort key must be compared many times.
415 *
416 * The basic algorithm here is to find all of the collation elements for each
417 * character in the source string, convert them to an ASCII representation, and
418 * put them into the collation key.  But it's trickier than that. Each
419 * collation element in a string has three components: primary ('A' vs 'B'),
420 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
421 * at the end of a string takes precedence over a secondary or tertiary
422 * difference earlier in the string.
423 *
424 * To account for this, we put all of the primary orders at the beginning of
425 * the string, followed by the secondary and tertiary orders. Each set of
426 * orders is terminated by nulls so that a key for a string which is an initial
427 * substring of another key will compare less without any special case.
428 *
429 * Here's a hypothetical example, with the collation element represented as a
430 * three-digit number, one digit for primary, one for secondary, etc.
431 *
432 * String:              A     a     B    \u00C9
433 * Collation Elements: 101   100   201  511
434 * Collation Key:      1125<null>0001<null>1011<null>
435 *
436 * To make things even trickier, secondary differences (accent marks) are
437 * compared starting at the *end* of the string in languages with French
438 * secondary ordering. But when comparing the accent marks on a single base
439 * character, they are compared from the beginning. To handle this, we reverse
440 * all of the accents that belong to each base character, then we reverse the
441 * entire string of secondary orderings at the end.
442 */
getCollationKey(const UnicodeString & source,CollationKey & sortkey,UErrorCode & status) const443 CollationKey& RuleBasedCollator::getCollationKey(
444                                                   const UnicodeString& source,
445                                                   CollationKey& sortkey,
446                                                   UErrorCode& status) const
447 {
448     return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
449 }
450 
getCollationKey(const UChar * source,int32_t sourceLen,CollationKey & sortkey,UErrorCode & status) const451 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
452                                                     int32_t sourceLen,
453                                                     CollationKey& sortkey,
454                                                     UErrorCode& status) const
455 {
456     if (U_FAILURE(status)) {
457         return sortkey.setToBogus();
458     }
459     if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
460         status = U_ILLEGAL_ARGUMENT_ERROR;
461         return sortkey.setToBogus();
462     }
463 
464     if (sourceLen < 0) {
465         sourceLen = u_strlen(source);
466     }
467     if (sourceLen == 0) {
468         return sortkey.reset();
469     }
470 
471     uint8_t *result;
472     int32_t resultCapacity;
473     if (sortkey.fCapacity >= (sourceLen * 3)) {
474         // Try to reuse the CollationKey.fBytes.
475         result = sortkey.fBytes;
476         resultCapacity = sortkey.fCapacity;
477     } else {
478         result = NULL;
479         resultCapacity = 0;
480     }
481     int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
482                                                       result, resultCapacity, &status);
483 
484     if (U_SUCCESS(status)) {
485         if (result == sortkey.fBytes) {
486             sortkey.setLength(resultLen);
487         } else {
488             sortkey.adopt(result, resultCapacity, resultLen);
489         }
490     } else {
491         if (result != sortkey.fBytes) {
492             uprv_free(result);
493         }
494         sortkey.setToBogus();
495     }
496     return sortkey;
497 }
498 
499 /**
500  * Return the maximum length of any expansion sequences that end with the
501  * specified comparison order.
502  * @param order a collation order returned by previous or next.
503  * @return the maximum length of any expansion sequences ending with the
504  *         specified order or 1 if collation order does not occur at the end of any
505  *         expansion sequence.
506  * @see CollationElementIterator#getMaxExpansion
507  */
getMaxExpansion(int32_t order) const508 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
509 {
510     uint8_t result;
511     UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
512     return result;
513 }
514 
cloneRuleData(int32_t & length,UErrorCode & status)515 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
516                                               UErrorCode &status)
517 {
518     return ucol_cloneRuleData(ucollator, &length, &status);
519 }
520 
521 
cloneBinary(uint8_t * buffer,int32_t capacity,UErrorCode & status)522 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
523 {
524   return ucol_cloneBinary(ucollator, buffer, capacity, &status);
525 }
526 
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & status)527 void RuleBasedCollator::setAttribute(UColAttribute attr,
528                                      UColAttributeValue value,
529                                      UErrorCode &status)
530 {
531     if (U_FAILURE(status))
532         return;
533     checkOwned();
534     ucol_setAttribute(ucollator, attr, value, &status);
535 }
536 
getAttribute(UColAttribute attr,UErrorCode & status)537 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
538                                                       UErrorCode &status)
539 {
540     if (U_FAILURE(status))
541         return UCOL_DEFAULT;
542     return ucol_getAttribute(ucollator, attr, &status);
543 }
544 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)545 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
546     checkOwned();
547     return ucol_setVariableTop(ucollator, varTop, len, &status);
548 }
549 
setVariableTop(const UnicodeString varTop,UErrorCode & status)550 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
551     checkOwned();
552     return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
553 }
554 
setVariableTop(const uint32_t varTop,UErrorCode & status)555 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
556     checkOwned();
557     ucol_restoreVariableTop(ucollator, varTop, &status);
558 }
559 
getVariableTop(UErrorCode & status) const560 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
561   return ucol_getVariableTop(ucollator, &status);
562 }
563 
safeClone(void)564 Collator* RuleBasedCollator::safeClone(void)
565 {
566     UErrorCode intStatus = U_ZERO_ERROR;
567     int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
568     UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
569                                     &intStatus);
570     if (U_FAILURE(intStatus)) {
571         return NULL;
572     }
573 
574     RuleBasedCollator *result = new RuleBasedCollator();
575     // Null pointer check
576     if (result != NULL) {
577 	    result->ucollator = ucol;
578 	    result->dataIsOwned = TRUE;
579 	    result->isWriteThroughAlias = FALSE;
580 	    setRuleStringFromCollator();
581     }
582 
583     return result;
584 }
585 
586 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const587 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
588                                          uint8_t *result, int32_t resultLength)
589                                          const
590 {
591     return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
592 }
593 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const594 int32_t RuleBasedCollator::getSortKey(const UChar *source,
595                                          int32_t sourceLength, uint8_t *result,
596                                          int32_t resultLength) const
597 {
598     return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
599 }
600 
getStrength(void) const601 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
602 {
603     UErrorCode intStatus = U_ZERO_ERROR;
604     return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
605                                 &intStatus));
606 }
607 
setStrength(ECollationStrength newStrength)608 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
609 {
610     checkOwned();
611     UErrorCode intStatus = U_ZERO_ERROR;
612     UCollationStrength strength = getUCollationStrength(newStrength);
613     ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
614 }
615 
getReorderCodes(int32_t * dest,int32_t destCapacity,UErrorCode & status) const616 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
617                                           int32_t destCapacity,
618                                           UErrorCode& status) const
619 {
620     return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
621 }
622 
setReorderCodes(const int32_t * reorderCodes,int32_t reorderCodesLength,UErrorCode & status)623 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
624                                        int32_t reorderCodesLength,
625                                        UErrorCode& status)
626 {
627     checkOwned();
628     ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
629 }
630 
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t destCapacity,UErrorCode & status)631 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
632                                 int32_t* dest,
633                                 int32_t destCapacity,
634                                 UErrorCode& status)
635 {
636     return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
637 }
638 
639 /**
640 * Create a hash code for this collation. Just hash the main rule table -- that
641 * should be good enough for almost any use.
642 */
hashCode() const643 int32_t RuleBasedCollator::hashCode() const
644 {
645     int32_t length;
646     const UChar *rules = ucol_getRules(ucollator, &length);
647     return uhash_hashUCharsN(rules, length);
648 }
649 
650 /**
651 * return the locale of this collator
652 */
getLocale(ULocDataLocaleType type,UErrorCode & status) const653 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
654     const char *result = ucol_getLocaleByType(ucollator, type, &status);
655     if(result == NULL) {
656         Locale res("");
657         res.setToBogus();
658         return res;
659     } else {
660         return Locale(result);
661     }
662 }
663 
664 void
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)665 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
666     checkOwned();
667     char* rloc  = uprv_strdup(requestedLocale.getName());
668     if (rloc) {
669         char* vloc = uprv_strdup(validLocale.getName());
670         if (vloc) {
671             char* aloc = uprv_strdup(actualLocale.getName());
672             if (aloc) {
673                 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
674                 return;
675             }
676             uprv_free(vloc);
677         }
678         uprv_free(rloc);
679     }
680 }
681 
682 // RuleBasedCollator private constructors ---------------------------------
683 
RuleBasedCollator()684 RuleBasedCollator::RuleBasedCollator()
685   : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
686 {
687 }
688 
RuleBasedCollator(const Locale & desiredLocale,UErrorCode & status)689 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
690                                            UErrorCode& status)
691  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
692 {
693     if (U_FAILURE(status))
694         return;
695 
696     /*
697     Try to load, in order:
698      1. The desired locale's collation.
699      2. A fallback of the desired locale.
700      3. The default locale's collation.
701      4. A fallback of the default locale.
702      5. The default collation rules, which contains en_US collation rules.
703 
704      To reiterate, we try:
705      Specific:
706       language+country+variant
707       language+country
708       language
709      Default:
710       language+country+variant
711       language+country
712       language
713      Root: (aka DEFAULTRULES)
714      steps 1-5 are handled by resource bundle fallback mechanism.
715      however, in a very unprobable situation that no resource bundle
716      data exists, step 5 is repeated with hardcoded default rules.
717     */
718 
719     setUCollator(desiredLocale, status);
720 
721     if (U_FAILURE(status))
722     {
723         status = U_ZERO_ERROR;
724 
725         setUCollator(kRootLocaleName, status);
726         if (status == U_ZERO_ERROR) {
727             status = U_USING_DEFAULT_WARNING;
728         }
729     }
730 
731     if (U_SUCCESS(status))
732     {
733         setRuleStringFromCollator();
734     }
735 }
736 
737 void
setUCollator(const char * locale,UErrorCode & status)738 RuleBasedCollator::setUCollator(const char *locale,
739                                 UErrorCode &status)
740 {
741     if (U_FAILURE(status)) {
742         return;
743     }
744     if (ucollator && dataIsOwned)
745         ucol_close(ucollator);
746     ucollator = ucol_open_internal(locale, &status);
747     dataIsOwned = TRUE;
748     isWriteThroughAlias = FALSE;
749 }
750 
751 
752 void
checkOwned()753 RuleBasedCollator::checkOwned() {
754     if (!(dataIsOwned || isWriteThroughAlias)) {
755         UErrorCode status = U_ZERO_ERROR;
756         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
757         setRuleStringFromCollator();
758         dataIsOwned = TRUE;
759         isWriteThroughAlias = FALSE;
760     }
761 }
762 
763 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
764 
765 U_NAMESPACE_END
766 
767 #endif /* #if !UCONFIG_NO_COLLATION */
768