• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 1996-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ******************************************************************************
6  */
7 
8 /**
9  * File tblcoll.cpp
10  *
11  * Created by: Helena Shih
12  *
13  * Modification History:
14  *
15  *  Date        Name        Description
16  *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
17  *                          constructor which reads RuleBasedCollator object from
18  *                          a binary file.  Added writeToFile method which streams
19  *                          RuleBasedCollator out to a binary file.  The streamIn
20  *                          and streamOut methods use istream and ostream objects
21  *                          in binary mode.
22  *  2/11/97     aliu        Moved declarations out of for loop initializer.
23  *                          Added Mac compatibility #ifdef for ios::nocreate.
24  *  2/12/97     aliu        Modified to use TableCollationData sub-object to
25  *                          hold invariant data.
26  *  2/13/97     aliu        Moved several methods into this class from Collation.
27  *                          Added a private RuleBasedCollator(Locale&) constructor,
28  *                          to be used by Collator::getInstance().  General
29  *                          clean up.  Made use of UErrorCode variables consistent.
30  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
31  *                          constructor and getDynamicClassID.
32  *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
33  *                          use the maximum allowable value which is kBlockCount.
34  *                          Modified getRules() to load rules dynamically.  Changed
35  *                          constructFromFile() call to accommodate this (added
36  *                          parameter to specify whether binary loading is to
37  *                          take place).
38  * 05/06/97     helena      Added memory allocation error check.
39  *  6/20/97     helena      Java class name change.
40  *  6/23/97     helena      Adding comments to make code more readable.
41  * 09/03/97     helena      Added createCollationKeyValues().
42  * 06/26/98     erm         Changes for CollationKeys using byte arrays.
43  * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
44  * 04/23/99     stephen     Removed EDecompositionMode, merged with
45  *                          Normalizer::EMode
46  * 06/14/99     stephen     Removed kResourceBundleSuffix
47  * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
48  *                          files are no longer used.
49  * 11/02/99     helena      Collator performance enhancements.  Special case
50  *                          for NO_OP situations.
51  * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
52  * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
53  *                          to implementation file.
54  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
55  */
56 
57 #include "unicode/utypeinfo.h"  // for 'typeid' to work
58 
59 #include "unicode/utypes.h"
60 
61 #if !UCONFIG_NO_COLLATION
62 
63 #include "unicode/tblcoll.h"
64 #include "unicode/coleitr.h"
65 #include "unicode/ures.h"
66 #include "unicode/uset.h"
67 #include "ucol_imp.h"
68 #include "uresimp.h"
69 #include "uhash.h"
70 #include "cmemory.h"
71 #include "cstring.h"
72 #include "putilimp.h"
73 
74 /* public RuleBasedCollator constructor ---------------------------------- */
75 
76 U_NAMESPACE_BEGIN
77 
78 /**
79 * Copy constructor, aliasing, not write-through
80 */
RuleBasedCollator(const RuleBasedCollator & that)81 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
82 : Collator(that)
83 , dataIsOwned(FALSE)
84 , isWriteThroughAlias(FALSE)
85 , ucollator(NULL)
86 {
87     RuleBasedCollator::operator=(that);
88 }
89 
RuleBasedCollator(const UnicodeString & rules,UErrorCode & status)90 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
91                                      UErrorCode& status) :
92 dataIsOwned(FALSE)
93 {
94     construct(rules,
95         UCOL_DEFAULT_STRENGTH,
96         UCOL_DEFAULT,
97         status);
98 }
99 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UErrorCode & status)100 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
101                                      ECollationStrength collationStrength,
102                                      UErrorCode& status) : dataIsOwned(FALSE)
103 {
104     construct(rules,
105         getUCollationStrength(collationStrength),
106         UCOL_DEFAULT,
107         status);
108 }
109 
RuleBasedCollator(const UnicodeString & rules,UColAttributeValue decompositionMode,UErrorCode & status)110 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
111                                      UColAttributeValue decompositionMode,
112                                      UErrorCode& status) :
113 dataIsOwned(FALSE)
114 {
115     construct(rules,
116         UCOL_DEFAULT_STRENGTH,
117         decompositionMode,
118         status);
119 }
120 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)121 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
122                                      ECollationStrength collationStrength,
123                                      UColAttributeValue decompositionMode,
124                                      UErrorCode& status) : dataIsOwned(FALSE)
125 {
126     construct(rules,
127         getUCollationStrength(collationStrength),
128         decompositionMode,
129         status);
130 }
/**
 * Opens a collator from a binary image by calling ucol_openBinary().
 *
 * @param bin    the binary collation image
 * @param length length of the image, passed through to ucol_openBinary()
 * @param base   collator whose underlying UCollator is used as the base;
 *               must not be NULL
 * @param status error code; left untouched if it already indicates a failure,
 *               set to U_ILLEGAL_ARGUMENT_ERROR when base is NULL
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                    const RuleBasedCollator *base,
                    UErrorCode &status) :
dataIsOwned(TRUE),
isWriteThroughAlias(FALSE),
ucollator(NULL)
{
    // ucollator starts out NULL so that every early exit leaves the object
    // in a defined state for the destructor.
    if (U_FAILURE(status)) {
        return;
    }

    // base is dereferenced below: report a bad argument instead of crashing.
    if (base == NULL) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
}
139 
140 void
setRuleStringFromCollator()141 RuleBasedCollator::setRuleStringFromCollator()
142 {
143     int32_t length;
144     const UChar *r = ucol_getRules(ucollator, &length);
145 
146     if (r && length > 0) {
147         // alias the rules string
148         urulestring.setTo(TRUE, r, length);
149     }
150     else {
151         urulestring.truncate(0); // Clear string.
152     }
153 }
154 
155 // not aliasing, not write-through
156 void
construct(const UnicodeString & rules,UColAttributeValue collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)157 RuleBasedCollator::construct(const UnicodeString& rules,
158                              UColAttributeValue collationStrength,
159                              UColAttributeValue decompositionMode,
160                              UErrorCode& status)
161 {
162     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
163         decompositionMode, collationStrength,
164         NULL, &status);
165 
166     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
167     isWriteThroughAlias = FALSE;
168 
169     if(ucollator == NULL) {
170         if(U_SUCCESS(status)) {
171             status = U_MEMORY_ALLOCATION_ERROR;
172         }
173         return; // Failure
174     }
175 
176     setRuleStringFromCollator();
177 }
178 
179 /* RuleBasedCollator public destructor ----------------------------------- */
180 
~RuleBasedCollator()181 RuleBasedCollator::~RuleBasedCollator()
182 {
183     if (dataIsOwned)
184     {
185         ucol_close(ucollator);
186     }
187     ucollator = 0;
188 }
189 
190 /* RuleBasedCollator public methods -------------------------------------- */
191 
operator ==(const Collator & that) const192 UBool RuleBasedCollator::operator==(const Collator& that) const
193 {
194   /* only checks for address equals here */
195   if (Collator::operator==(that))
196     return TRUE;
197 
198   if (typeid(*this) != typeid(that))
199     return FALSE;  /* not the same class */
200 
201   RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
202 
203   // weiv: use C function, commented code below is wrong
204   return ucol_equals(this->ucollator, thatAlias.ucollator);
205   /*
206   synwee : orginal code does not check for data compatibility
207   */
208   /*
209   if (ucollator != thatAlias.ucollator)
210     return FALSE;
211 
212   return TRUE;
213   */
214 }
215 
operator !=(const Collator & other) const216 UBool RuleBasedCollator::operator!=(const Collator& other) const
217 {
218     return !(*this == other);
219 }
220 
221 // aliasing, not write-through
operator =(const RuleBasedCollator & that)222 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
223 {
224     if (this != &that)
225     {
226         if (dataIsOwned)
227         {
228             ucol_close(ucollator);
229         }
230 
231         urulestring.truncate(0); // empty the rule string
232         dataIsOwned = TRUE;
233         isWriteThroughAlias = FALSE;
234 
235         UErrorCode intStatus = U_ZERO_ERROR;
236         int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
237         ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
238                                         &intStatus);
239         if (U_SUCCESS(intStatus)) {
240             setRuleStringFromCollator();
241         }
242     }
243     return *this;
244 }
245 
246 // aliasing, not write-through
clone() const247 Collator* RuleBasedCollator::clone() const
248 {
249     return new RuleBasedCollator(*this);
250 }
251 
createCollationElementIterator(const UnicodeString & source) const252 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
253                                            (const UnicodeString& source) const
254 {
255     UErrorCode status = U_ZERO_ERROR;
256     CollationElementIterator *result = new CollationElementIterator(source, this,
257                                                                     status);
258     if (U_FAILURE(status)) {
259         delete result;
260         return NULL;
261     }
262 
263     return result;
264 }
265 
266 /**
267 * Create a CollationElementIterator object that will iterate over the
268 * elements in a string, using the collation rules defined in this
269 * RuleBasedCollator
270 */
createCollationElementIterator(const CharacterIterator & source) const271 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
272                                        (const CharacterIterator& source) const
273 {
274     UErrorCode status = U_ZERO_ERROR;
275     CollationElementIterator *result = new CollationElementIterator(source, this,
276                                                                     status);
277 
278     if (U_FAILURE(status)) {
279         delete result;
280         return NULL;
281     }
282 
283     return result;
284 }
285 
286 /**
287 * Return a string representation of this collator's rules. The string can
288 * later be passed to the constructor that takes a UnicodeString argument,
289 * which will construct a collator that's functionally identical to this one.
290 * You can also allow users to edit the string in order to change the collation
291 * data, or you can print it out for inspection, or whatever.
292 */
getRules() const293 const UnicodeString& RuleBasedCollator::getRules() const
294 {
295     return urulestring;
296 }
297 
getRules(UColRuleOption delta,UnicodeString & buffer)298 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
299 {
300     int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
301 
302     if (rulesize > 0) {
303         UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
304         if(rules != NULL) {
305             ucol_getRulesEx(ucollator, delta, rules, rulesize);
306             buffer.setTo(rules, rulesize);
307             uprv_free(rules);
308         } else { // couldn't allocate
309             buffer.remove();
310         }
311     }
312     else {
313         buffer.remove();
314     }
315 }
316 
317 UnicodeSet *
getTailoredSet(UErrorCode & status) const318 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
319 {
320     if(U_FAILURE(status)) {
321         return NULL;
322     }
323     return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
324 }
325 
326 
getVersion(UVersionInfo versionInfo) const327 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
328 {
329     if (versionInfo!=NULL){
330         ucol_getVersion(ucollator, versionInfo);
331     }
332 }
333 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const334 Collator::EComparisonResult RuleBasedCollator::compare(
335                                                const UnicodeString& source,
336                                                const UnicodeString& target,
337                                                int32_t length) const
338 {
339     UErrorCode status = U_ZERO_ERROR;
340     return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
341 }
342 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const343 UCollationResult RuleBasedCollator::compare(
344                                                const UnicodeString& source,
345                                                const UnicodeString& target,
346                                                int32_t length,
347                                                UErrorCode &status) const
348 {
349     return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
350 }
351 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const352 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
353                                                        int32_t sourceLength,
354                                                        const UChar* target,
355                                                        int32_t targetLength)
356                                                        const
357 {
358     return  getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
359                                                          target, targetLength));
360 }
361 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const362 UCollationResult RuleBasedCollator::compare(const UChar* source,
363                                                        int32_t sourceLength,
364                                                        const UChar* target,
365                                                        int32_t targetLength,
366                                                        UErrorCode &status) const
367 {
368     if(U_SUCCESS(status)) {
369         return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
370     } else {
371         return UCOL_EQUAL;
372     }
373 }
374 
375 /**
376 * Compare two strings using this collator
377 */
compare(const UnicodeString & source,const UnicodeString & target) const378 Collator::EComparisonResult RuleBasedCollator::compare(
379                                              const UnicodeString& source,
380                                              const UnicodeString& target) const
381 {
382     return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
383                                                         target.getBuffer(), target.length()));
384 }
385 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const386 UCollationResult RuleBasedCollator::compare(
387                                              const UnicodeString& source,
388                                              const UnicodeString& target,
389                                              UErrorCode &status) const
390 {
391     if(U_SUCCESS(status)) {
392         return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
393                                        target.getBuffer(), target.length());
394     } else {
395         return UCOL_EQUAL;
396     }
397 }
398 
compare(UCharIterator & sIter,UCharIterator & tIter,UErrorCode & status) const399 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
400                                             UCharIterator &tIter,
401                                             UErrorCode &status) const {
402     if(U_SUCCESS(status)) {
403         return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
404     } else {
405         return UCOL_EQUAL;
406     }
407 }
408 
409 /**
410 * Retrieve a collation key for the specified string. The key can be compared
411 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
412 * the ordering of their respective source strings. This is handy when doing a
413 * sort, where each sort key must be compared many times.
414 *
415 * The basic algorithm here is to find all of the collation elements for each
416 * character in the source string, convert them to an ASCII representation, and
417 * put them into the collation key.  But it's trickier than that. Each
418 * collation element in a string has three components: primary ('A' vs 'B'),
419 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
420 * at the end of a string takes precedence over a secondary or tertiary
421 * difference earlier in the string.
422 *
423 * To account for this, we put all of the primary orders at the beginning of
424 * the string, followed by the secondary and tertiary orders. Each set of
425 * orders is terminated by nulls so that a key for a string which is a initial
426 * substring of another key will compare less without any special case.
427 *
428 * Here's a hypothetical example, with the collation element represented as a
429 * three-digit number, one digit for primary, one for secondary, etc.
430 *
431 * String:              A     a     B    \u00C9
432 * Collation Elements: 101   100   201  511
433 * Collation Key:      1125<null>0001<null>1011<null>
434 *
435 * To make things even trickier, secondary differences (accent marks) are
436 * compared starting at the *end* of the string in languages with French
437 * secondary ordering. But when comparing the accent marks on a single base
438 * character, they are compared from the beginning. To handle this, we reverse
439 * all of the accents that belong to each base character, then we reverse the
440 * entire string of secondary orderings at the end.
441 */
getCollationKey(const UnicodeString & source,CollationKey & sortkey,UErrorCode & status) const442 CollationKey& RuleBasedCollator::getCollationKey(
443                                                   const UnicodeString& source,
444                                                   CollationKey& sortkey,
445                                                   UErrorCode& status) const
446 {
447     return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
448 }
449 
getCollationKey(const UChar * source,int32_t sourceLen,CollationKey & sortkey,UErrorCode & status) const450 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
451                                                     int32_t sourceLen,
452                                                     CollationKey& sortkey,
453                                                     UErrorCode& status) const
454 {
455     if (U_FAILURE(status))
456     {
457         return sortkey.setToBogus();
458     }
459 
460     if ((!source) || (sourceLen == 0)) {
461         return sortkey.reset();
462     }
463 
464     uint8_t *result;
465     int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator,
466                                                       source, sourceLen,
467                                                       &result,
468                                                       &status);
469     sortkey.adopt(result, resultLen);
470     return sortkey;
471 }
472 
473 /**
474  * Return the maximum length of any expansion sequences that end with the
475  * specified comparison order.
476  * @param order a collation order returned by previous or next.
477  * @return the maximum length of any expansion sequences ending with the
478  *         specified order or 1 if collation order does not occur at the end of any
479  *         expansion sequence.
480  * @see CollationElementIterator#getMaxExpansion
481  */
getMaxExpansion(int32_t order) const482 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
483 {
484     uint8_t result;
485     UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
486     return result;
487 }
488 
cloneRuleData(int32_t & length,UErrorCode & status)489 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
490                                               UErrorCode &status)
491 {
492     return ucol_cloneRuleData(ucollator, &length, &status);
493 }
494 
495 
cloneBinary(uint8_t * buffer,int32_t capacity,UErrorCode & status)496 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
497 {
498   return ucol_cloneBinary(ucollator, buffer, capacity, &status);
499 }
500 
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & status)501 void RuleBasedCollator::setAttribute(UColAttribute attr,
502                                      UColAttributeValue value,
503                                      UErrorCode &status)
504 {
505     if (U_FAILURE(status))
506         return;
507     checkOwned();
508     ucol_setAttribute(ucollator, attr, value, &status);
509 }
510 
getAttribute(UColAttribute attr,UErrorCode & status)511 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
512                                                       UErrorCode &status)
513 {
514     if (U_FAILURE(status))
515         return UCOL_DEFAULT;
516     return ucol_getAttribute(ucollator, attr, &status);
517 }
518 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)519 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
520     checkOwned();
521     return ucol_setVariableTop(ucollator, varTop, len, &status);
522 }
523 
setVariableTop(const UnicodeString varTop,UErrorCode & status)524 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
525     checkOwned();
526     return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
527 }
528 
setVariableTop(const uint32_t varTop,UErrorCode & status)529 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
530     checkOwned();
531     ucol_restoreVariableTop(ucollator, varTop, &status);
532 }
533 
getVariableTop(UErrorCode & status) const534 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
535   return ucol_getVariableTop(ucollator, &status);
536 }
537 
safeClone(void)538 Collator* RuleBasedCollator::safeClone(void)
539 {
540     UErrorCode intStatus = U_ZERO_ERROR;
541     int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
542     UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
543                                     &intStatus);
544     if (U_FAILURE(intStatus)) {
545         return NULL;
546     }
547 
548     RuleBasedCollator *result = new RuleBasedCollator();
549     // Null pointer check
550     if (result != NULL) {
551 	    result->ucollator = ucol;
552 	    result->dataIsOwned = TRUE;
553 	    result->isWriteThroughAlias = FALSE;
554 	    setRuleStringFromCollator();
555     }
556 
557     return result;
558 }
559 
560 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const561 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
562                                          uint8_t *result, int32_t resultLength)
563                                          const
564 {
565     return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
566 }
567 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const568 int32_t RuleBasedCollator::getSortKey(const UChar *source,
569                                          int32_t sourceLength, uint8_t *result,
570                                          int32_t resultLength) const
571 {
572     return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
573 }
574 
getStrength(void) const575 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
576 {
577     UErrorCode intStatus = U_ZERO_ERROR;
578     return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
579                                 &intStatus));
580 }
581 
setStrength(ECollationStrength newStrength)582 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
583 {
584     checkOwned();
585     UErrorCode intStatus = U_ZERO_ERROR;
586     UCollationStrength strength = getUCollationStrength(newStrength);
587     ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
588 }
589 
getReorderCodes(int32_t * dest,int32_t destCapacity,UErrorCode & status) const590 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
591                                           int32_t destCapacity,
592                                           UErrorCode& status) const
593 {
594     return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
595 }
596 
setReorderCodes(const int32_t * reorderCodes,int32_t reorderCodesLength,UErrorCode & status)597 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
598                                        int32_t reorderCodesLength,
599                                        UErrorCode& status)
600 {
601     ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
602 }
603 
604 
605 /**
606 * Create a hash code for this collation. Just hash the main rule table -- that
607 * should be good enough for almost any use.
608 */
hashCode() const609 int32_t RuleBasedCollator::hashCode() const
610 {
611     int32_t length;
612     const UChar *rules = ucol_getRules(ucollator, &length);
613     return uhash_hashUCharsN(rules, length);
614 }
615 
616 /**
617 * return the locale of this collator
618 */
getLocale(ULocDataLocaleType type,UErrorCode & status) const619 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
620     const char *result = ucol_getLocaleByType(ucollator, type, &status);
621     if(result == NULL) {
622         Locale res("");
623         res.setToBogus();
624         return res;
625     } else {
626         return Locale(result);
627     }
628 }
629 
630 void
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)631 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
632     checkOwned();
633     char* rloc  = uprv_strdup(requestedLocale.getName());
634     if (rloc) {
635         char* vloc = uprv_strdup(validLocale.getName());
636         if (vloc) {
637             char* aloc = uprv_strdup(actualLocale.getName());
638             if (aloc) {
639                 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
640                 return;
641             }
642             uprv_free(vloc);
643         }
644         uprv_free(rloc);
645     }
646 }
647 
648 // RuleBasedCollator private constructors ------------------------------------
649 
RuleBasedCollator()650 RuleBasedCollator::RuleBasedCollator()
651   : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
652 {
653 }
654 
RuleBasedCollator(const Locale & desiredLocale,UErrorCode & status)655 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
656                                            UErrorCode& status)
657  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
658 {
659     if (U_FAILURE(status))
660         return;
661 
662     /*
663     Try to load, in order:
664      1. The desired locale's collation.
665      2. A fallback of the desired locale.
666      3. The default locale's collation.
667      4. A fallback of the default locale.
668      5. The default collation rules, which contains en_US collation rules.
669 
670      To reiterate, we try:
671      Specific:
672       language+country+variant
673       language+country
674       language
675      Default:
676       language+country+variant
677       language+country
678       language
679      Root: (aka DEFAULTRULES)
680      steps 1-5 are handled by resource bundle fallback mechanism.
681      however, in a very unprobable situation that no resource bundle
682      data exists, step 5 is repeated with hardcoded default rules.
683     */
684 
685     setUCollator(desiredLocale, status);
686 
687     if (U_FAILURE(status))
688     {
689         status = U_ZERO_ERROR;
690 
691         setUCollator(kRootLocaleName, status);
692         if (status == U_ZERO_ERROR) {
693             status = U_USING_DEFAULT_WARNING;
694         }
695     }
696 
697     if (U_SUCCESS(status))
698     {
699         setRuleStringFromCollator();
700     }
701 }
702 
703 void
setUCollator(const char * locale,UErrorCode & status)704 RuleBasedCollator::setUCollator(const char *locale,
705                                 UErrorCode &status)
706 {
707     if (U_FAILURE(status))
708         return;
709     if (ucollator && dataIsOwned)
710         ucol_close(ucollator);
711     ucollator = ucol_open_internal(locale, &status);
712     dataIsOwned = TRUE;
713     isWriteThroughAlias = FALSE;
714 }
715 
716 
717 void
checkOwned()718 RuleBasedCollator::checkOwned() {
719     if (!(dataIsOwned || isWriteThroughAlias)) {
720         UErrorCode status = U_ZERO_ERROR;
721         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
722         setRuleStringFromCollator();
723         dataIsOwned = TRUE;
724         isWriteThroughAlias = FALSE;
725     }
726 }
727 
728 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
729 
730 U_NAMESPACE_END
731 
732 #endif /* #if !UCONFIG_NO_COLLATION */
733