• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 1996-2013, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ******************************************************************************
6  */
7 
8 /**
9  * File tblcoll.cpp
10  *
11  * Created by: Helena Shih
12  *
13  * Modification History:
14  *
15  *  Date        Name        Description
16  *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
17  *                          constructor which reads RuleBasedCollator object from
18  *                          a binary file.  Added writeToFile method which streams
19  *                          RuleBasedCollator out to a binary file.  The streamIn
20  *                          and streamOut methods use istream and ostream objects
21  *                          in binary mode.
22  *  2/11/97     aliu        Moved declarations out of for loop initializer.
23  *                          Added Mac compatibility #ifdef for ios::nocreate.
24  *  2/12/97     aliu        Modified to use TableCollationData sub-object to
25  *                          hold invariant data.
26  *  2/13/97     aliu        Moved several methods into this class from Collation.
27  *                          Added a private RuleBasedCollator(Locale&) constructor,
28  *                          to be used by Collator::getInstance().  General
29  *                          clean up.  Made use of UErrorCode variables consistent.
30  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
31  *                          constructor and getDynamicClassID.
32  *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
33  *                          use the maximum allowable value which is kBlockCount.
34  *                          Modified getRules() to load rules dynamically.  Changed
35  *                          constructFromFile() call to accommodate this (added
36  *                          parameter to specify whether binary loading is to
37  *                          take place).
38  * 05/06/97     helena      Added memory allocation error check.
39  *  6/20/97     helena      Java class name change.
40  *  6/23/97     helena      Adding comments to make code more readable.
41  * 09/03/97     helena      Added createCollationKeyValues().
42  * 06/26/98     erm         Changes for CollationKeys using byte arrays.
43  * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
44  * 04/23/99     stephen     Removed EDecompositionMode, merged with
45  *                          Normalizer::EMode
46  * 06/14/99     stephen     Removed kResourceBundleSuffix
47  * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
48  *                          files are no longer used.
49  * 11/02/99     helena      Collator performance enhancements.  Special case
50  *                          for NO_OP situations.
51  * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
52  * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
53  *                          to implementation file.
54  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
55  */
56 
57 #include "unicode/utypes.h"
58 
59 #if !UCONFIG_NO_COLLATION
60 
61 #include "unicode/tblcoll.h"
62 #include "unicode/coleitr.h"
63 #include "unicode/ures.h"
64 #include "unicode/uset.h"
65 #include "ucol_imp.h"
66 #include "uresimp.h"
67 #include "uhash.h"
68 #include "cmemory.h"
69 #include "cstring.h"
70 #include "putilimp.h"
71 #include "ustr_imp.h"
72 
73 /* public RuleBasedCollator constructor ---------------------------------- */
74 
75 U_NAMESPACE_BEGIN
76 
77 /**
78 * Copy constructor, aliasing, not write-through
79 */
RuleBasedCollator(const RuleBasedCollator & that)80 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
81 : Collator(that)
82 , dataIsOwned(FALSE)
83 , isWriteThroughAlias(FALSE)
84 , ucollator(NULL)
85 {
86     RuleBasedCollator::operator=(that);
87 }
88 
RuleBasedCollator(const UnicodeString & rules,UErrorCode & status)89 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
90                                      UErrorCode& status) :
91 dataIsOwned(FALSE)
92 {
93     construct(rules,
94         UCOL_DEFAULT_STRENGTH,
95         UCOL_DEFAULT,
96         status);
97 }
98 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UErrorCode & status)99 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
100                                      ECollationStrength collationStrength,
101                                      UErrorCode& status) : dataIsOwned(FALSE)
102 {
103     construct(rules,
104         (UColAttributeValue)collationStrength,
105         UCOL_DEFAULT,
106         status);
107 }
108 
RuleBasedCollator(const UnicodeString & rules,UColAttributeValue decompositionMode,UErrorCode & status)109 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
110                                      UColAttributeValue decompositionMode,
111                                      UErrorCode& status) :
112 dataIsOwned(FALSE)
113 {
114     construct(rules,
115         UCOL_DEFAULT_STRENGTH,
116         decompositionMode,
117         status);
118 }
119 
RuleBasedCollator(const UnicodeString & rules,ECollationStrength collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)120 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
121                                      ECollationStrength collationStrength,
122                                      UColAttributeValue decompositionMode,
123                                      UErrorCode& status) : dataIsOwned(FALSE)
124 {
125     construct(rules,
126         (UColAttributeValue)collationStrength,
127         decompositionMode,
128         status);
129 }
/**
 * Builds a collator from a binary collation image via ucol_openBinary().
 *
 * The resulting UCollator (if any) is owned by this object and closed in the
 * destructor. On any failure ucollator is NULL and status holds the error.
 *
 * @param bin    the binary image handed to ucol_openBinary()
 * @param length the length passed along with bin
 * @param base   collator whose UCollator is used as the base; must not be
 *               NULL (a NULL base is rejected with U_ILLEGAL_ARGUMENT_ERROR
 *               instead of being dereferenced)
 * @param status in/out error code; nothing is opened if it already signals
 *               a failure on entry
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                    const RuleBasedCollator *base,
                    UErrorCode &status) :
dataIsOwned(TRUE),
isWriteThroughAlias(FALSE),
ucollator(NULL)
{
    if (U_FAILURE(status)) {
        return; // keep the caller's earlier error untouched
    }
    if (base == NULL) {
        // The original code dereferenced base unconditionally.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
}
138 
139 void
setRuleStringFromCollator()140 RuleBasedCollator::setRuleStringFromCollator()
141 {
142     int32_t length;
143     const UChar *r = ucol_getRules(ucollator, &length);
144 
145     if (r && length > 0) {
146         // alias the rules string
147         urulestring.setTo(TRUE, r, length);
148     }
149     else {
150         urulestring.truncate(0); // Clear string.
151     }
152 }
153 
154 // not aliasing, not write-through
155 void
construct(const UnicodeString & rules,UColAttributeValue collationStrength,UColAttributeValue decompositionMode,UErrorCode & status)156 RuleBasedCollator::construct(const UnicodeString& rules,
157                              UColAttributeValue collationStrength,
158                              UColAttributeValue decompositionMode,
159                              UErrorCode& status)
160 {
161     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
162         decompositionMode, collationStrength,
163         NULL, &status);
164 
165     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
166     isWriteThroughAlias = FALSE;
167 
168     if(ucollator == NULL) {
169         if(U_SUCCESS(status)) {
170             status = U_MEMORY_ALLOCATION_ERROR;
171         }
172         return; // Failure
173     }
174 
175     setRuleStringFromCollator();
176 }
177 
178 /* RuleBasedCollator public destructor ----------------------------------- */
179 
~RuleBasedCollator()180 RuleBasedCollator::~RuleBasedCollator()
181 {
182     if (dataIsOwned)
183     {
184         ucol_close(ucollator);
185     }
186     ucollator = 0;
187 }
188 
189 /* RuleBasedCollator public methods -------------------------------------- */
190 
operator ==(const Collator & that) const191 UBool RuleBasedCollator::operator==(const Collator& that) const
192 {
193   /* only checks for address equals here */
194   if (this == &that) {
195     return TRUE;
196   }
197   if (!Collator::operator==(that)) {
198     return FALSE;  /* not the same class */
199   }
200 
201   RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
202 
203   return ucol_equals(this->ucollator, thatAlias.ucollator);
204 }
205 
206 // aliasing, not write-through
operator =(const RuleBasedCollator & that)207 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
208 {
209     if (this == &that) { return *this; }
210 
211     UErrorCode intStatus = U_ZERO_ERROR;
212     UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus);
213     if (U_FAILURE(intStatus)) { return *this; }
214 
215     if (dataIsOwned) {
216         ucol_close(ucollator);
217     }
218     ucollator = ucol;
219     dataIsOwned = TRUE;
220     isWriteThroughAlias = FALSE;
221     setRuleStringFromCollator();
222     return *this;
223 }
224 
225 // aliasing, not write-through
clone() const226 Collator* RuleBasedCollator::clone() const
227 {
228     RuleBasedCollator* coll = new RuleBasedCollator(*this);
229     // There is a small chance that the internal ucol_safeClone() call fails.
230     if (coll != NULL && coll->ucollator == NULL) {
231         delete coll;
232         return NULL;
233     }
234     return coll;
235 }
236 
237 
createCollationElementIterator(const UnicodeString & source) const238 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
239                                            (const UnicodeString& source) const
240 {
241     UErrorCode status = U_ZERO_ERROR;
242     CollationElementIterator *result = new CollationElementIterator(source, this,
243                                                                     status);
244     if (U_FAILURE(status)) {
245         delete result;
246         return NULL;
247     }
248 
249     return result;
250 }
251 
252 /**
253 * Create a CollationElementIterator object that will iterate over the
254 * elements in a string, using the collation rules defined in this
255 * RuleBasedCollator
256 */
createCollationElementIterator(const CharacterIterator & source) const257 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
258                                        (const CharacterIterator& source) const
259 {
260     UErrorCode status = U_ZERO_ERROR;
261     CollationElementIterator *result = new CollationElementIterator(source, this,
262                                                                     status);
263 
264     if (U_FAILURE(status)) {
265         delete result;
266         return NULL;
267     }
268 
269     return result;
270 }
271 
272 /**
273 * Return a string representation of this collator's rules. The string can
274 * later be passed to the constructor that takes a UnicodeString argument,
275 * which will construct a collator that's functionally identical to this one.
276 * You can also allow users to edit the string in order to change the collation
277 * data, or you can print it out for inspection, or whatever.
278 */
getRules() const279 const UnicodeString& RuleBasedCollator::getRules() const
280 {
281     return urulestring;
282 }
283 
getRules(UColRuleOption delta,UnicodeString & buffer)284 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
285 {
286     int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
287 
288     if (rulesize > 0) {
289         UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
290         if(rules != NULL) {
291             ucol_getRulesEx(ucollator, delta, rules, rulesize);
292             buffer.setTo(rules, rulesize);
293             uprv_free(rules);
294         } else { // couldn't allocate
295             buffer.remove();
296         }
297     }
298     else {
299         buffer.remove();
300     }
301 }
302 
303 UnicodeSet *
getTailoredSet(UErrorCode & status) const304 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
305 {
306     if(U_FAILURE(status)) {
307         return NULL;
308     }
309     return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
310 }
311 
312 
getVersion(UVersionInfo versionInfo) const313 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
314 {
315     if (versionInfo!=NULL){
316         ucol_getVersion(ucollator, versionInfo);
317     }
318 }
319 
320 /**
321 * Compare two strings using this collator
322 */
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const323 UCollationResult RuleBasedCollator::compare(
324                                                const UnicodeString& source,
325                                                const UnicodeString& target,
326                                                int32_t length,
327                                                UErrorCode &status) const
328 {
329     return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
330 }
331 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const332 UCollationResult RuleBasedCollator::compare(const UChar* source,
333                                                        int32_t sourceLength,
334                                                        const UChar* target,
335                                                        int32_t targetLength,
336                                                        UErrorCode &status) const
337 {
338     if(U_SUCCESS(status)) {
339         return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
340     } else {
341         return UCOL_EQUAL;
342     }
343 }
344 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const345 UCollationResult RuleBasedCollator::compare(
346                                              const UnicodeString& source,
347                                              const UnicodeString& target,
348                                              UErrorCode &status) const
349 {
350     if(U_SUCCESS(status)) {
351         return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
352                                        target.getBuffer(), target.length());
353     } else {
354         return UCOL_EQUAL;
355     }
356 }
357 
compare(UCharIterator & sIter,UCharIterator & tIter,UErrorCode & status) const358 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
359                                             UCharIterator &tIter,
360                                             UErrorCode &status) const {
361     if(U_SUCCESS(status)) {
362         return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
363     } else {
364         return UCOL_EQUAL;
365     }
366 }
367 
368 /**
369 * Retrieve a collation key for the specified string. The key can be compared
370 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
371 * the ordering of their respective source strings. This is handy when doing a
372 * sort, where each sort key must be compared many times.
373 *
374 * The basic algorithm here is to find all of the collation elements for each
375 * character in the source string, convert them to an ASCII representation, and
376 * put them into the collation key.  But it's trickier than that. Each
377 * collation element in a string has three components: primary ('A' vs 'B'),
378 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
379 * at the end of a string takes precedence over a secondary or tertiary
380 * difference earlier in the string.
381 *
382 * To account for this, we put all of the primary orders at the beginning of
383 * the string, followed by the secondary and tertiary orders. Each set of
384 * orders is terminated by nulls so that a key for a string which is an initial
385 * substring of another key will compare less without any special case.
386 *
387 * Here's a hypothetical example, with the collation element represented as a
388 * three-digit number, one digit for primary, one for secondary, etc.
389 *
390 * String:              A     a     B    \u00C9
391 * Collation Elements: 101   100   201  511
392 * Collation Key:      1125<null>0001<null>1011<null>
393 *
394 * To make things even trickier, secondary differences (accent marks) are
395 * compared starting at the *end* of the string in languages with French
396 * secondary ordering. But when comparing the accent marks on a single base
397 * character, they are compared from the beginning. To handle this, we reverse
398 * all of the accents that belong to each base character, then we reverse the
399 * entire string of secondary orderings at the end.
400 */
getCollationKey(const UnicodeString & source,CollationKey & sortkey,UErrorCode & status) const401 CollationKey& RuleBasedCollator::getCollationKey(
402                                                   const UnicodeString& source,
403                                                   CollationKey& sortkey,
404                                                   UErrorCode& status) const
405 {
406     return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
407 }
408 
getCollationKey(const UChar * source,int32_t sourceLen,CollationKey & sortkey,UErrorCode & status) const409 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
410                                                     int32_t sourceLen,
411                                                     CollationKey& sortkey,
412                                                     UErrorCode& status) const
413 {
414     if (U_FAILURE(status)) {
415         return sortkey.setToBogus();
416     }
417     if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
418         status = U_ILLEGAL_ARGUMENT_ERROR;
419         return sortkey.setToBogus();
420     }
421 
422     if (sourceLen < 0) {
423         sourceLen = u_strlen(source);
424     }
425     if (sourceLen == 0) {
426         return sortkey.reset();
427     }
428 
429     int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortkey, status);
430 
431     if (U_SUCCESS(status)) {
432         sortkey.setLength(resultLen);
433     } else {
434         sortkey.setToBogus();
435     }
436     return sortkey;
437 }
438 
439 /**
440  * Return the maximum length of any expansion sequences that end with the
441  * specified comparison order.
442  * @param order a collation order returned by previous or next.
443  * @return the maximum length of any expansion seuences ending with the
444  *         specified order or 1 if collation order does not occur at the end of any
445  *         expansion sequence.
446  * @see CollationElementIterator#getMaxExpansion
447  */
getMaxExpansion(int32_t order) const448 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
449 {
450     uint8_t result;
451     UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
452     return result;
453 }
454 
cloneRuleData(int32_t & length,UErrorCode & status)455 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
456                                               UErrorCode &status)
457 {
458     if (U_FAILURE(status)) { return NULL; }
459     LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
460     if (buffer.isNull()) {
461         status = U_MEMORY_ALLOCATION_ERROR;
462         return NULL;
463     }
464     length = cloneBinary(buffer.getAlias(), 20000, status);
465     if (status == U_BUFFER_OVERFLOW_ERROR) {
466         if (buffer.allocateInsteadAndCopy(length, 0) == NULL) {
467             status = U_MEMORY_ALLOCATION_ERROR;
468             return NULL;
469         }
470         status = U_ZERO_ERROR;
471         length = cloneBinary(buffer.getAlias(), length, status);
472     }
473     if (U_FAILURE(status)) { return NULL; }
474     return buffer.orphan();
475 }
476 
477 
cloneBinary(uint8_t * buffer,int32_t capacity,UErrorCode & status)478 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
479 {
480   return ucol_cloneBinary(ucollator, buffer, capacity, &status);
481 }
482 
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & status)483 void RuleBasedCollator::setAttribute(UColAttribute attr,
484                                      UColAttributeValue value,
485                                      UErrorCode &status)
486 {
487     if (U_FAILURE(status))
488         return;
489     checkOwned();
490     ucol_setAttribute(ucollator, attr, value, &status);
491 }
492 
getAttribute(UColAttribute attr,UErrorCode & status) const493 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
494                                                       UErrorCode &status) const
495 {
496     if (U_FAILURE(status))
497         return UCOL_DEFAULT;
498     return ucol_getAttribute(ucollator, attr, &status);
499 }
500 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)501 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
502     checkOwned();
503     return ucol_setVariableTop(ucollator, varTop, len, &status);
504 }
505 
setVariableTop(const UnicodeString & varTop,UErrorCode & status)506 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &status) {
507     checkOwned();
508     return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
509 }
510 
setVariableTop(uint32_t varTop,UErrorCode & status)511 void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) {
512     checkOwned();
513     ucol_restoreVariableTop(ucollator, varTop, &status);
514 }
515 
getVariableTop(UErrorCode & status) const516 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
517   return ucol_getVariableTop(ucollator, &status);
518 }
519 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const520 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
521                                          uint8_t *result, int32_t resultLength)
522                                          const
523 {
524     return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
525 }
526 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const527 int32_t RuleBasedCollator::getSortKey(const UChar *source,
528                                          int32_t sourceLength, uint8_t *result,
529                                          int32_t resultLength) const
530 {
531     return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
532 }
533 
getReorderCodes(int32_t * dest,int32_t destCapacity,UErrorCode & status) const534 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
535                                           int32_t destCapacity,
536                                           UErrorCode& status) const
537 {
538     return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
539 }
540 
setReorderCodes(const int32_t * reorderCodes,int32_t reorderCodesLength,UErrorCode & status)541 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
542                                        int32_t reorderCodesLength,
543                                        UErrorCode& status)
544 {
545     checkOwned();
546     ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
547 }
548 
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t destCapacity,UErrorCode & status)549 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
550                                 int32_t* dest,
551                                 int32_t destCapacity,
552                                 UErrorCode& status)
553 {
554     return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
555 }
556 
557 /**
558 * Create a hash code for this collation. Just hash the main rule table -- that
559 * should be good enough for almost any use.
560 */
hashCode() const561 int32_t RuleBasedCollator::hashCode() const
562 {
563     int32_t length;
564     const UChar *rules = ucol_getRules(ucollator, &length);
565     return ustr_hashUCharsN(rules, length);
566 }
567 
568 /**
569 * return the locale of this collator
570 */
getLocale(ULocDataLocaleType type,UErrorCode & status) const571 Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
572     const char *result = ucol_getLocaleByType(ucollator, type, &status);
573     if(result == NULL) {
574         Locale res("");
575         res.setToBogus();
576         return res;
577     } else {
578         return Locale(result);
579     }
580 }
581 
582 void
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)583 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
584     checkOwned();
585     char* rloc  = uprv_strdup(requestedLocale.getName());
586     if (rloc) {
587         char* vloc = uprv_strdup(validLocale.getName());
588         if (vloc) {
589             char* aloc = uprv_strdup(actualLocale.getName());
590             if (aloc) {
591                 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
592                 return;
593             }
594             uprv_free(vloc);
595         }
596         uprv_free(rloc);
597     }
598 }
599 
600 // RuleBasedCollator private constructors --------------------------------
601 
RuleBasedCollator()602 RuleBasedCollator::RuleBasedCollator()
603   : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
604 {
605 }
606 
RuleBasedCollator(const Locale & desiredLocale,UErrorCode & status)607 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
608                                            UErrorCode& status)
609  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
610 {
611     if (U_FAILURE(status))
612         return;
613 
614     /*
615     Try to load, in order:
616      1. The desired locale's collation.
617      2. A fallback of the desired locale.
618      3. The default locale's collation.
619      4. A fallback of the default locale.
620      5. The default collation rules, which contains en_US collation rules.
621 
622      To reiterate, we try:
623      Specific:
624       language+country+variant
625       language+country
626       language
627      Default:
628       language+country+variant
629       language+country
630       language
631      Root: (aka DEFAULTRULES)
632      steps 1-5 are handled by resource bundle fallback mechanism.
633      however, in a very unprobable situation that no resource bundle
634      data exists, step 5 is repeated with hardcoded default rules.
635     */
636 
637     setUCollator(desiredLocale, status);
638 
639     if (U_FAILURE(status))
640     {
641         status = U_ZERO_ERROR;
642 
643         setUCollator(kRootLocaleName, status);
644         if (status == U_ZERO_ERROR) {
645             status = U_USING_DEFAULT_WARNING;
646         }
647     }
648 
649     if (U_SUCCESS(status))
650     {
651         setRuleStringFromCollator();
652     }
653 }
654 
655 void
setUCollator(const char * locale,UErrorCode & status)656 RuleBasedCollator::setUCollator(const char *locale,
657                                 UErrorCode &status)
658 {
659     if (U_FAILURE(status)) {
660         return;
661     }
662     if (ucollator && dataIsOwned)
663         ucol_close(ucollator);
664     ucollator = ucol_open_internal(locale, &status);
665     dataIsOwned = TRUE;
666     isWriteThroughAlias = FALSE;
667 }
668 
669 
670 void
checkOwned()671 RuleBasedCollator::checkOwned() {
672     if (!(dataIsOwned || isWriteThroughAlias)) {
673         UErrorCode status = U_ZERO_ERROR;
674         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
675         setRuleStringFromCollator();
676         dataIsOwned = TRUE;
677         isWriteThroughAlias = FALSE;
678     }
679 }
680 
681 
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & status) const682 int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
683                                                                       char *buffer,
684                                                                       int32_t capacity,
685                                                                       UErrorCode &status) const {
686   /* simply delegate */
687   return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &status);
688 }
689 
690 
691 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
692 
693 U_NAMESPACE_END
694 
695 #endif /* #if !UCONFIG_NO_COLLATION */
696