• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ******************************************************************************
8  */
9 
10 /**
11  * File coll.cpp
12  *
13  * Created by: Helena Shih
14  *
15  * Modification History:
16  *
17  *  Date        Name        Description
18  *  2/5/97      aliu        Modified createDefault to load collation data from
19  *                          binary files when possible.  Added related methods
20  *                          createCollationFromFile, chopLocale, createPathName.
21  *  2/11/97     aliu        Added methods addToCache, findInCache, which implement
22  *                          a Collation cache.  Modified createDefault to look in
23  *                          cache first, and also to store newly created Collation
24  *                          objects in the cache.  Modified to not use gLocPath.
25  *  2/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
26  *                          Moved cache out of Collation class.
27  *  2/13/97     aliu        Moved several methods out of this class and into
28  *                          RuleBasedCollator, with modifications.  Modified
29  *                          createDefault() to call new RuleBasedCollator(Locale&)
30  *                          constructor.  General clean up and documentation.
31  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
32  *                          constructor.
33  * 05/06/97     helena      Added memory allocation error detection.
34  * 05/08/97     helena      Added createInstance().
35  *  6/20/97     helena      Java class name change.
36  * 04/23/99     stephen     Removed EDecompositionMode, merged with
37  *                          Normalizer::EMode
38  * 11/23/9      srl         Inlining of some critical functions
39  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
40  * 2012-2014    markus      Rewritten in C++ again.
41  */
42 
43 #include "utypeinfo.h"  // for 'typeid' to work
44 
45 #include "unicode/utypes.h"
46 
47 #if !UCONFIG_NO_COLLATION
48 
49 #include "unicode/coll.h"
50 #include "unicode/tblcoll.h"
51 #include "collationdata.h"
52 #include "collationroot.h"
53 #include "collationtailoring.h"
54 #include "ucol_imp.h"
55 #include "cstring.h"
56 #include "cmemory.h"
57 #include "umutex.h"
58 #include "servloc.h"
59 #include "uassert.h"
60 #include "ustrenum.h"
61 #include "uresimp.h"
62 #include "ucln_in.h"
63 
64 static icu::Locale* availableLocaleList = nullptr;
65 static int32_t  availableLocaleListCount;
66 #if !UCONFIG_NO_SERVICE
67 static icu::ICULocaleService* gService = nullptr;
68 static icu::UInitOnce gServiceInitOnce {};
69 #endif
70 static icu::UInitOnce gAvailableLocaleListInitOnce {};
71 
72 /**
73  * Release all static memory held by collator.
74  */
75 U_CDECL_BEGIN
collator_cleanup()76 static UBool U_CALLCONV collator_cleanup() {
77 #if !UCONFIG_NO_SERVICE
78     if (gService) {
79         delete gService;
80         gService = nullptr;
81     }
82     gServiceInitOnce.reset();
83 #endif
84     if (availableLocaleList) {
85         delete []availableLocaleList;
86         availableLocaleList = nullptr;
87     }
88     availableLocaleListCount = 0;
89     gAvailableLocaleListInitOnce.reset();
90     return true;
91 }
92 
93 U_CDECL_END
94 
95 U_NAMESPACE_BEGIN
96 
97 #if !UCONFIG_NO_SERVICE
98 
99 // ------------------------------------------
100 //
101 // Registration
102 //
103 
104 //-------------------------------------------
105 
~CollatorFactory()106 CollatorFactory::~CollatorFactory() {}
107 
108 //-------------------------------------------
109 
110 UBool
visible() const111 CollatorFactory::visible() const {
112     return true;
113 }
114 
115 //-------------------------------------------
116 
117 UnicodeString&
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & result)118 CollatorFactory::getDisplayName(const Locale& objectLocale,
119                                 const Locale& displayLocale,
120                                 UnicodeString& result)
121 {
122   return objectLocale.getDisplayName(displayLocale, result);
123 }
124 
125 // -------------------------------------
126 
127 class ICUCollatorFactory : public ICUResourceBundleFactory {
128  public:
ICUCollatorFactory()129     ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
130     virtual ~ICUCollatorFactory();
131  protected:
132     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override;
133 };
134 
~ICUCollatorFactory()135 ICUCollatorFactory::~ICUCollatorFactory() {}
136 
137 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const138 ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
139     if (handlesKey(key, status)) {
140         const LocaleKey& lkey = static_cast<const LocaleKey&>(key);
141         Locale loc;
142         // make sure the requested locale is correct
143         // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
144         // but for ICU rb resources we use the actual one since it will fallback again
145         lkey.canonicalLocale(loc);
146 
147         return Collator::makeInstance(loc, status);
148     }
149     return nullptr;
150 }
151 
152 // -------------------------------------
153 
154 class ICUCollatorService : public ICULocaleService {
155 public:
ICUCollatorService()156     ICUCollatorService()
157         : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
158     {
159         UErrorCode status = U_ZERO_ERROR;
160         registerFactory(new ICUCollatorFactory(), status);
161     }
162 
163     virtual ~ICUCollatorService();
164 
cloneInstance(UObject * instance) const165     virtual UObject* cloneInstance(UObject* instance) const override {
166         return ((Collator*)instance)->clone();
167     }
168 
handleDefault(const ICUServiceKey & key,UnicodeString * actualID,UErrorCode & status) const169     virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const override {
170         const LocaleKey* lkey = dynamic_cast<const LocaleKey*>(&key);
171         U_ASSERT(lkey != nullptr);
172         if (actualID) {
173             // Ugly Hack Alert! We return an empty actualID to signal
174             // to callers that this is a default object, not a "real"
175             // service-created object. (TODO remove in 3.0) [aliu]
176             actualID->truncate(0);
177         }
178         Locale loc("");
179         lkey->canonicalLocale(loc);
180         return Collator::makeInstance(loc, status);
181     }
182 
getKey(ICUServiceKey & key,UnicodeString * actualReturn,UErrorCode & status) const183     virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const override {
184         UnicodeString ar;
185         if (actualReturn == nullptr) {
186             actualReturn = &ar;
187         }
188         return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
189     }
190 
isDefault() const191     virtual UBool isDefault() const override {
192         return countFactories() == 1;
193     }
194 };
195 
~ICUCollatorService()196 ICUCollatorService::~ICUCollatorService() {}
197 
198 // -------------------------------------
199 
initService()200 static void U_CALLCONV initService() {
201     gService = new ICUCollatorService();
202     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
203 }
204 
205 
206 static ICULocaleService*
getService()207 getService()
208 {
209     umtx_initOnce(gServiceInitOnce, &initService);
210     return gService;
211 }
212 
213 // -------------------------------------
214 
215 static inline UBool
hasService()216 hasService()
217 {
218     UBool retVal = !gServiceInitOnce.isReset() && (getService() != nullptr);
219     return retVal;
220 }
221 
222 #endif /* UCONFIG_NO_SERVICE */
223 
224 static void U_CALLCONV
initAvailableLocaleList(UErrorCode & status)225 initAvailableLocaleList(UErrorCode &status) {
226     U_ASSERT(availableLocaleListCount == 0);
227     U_ASSERT(availableLocaleList == nullptr);
228     // for now, there is a hardcoded list, so just walk through that list and set it up.
229     UResourceBundle *index = nullptr;
230     StackUResourceBundle installed;
231     int32_t i = 0;
232 
233     index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
234     ures_getByKey(index, "InstalledLocales", installed.getAlias(), &status);
235 
236     if(U_SUCCESS(status)) {
237         availableLocaleListCount = ures_getSize(installed.getAlias());
238         availableLocaleList = new Locale[availableLocaleListCount];
239 
240         if (availableLocaleList != nullptr) {
241             ures_resetIterator(installed.getAlias());
242             while(ures_hasNext(installed.getAlias())) {
243                 const char *tempKey = nullptr;
244                 ures_getNextString(installed.getAlias(), nullptr, &tempKey, &status);
245                 availableLocaleList[i++] = Locale(tempKey);
246             }
247         }
248         U_ASSERT(availableLocaleListCount == i);
249     }
250     ures_close(index);
251     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
252 }
253 
isAvailableLocaleListInitialized(UErrorCode & status)254 static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
255     umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
256     return U_SUCCESS(status);
257 }
258 
259 
260 // Collator public methods -----------------------------------------------
261 
262 namespace {
263 
264 static const struct {
265     const char *name;
266     UColAttribute attr;
267 } collAttributes[] = {
268     { "colStrength", UCOL_STRENGTH },
269     { "colBackwards", UCOL_FRENCH_COLLATION },
270     { "colCaseLevel", UCOL_CASE_LEVEL },
271     { "colCaseFirst", UCOL_CASE_FIRST },
272     { "colAlternate", UCOL_ALTERNATE_HANDLING },
273     { "colNormalization", UCOL_NORMALIZATION_MODE },
274     { "colNumeric", UCOL_NUMERIC_COLLATION }
275 };
276 
277 static const struct {
278     const char *name;
279     UColAttributeValue value;
280 } collAttributeValues[] = {
281     { "primary", UCOL_PRIMARY },
282     { "secondary", UCOL_SECONDARY },
283     { "tertiary", UCOL_TERTIARY },
284     { "quaternary", UCOL_QUATERNARY },
285     // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
286     { "identical", UCOL_IDENTICAL },
287     { "no", UCOL_OFF },
288     { "yes", UCOL_ON },
289     { "shifted", UCOL_SHIFTED },
290     { "non-ignorable", UCOL_NON_IGNORABLE },
291     { "lower", UCOL_LOWER_FIRST },
292     { "upper", UCOL_UPPER_FIRST }
293 };
294 
295 static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
296     "space", "punct", "symbol", "currency", "digit"
297 };
298 
getReorderCode(const char * s)299 int32_t getReorderCode(const char *s) {
300     for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
301         if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
302             return UCOL_REORDER_CODE_FIRST + i;
303         }
304     }
305     // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
306     // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
307     // Avoid introducing synonyms/aliases.
308     return -1;
309 }
310 
311 /**
312  * Sets collation attributes according to locale keywords. See
313  * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
314  *
315  * Using "alias" keywords and values where defined:
316  * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
317  * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
318  */
setAttributesFromKeywords(const Locale & loc,Collator & coll,UErrorCode & errorCode)319 void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
320     if (U_FAILURE(errorCode)) {
321         return;
322     }
323     if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
324         // No keywords.
325         return;
326     }
327     char value[1024];  // The reordering value could be long.
328     // Check for collation keywords that were already deprecated
329     // before any were supported in createInstance() (except for "collation").
330     int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
331     if (U_FAILURE(errorCode)) {
332         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
333         return;
334     }
335     if (length != 0) {
336         errorCode = U_UNSUPPORTED_ERROR;
337         return;
338     }
339     length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
340     if (U_FAILURE(errorCode)) {
341         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
342         return;
343     }
344     if (length != 0) {
345         errorCode = U_UNSUPPORTED_ERROR;
346         return;
347     }
348     // Parse known collation keywords, ignore others.
349     if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
350         errorCode = U_ZERO_ERROR;
351     }
352     for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
353         length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
354         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
355             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
356             return;
357         }
358         if (length == 0) { continue; }
359         for (int32_t j = 0;; ++j) {
360             if (j == UPRV_LENGTHOF(collAttributeValues)) {
361                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
362                 return;
363             }
364             if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
365                 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
366                 break;
367             }
368         }
369     }
370     length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
371     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
372         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
373         return;
374     }
375     if (length != 0) {
376         int32_t codes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
377         int32_t codesLength = 0;
378         char *scriptName = value;
379         for (;;) {
380             if (codesLength == UPRV_LENGTHOF(codes)) {
381                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
382                 return;
383             }
384             char *limit = scriptName;
385             char c;
386             while ((c = *limit) != 0 && c != '-') { ++limit; }
387             *limit = 0;
388             int32_t code;
389             if ((limit - scriptName) == 4) {
390                 // Strict parsing, accept only 4-letter script codes, not long names.
391                 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
392             } else {
393                 code = getReorderCode(scriptName);
394             }
395             if (code < 0) {
396                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
397                 return;
398             }
399             codes[codesLength++] = code;
400             if (c == 0) { break; }
401             scriptName = limit + 1;
402         }
403         coll.setReorderCodes(codes, codesLength, errorCode);
404     }
405     length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
406     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
407         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
408         return;
409     }
410     if (length != 0) {
411         int32_t code = getReorderCode(value);
412         if (code < 0) {
413             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
414             return;
415         }
416         coll.setMaxVariable((UColReorderCode)code, errorCode);
417     }
418     if (U_FAILURE(errorCode)) {
419         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
420     }
421 }
422 
423 }  // namespace
424 
createInstance(UErrorCode & success)425 Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
426 {
427     return createInstance(Locale::getDefault(), success);
428 }
429 
createInstance(const Locale & desiredLocale,UErrorCode & status)430 Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
431                                    UErrorCode& status)
432 {
433     if (U_FAILURE(status))
434         return 0;
435     if (desiredLocale.isBogus()) {
436         // Locale constructed from malformed locale ID or language tag.
437         status = U_ILLEGAL_ARGUMENT_ERROR;
438         return nullptr;
439     }
440 
441     Collator* coll;
442 #if !UCONFIG_NO_SERVICE
443     if (hasService()) {
444         Locale actualLoc;
445         coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
446     } else
447 #endif
448     {
449         coll = makeInstance(desiredLocale, status);
450         // Either returns nullptr with U_FAILURE(status), or non-nullptr with U_SUCCESS(status)
451     }
452     // The use of *coll in setAttributesFromKeywords can cause the nullptr check to be
453     // optimized out of the delete even though setAttributesFromKeywords returns
454     // immediately if U_FAILURE(status), so we add a check here.
455     if (U_FAILURE(status)) {
456         return nullptr;
457     }
458     setAttributesFromKeywords(desiredLocale, *coll, status);
459     if (U_FAILURE(status)) {
460         delete coll;
461         return nullptr;
462     }
463     return coll;
464 }
465 
466 
makeInstance(const Locale & desiredLocale,UErrorCode & status)467 Collator* Collator::makeInstance(const Locale&  desiredLocale, UErrorCode& status) {
468     const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
469     if (U_SUCCESS(status)) {
470         Collator *result = new RuleBasedCollator(entry);
471         if (result != nullptr) {
472             // Both the unified cache's get() and the RBC constructor
473             // did addRef(). Undo one of them.
474             entry->removeRef();
475             return result;
476         }
477         status = U_MEMORY_ALLOCATION_ERROR;
478     }
479     if (entry != nullptr) {
480         // Undo the addRef() from the cache.get().
481         entry->removeRef();
482     }
483     return nullptr;
484 }
485 
486 Collator *
safeClone() const487 Collator::safeClone() const {
488     return clone();
489 }
490 
491 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target) const492 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
493                                     const UnicodeString& target) const
494 {
495     UErrorCode ec = U_ZERO_ERROR;
496     return (EComparisonResult)compare(source, target, ec);
497 }
498 
499 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const500 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
501                                     const UnicodeString& target,
502                                     int32_t length) const
503 {
504     UErrorCode ec = U_ZERO_ERROR;
505     return (EComparisonResult)compare(source, target, length, ec);
506 }
507 
508 // implement deprecated, previously abstract method
compare(const char16_t * source,int32_t sourceLength,const char16_t * target,int32_t targetLength) const509 Collator::EComparisonResult Collator::compare(const char16_t* source, int32_t sourceLength,
510                                     const char16_t* target, int32_t targetLength)
511                                     const
512 {
513     UErrorCode ec = U_ZERO_ERROR;
514     return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
515 }
516 
compare(UCharIterator &,UCharIterator &,UErrorCode & status) const517 UCollationResult Collator::compare(UCharIterator &/*sIter*/,
518                                    UCharIterator &/*tIter*/,
519                                    UErrorCode &status) const {
520     if(U_SUCCESS(status)) {
521         // Not implemented in the base class.
522         status = U_UNSUPPORTED_ERROR;
523     }
524     return UCOL_EQUAL;
525 }
526 
compareUTF8(const StringPiece & source,const StringPiece & target,UErrorCode & status) const527 UCollationResult Collator::compareUTF8(const StringPiece &source,
528                                        const StringPiece &target,
529                                        UErrorCode &status) const {
530     if(U_FAILURE(status)) {
531         return UCOL_EQUAL;
532     }
533     UCharIterator sIter, tIter;
534     uiter_setUTF8(&sIter, source.data(), source.length());
535     uiter_setUTF8(&tIter, target.data(), target.length());
536     return compare(sIter, tIter, status);
537 }
538 
equals(const UnicodeString & source,const UnicodeString & target) const539 UBool Collator::equals(const UnicodeString& source,
540                        const UnicodeString& target) const
541 {
542     UErrorCode ec = U_ZERO_ERROR;
543     return (compare(source, target, ec) == UCOL_EQUAL);
544 }
545 
greaterOrEqual(const UnicodeString & source,const UnicodeString & target) const546 UBool Collator::greaterOrEqual(const UnicodeString& source,
547                                const UnicodeString& target) const
548 {
549     UErrorCode ec = U_ZERO_ERROR;
550     return (compare(source, target, ec) != UCOL_LESS);
551 }
552 
greater(const UnicodeString & source,const UnicodeString & target) const553 UBool Collator::greater(const UnicodeString& source,
554                         const UnicodeString& target) const
555 {
556     UErrorCode ec = U_ZERO_ERROR;
557     return (compare(source, target, ec) == UCOL_GREATER);
558 }
559 
560 // this API  ignores registered collators, since it returns an
561 // array of indefinite lifetime
getAvailableLocales(int32_t & count)562 const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
563 {
564     UErrorCode status = U_ZERO_ERROR;
565     Locale *result = nullptr;
566     count = 0;
567     if (isAvailableLocaleListInitialized(status))
568     {
569         result = availableLocaleList;
570         count = availableLocaleListCount;
571     }
572     return result;
573 }
574 
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & name)575 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
576                                         const Locale& displayLocale,
577                                         UnicodeString& name)
578 {
579 #if !UCONFIG_NO_SERVICE
580     if (hasService()) {
581         UnicodeString locNameStr;
582         LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
583         return gService->getDisplayName(locNameStr, name, displayLocale);
584     }
585 #endif
586     return objectLocale.getDisplayName(displayLocale, name);
587 }
588 
getDisplayName(const Locale & objectLocale,UnicodeString & name)589 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
590                                         UnicodeString& name)
591 {
592     return getDisplayName(objectLocale, Locale::getDefault(), name);
593 }
594 
595 /* This is useless information */
596 /*void Collator::getVersion(UVersionInfo versionInfo) const
597 {
598   if (versionInfo!=nullptr)
599     uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
600 }
601 */
602 
603 // UCollator protected constructor destructor ----------------------------
604 
605 /**
606 * Default constructor.
607 * Constructor is different from the old default Collator constructor.
608 * The task for determining the default collation strength and normalization mode
609 * is left to the child class.
610 */
Collator()611 Collator::Collator()
612 : UObject()
613 {
614 }
615 
616 /**
617 * Constructor.
618 * Empty constructor, does not handle the arguments.
619 * This constructor is done for backward compatibility with 1.7 and 1.8.
620 * The task for handling the argument collation strength and normalization
621 * mode is left to the child class.
622 * @param collationStrength collation strength
623 * @param decompositionMode
624 * @deprecated 2.4 use the default constructor instead
625 */
Collator(UCollationStrength,UNormalizationMode)626 Collator::Collator(UCollationStrength, UNormalizationMode )
627 : UObject()
628 {
629 }
630 
~Collator()631 Collator::~Collator()
632 {
633 }
634 
// Copy constructor; copies only the UObject base.
Collator::Collator(const Collator &other)
    : UObject(other) {}
639 
operator ==(const Collator & other) const640 bool Collator::operator==(const Collator& other) const
641 {
642     // Subclasses: Call this method and then add more specific checks.
643     return typeid(*this) == typeid(other);
644 }
645 
operator !=(const Collator & other) const646 bool Collator::operator!=(const Collator& other) const
647 {
648     return !operator==(other);
649 }
650 
getBound(const uint8_t * source,int32_t sourceLength,UColBoundMode boundType,uint32_t noOfLevels,uint8_t * result,int32_t resultLength,UErrorCode & status)651 int32_t U_EXPORT2 Collator::getBound(const uint8_t       *source,
652                            int32_t             sourceLength,
653                            UColBoundMode       boundType,
654                            uint32_t            noOfLevels,
655                            uint8_t             *result,
656                            int32_t             resultLength,
657                            UErrorCode          &status)
658 {
659     return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
660 }
661 
662 void
setLocales(const Locale &,const Locale &,const Locale &)663 Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
664 }
665 
getTailoredSet(UErrorCode & status) const666 UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
667 {
668     if(U_FAILURE(status)) {
669         return nullptr;
670     }
671     // everything can be changed
672     return new UnicodeSet(0, 0x10FFFF);
673 }
674 
675 // -------------------------------------
676 
677 #if !UCONFIG_NO_SERVICE
678 URegistryKey U_EXPORT2
registerInstance(Collator * toAdopt,const Locale & locale,UErrorCode & status)679 Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
680 {
681     if (U_SUCCESS(status)) {
682         // Set the collator locales while registering so that createInstance()
683         // need not guess whether the collator's locales are already set properly
684         // (as they are by the data loader).
685         toAdopt->setLocales(locale, locale, locale);
686         return getService()->registerInstance(toAdopt, locale, status);
687     }
688     return nullptr;
689 }
690 
691 // -------------------------------------
692 
693 class CFactory : public LocaleKeyFactory {
694 private:
695     CollatorFactory* _delegate;
696     Hashtable* _ids;
697 
698 public:
CFactory(CollatorFactory * delegate,UErrorCode & status)699     CFactory(CollatorFactory* delegate, UErrorCode& status)
700         : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
701         , _delegate(delegate)
702         , _ids(nullptr)
703     {
704         if (U_SUCCESS(status)) {
705             int32_t count = 0;
706             _ids = new Hashtable(status);
707             if (_ids) {
708                 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
709                 for (int i = 0; i < count; ++i) {
710                     _ids->put(idlist[i], (void*)this, status);
711                     if (U_FAILURE(status)) {
712                         delete _ids;
713                         _ids = nullptr;
714                         return;
715                     }
716                 }
717             } else {
718                 status = U_MEMORY_ALLOCATION_ERROR;
719             }
720         }
721     }
722 
723     virtual ~CFactory();
724 
725     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override;
726 
727 protected:
getSupportedIDs(UErrorCode & status) const728     virtual const Hashtable* getSupportedIDs(UErrorCode& status) const override
729     {
730         if (U_SUCCESS(status)) {
731             return _ids;
732         }
733         return nullptr;
734     }
735 
736     virtual UnicodeString&
737         getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const override;
738 };
739 
~CFactory()740 CFactory::~CFactory()
741 {
742     delete _delegate;
743     delete _ids;
744 }
745 
746 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const747 CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
748 {
749     if (handlesKey(key, status)) {
750         const LocaleKey* lkey = dynamic_cast<const LocaleKey*>(&key);
751         U_ASSERT(lkey != nullptr);
752         Locale validLoc;
753         lkey->currentLocale(validLoc);
754         return _delegate->createCollator(validLoc);
755     }
756     return nullptr;
757 }
758 
759 UnicodeString&
getDisplayName(const UnicodeString & id,const Locale & locale,UnicodeString & result) const760 CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
761 {
762     if ((_coverage & 0x1) == 0) {
763         UErrorCode status = U_ZERO_ERROR;
764         const Hashtable* ids = getSupportedIDs(status);
765         if (ids && (ids->get(id) != nullptr)) {
766             Locale loc;
767             LocaleUtility::initLocaleFromName(id, loc);
768             return _delegate->getDisplayName(loc, locale, result);
769         }
770     }
771     result.setToBogus();
772     return result;
773 }
774 
775 URegistryKey U_EXPORT2
registerFactory(CollatorFactory * toAdopt,UErrorCode & status)776 Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
777 {
778     if (U_SUCCESS(status)) {
779         CFactory* f = new CFactory(toAdopt, status);
780         if (f) {
781             return getService()->registerFactory(f, status);
782         }
783         status = U_MEMORY_ALLOCATION_ERROR;
784     }
785     return nullptr;
786 }
787 
788 // -------------------------------------
789 
790 UBool U_EXPORT2
unregister(URegistryKey key,UErrorCode & status)791 Collator::unregister(URegistryKey key, UErrorCode& status)
792 {
793     if (U_SUCCESS(status)) {
794         if (hasService()) {
795             return gService->unregister(key, status);
796         }
797         status = U_ILLEGAL_ARGUMENT_ERROR;
798     }
799     return false;
800 }
801 #endif /* UCONFIG_NO_SERVICE */
802 
803 class CollationLocaleListEnumeration : public StringEnumeration {
804 private:
805     int32_t index;
806 public:
807     static UClassID U_EXPORT2 getStaticClassID();
808     virtual UClassID getDynamicClassID() const override;
809 public:
CollationLocaleListEnumeration()810     CollationLocaleListEnumeration()
811         : index(0)
812     {
813         // The global variables should already be initialized.
814         //isAvailableLocaleListInitialized(status);
815     }
816 
817     virtual ~CollationLocaleListEnumeration();
818 
clone() const819     virtual StringEnumeration * clone() const override
820     {
821         CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
822         if (result) {
823             result->index = index;
824         }
825         return result;
826     }
827 
count(UErrorCode &) const828     virtual int32_t count(UErrorCode &/*status*/) const override {
829         return availableLocaleListCount;
830     }
831 
next(int32_t * resultLength,UErrorCode &)832     virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) override {
833         const char* result;
834         if(index < availableLocaleListCount) {
835             result = availableLocaleList[index++].getName();
836             if(resultLength != nullptr) {
837                 *resultLength = (int32_t)uprv_strlen(result);
838             }
839         } else {
840             if(resultLength != nullptr) {
841                 *resultLength = 0;
842             }
843             result = nullptr;
844         }
845         return result;
846     }
847 
snext(UErrorCode & status)848     virtual const UnicodeString* snext(UErrorCode& status) override {
849         int32_t resultLength = 0;
850         const char *s = next(&resultLength, status);
851         return setChars(s, resultLength, status);
852     }
853 
reset(UErrorCode &)854     virtual void reset(UErrorCode& /*status*/) override {
855         index = 0;
856     }
857 };
858 
~CollationLocaleListEnumeration()859 CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
860 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
862 
863 
864 // -------------------------------------
865 
866 StringEnumeration* U_EXPORT2
867 Collator::getAvailableLocales()
868 {
869 #if !UCONFIG_NO_SERVICE
870     if (hasService()) {
871         return getService()->getAvailableLocales();
872     }
873 #endif /* UCONFIG_NO_SERVICE */
874     UErrorCode status = U_ZERO_ERROR;
875     if (isAvailableLocaleListInitialized(status)) {
876         return new CollationLocaleListEnumeration();
877     }
878     return nullptr;
879 }
880 
881 StringEnumeration* U_EXPORT2
getKeywords(UErrorCode & status)882 Collator::getKeywords(UErrorCode& status) {
883     return UStringEnumeration::fromUEnumeration(
884             ucol_getKeywords(&status), status);
885 }
886 
887 StringEnumeration* U_EXPORT2
getKeywordValues(const char * keyword,UErrorCode & status)888 Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
889     return UStringEnumeration::fromUEnumeration(
890             ucol_getKeywordValues(keyword, &status), status);
891 }
892 
893 StringEnumeration* U_EXPORT2
getKeywordValuesForLocale(const char * key,const Locale & locale,UBool commonlyUsed,UErrorCode & status)894 Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
895                                     UBool commonlyUsed, UErrorCode& status) {
896     return UStringEnumeration::fromUEnumeration(
897             ucol_getKeywordValuesForLocale(
898                     key, locale.getName(), commonlyUsed, &status),
899             status);
900 }
901 
902 Locale U_EXPORT2
getFunctionalEquivalent(const char * keyword,const Locale & locale,UBool & isAvailable,UErrorCode & status)903 Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
904                                   UBool& isAvailable, UErrorCode& status) {
905     // This is a wrapper over ucol_getFunctionalEquivalent
906     char loc[ULOC_FULLNAME_CAPACITY];
907     /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
908                     keyword, locale.getName(), &isAvailable, &status);
909     if (U_FAILURE(status)) {
910         *loc = 0; // root
911     }
912     return Locale::createFromName(loc);
913 }
914 
915 Collator::ECollationStrength
getStrength() const916 Collator::getStrength() const {
917     UErrorCode intStatus = U_ZERO_ERROR;
918     return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
919 }
920 
921 void
setStrength(ECollationStrength newStrength)922 Collator::setStrength(ECollationStrength newStrength) {
923     UErrorCode intStatus = U_ZERO_ERROR;
924     setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
925 }
926 
927 Collator &
setMaxVariable(UColReorderCode,UErrorCode & errorCode)928 Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
929     if (U_SUCCESS(errorCode)) {
930         errorCode = U_UNSUPPORTED_ERROR;
931     }
932     return *this;
933 }
934 
935 UColReorderCode
getMaxVariable() const936 Collator::getMaxVariable() const {
937     return UCOL_REORDER_CODE_PUNCTUATION;
938 }
939 
940 int32_t
getReorderCodes(int32_t *,int32_t,UErrorCode & status) const941 Collator::getReorderCodes(int32_t* /* dest*/,
942                           int32_t /* destCapacity*/,
943                           UErrorCode& status) const
944 {
945     if (U_SUCCESS(status)) {
946         status = U_UNSUPPORTED_ERROR;
947     }
948     return 0;
949 }
950 
951 void
setReorderCodes(const int32_t *,int32_t,UErrorCode & status)952 Collator::setReorderCodes(const int32_t* /* reorderCodes */,
953                           int32_t /* reorderCodesLength */,
954                           UErrorCode& status)
955 {
956     if (U_SUCCESS(status)) {
957         status = U_UNSUPPORTED_ERROR;
958     }
959 }
960 
961 int32_t
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t capacity,UErrorCode & errorCode)962 Collator::getEquivalentReorderCodes(int32_t reorderCode,
963                                     int32_t *dest, int32_t capacity,
964                                     UErrorCode &errorCode) {
965     if(U_FAILURE(errorCode)) { return 0; }
966     if(capacity < 0 || (dest == nullptr && capacity > 0)) {
967         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
968         return 0;
969     }
970     const CollationData *baseData = CollationRoot::getData(errorCode);
971     if(U_FAILURE(errorCode)) { return 0; }
972     return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
973 }
974 
975 int32_t
internalGetShortDefinitionString(const char *,char *,int32_t,UErrorCode & status) const976 Collator::internalGetShortDefinitionString(const char * /*locale*/,
977                                                              char * /*buffer*/,
978                                                              int32_t /*capacity*/,
979                                                              UErrorCode &status) const {
980   if(U_SUCCESS(status)) {
981     status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
982   }
983   return 0;
984 }
985 
986 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const987 Collator::internalCompareUTF8(const char *left, int32_t leftLength,
988                               const char *right, int32_t rightLength,
989                               UErrorCode &errorCode) const {
990     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
991     if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
992         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
993         return UCOL_EQUAL;
994     }
995     return compareUTF8(
996             StringPiece(left, (leftLength < 0) ? static_cast<int32_t>(uprv_strlen(left)) : leftLength),
997             StringPiece(right, (rightLength < 0) ? static_cast<int32_t>(uprv_strlen(right)) : rightLength),
998             errorCode);
999 }
1000 
1001 int32_t
internalNextSortKeyPart(UCharIterator *,uint32_t[2],uint8_t *,int32_t,UErrorCode & errorCode) const1002 Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
1003                                   uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
1004     if (U_SUCCESS(errorCode)) {
1005         errorCode = U_UNSUPPORTED_ERROR;
1006     }
1007     return 0;
1008 }
1009 
1010 // UCollator private data members ----------------------------------------
1011 
1012 /* This is useless information */
1013 /*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1014 
1015 // -------------------------------------
1016 
1017 U_NAMESPACE_END
1018 
1019 #endif /* #if !UCONFIG_NO_COLLATION */
1020 
1021 /* eof */
1022