• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * rulebasedcollator.cpp
9 *
10 * (replaced the former tblcoll.cpp)
11 *
12 * created on: 2012feb14 with new and old collation code
13 * created by: Markus W. Scherer
14 */
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_COLLATION
19 
20 #include "unicode/coll.h"
21 #include "unicode/coleitr.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/locid.h"
24 #include "unicode/sortkey.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/ucol.h"
27 #include "unicode/uiter.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/usetiter.h"
32 #include "unicode/utf8.h"
33 #include "unicode/uversion.h"
34 #include "bocsu.h"
35 #include "charstr.h"
36 #include "cmemory.h"
37 #include "collation.h"
38 #include "collationcompare.h"
39 #include "collationdata.h"
40 #include "collationdatareader.h"
41 #include "collationfastlatin.h"
42 #include "collationiterator.h"
43 #include "collationkeys.h"
44 #include "collationroot.h"
45 #include "collationsets.h"
46 #include "collationsettings.h"
47 #include "collationtailoring.h"
48 #include "cstring.h"
49 #include "uassert.h"
50 #include "ucol_imp.h"
51 #include "uhash.h"
52 #include "uitercollationiterator.h"
53 #include "ustr_imp.h"
54 #include "utf16collationiterator.h"
55 #include "utf8collationiterator.h"
56 #include "uvectr64.h"
57 
58 U_NAMESPACE_BEGIN
59 
60 namespace {
61 
62 class FixedSortKeyByteSink : public SortKeyByteSink {
63 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)64     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65             : SortKeyByteSink(dest, destCapacity) {}
66     virtual ~FixedSortKeyByteSink();
67 
68 private:
69     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
70     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
71 };
72 
~FixedSortKeyByteSink()73 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
74 
75 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)76 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78     // Fill the buffer completely.
79     int32_t available = capacity_ - length;
80     if (available > 0) {
81         uprv_memcpy(buffer_ + length, bytes, available);
82     }
83 }
84 
85 UBool
Resize(int32_t,int32_t)86 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
87     return false;
88 }
89 
90 }  // namespace
91 
92 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
93 class CollationKeyByteSink : public SortKeyByteSink {
94 public:
CollationKeyByteSink(CollationKey & key)95     CollationKeyByteSink(CollationKey &key)
96             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97               key_(key) {}
98     virtual ~CollationKeyByteSink();
99 
100 private:
101     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
102     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
103 
104     CollationKey &key_;
105 };
106 
~CollationKeyByteSink()107 CollationKeyByteSink::~CollationKeyByteSink() {}
108 
109 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)110 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112     if (Resize(n, length)) {
113         uprv_memcpy(buffer_ + length, bytes, n);
114     }
115 }
116 
117 UBool
Resize(int32_t appendCapacity,int32_t length)118 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
119     if (buffer_ == NULL) {
120         return false;  // allocation failed before already
121     }
122     int32_t newCapacity = 2 * capacity_;
123     int32_t altCapacity = length + 2 * appendCapacity;
124     if (newCapacity < altCapacity) {
125         newCapacity = altCapacity;
126     }
127     if (newCapacity < 200) {
128         newCapacity = 200;
129     }
130     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
131     if (newBuffer == NULL) {
132         SetNotOk();
133         return false;
134     }
135     buffer_ = reinterpret_cast<char *>(newBuffer);
136     capacity_ = newCapacity;
137     return true;
138 }
139 
// Copy constructor.
// Copies every field from `other`; the settings and cache entry objects are shared
// with `other`, so a reference is added to each of them.
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid)
{
    settings->addRef();
    cacheEntry->addRef();
}
152 
// Constructs a collator from a binary collation image.
//
// bin/length: the binary data; bin must not be NULL and length must not be 0.
// base:       must be non-NULL and must use the root tailoring, otherwise
//             U_UNSUPPORTED_ERROR is set.
// errorCode:  in/out ICU error code; nothing happens if it already indicates a failure.
//
// On any failure the collator is left with all of its pointers NULL.
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(nullptr),
          settings(nullptr),
          tailoring(nullptr),
          cacheEntry(nullptr),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(false) {
    if(U_FAILURE(errorCode)) { return; }
    if(bin == nullptr || base == nullptr || length == 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) { return; }
    if(base->tailoring != rootTailoring) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    // The LocalPointer deletes the new tailoring on every early return below.
    LocalPointer<CollationTailoring> newTailoring(
            new CollationTailoring(base->tailoring->settings));
    if(newTailoring.isNull() || newTailoring->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *newTailoring, errorCode);
    if(U_FAILURE(errorCode)) { return; }
    newTailoring->actualLocale.setToBogus();
    // Hand over ownership of the tailoring.
    adoptTailoring(newTailoring.orphan(), errorCode);
}
183 
RuleBasedCollator(const CollationCacheEntry * entry)184 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
185         : data(entry->tailoring->data),
186           settings(entry->tailoring->settings),
187           tailoring(entry->tailoring),
188           cacheEntry(entry),
189           validLocale(entry->validLocale),
190           explicitlySetAttributes(0),
191           actualLocaleIsSameAsValid(false) {
192     settings->addRef();
193     cacheEntry->addRef();
194 }
195 
~RuleBasedCollator()196 RuleBasedCollator::~RuleBasedCollator() {
197     SharedObject::clearPtr(settings);
198     SharedObject::clearPtr(cacheEntry);
199 }
200 
201 void
adoptTailoring(CollationTailoring * t,UErrorCode & errorCode)202 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
203     if(U_FAILURE(errorCode)) {
204         t->deleteIfZeroRefCount();
205         return;
206     }
207     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
208     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
209     if(cacheEntry == NULL) {
210         errorCode = U_MEMORY_ALLOCATION_ERROR;
211         t->deleteIfZeroRefCount();
212         return;
213     }
214     data = t->data;
215     settings = t->settings;
216     settings->addRef();
217     tailoring = t;
218     cacheEntry->addRef();
219     validLocale = t->actualLocale;
220     actualLocaleIsSameAsValid = false;
221 }
222 
223 RuleBasedCollator *
clone() const224 RuleBasedCollator::clone() const {
225     return new RuleBasedCollator(*this);
226 }
227 
// Copy assignment. Self-assignment is a no-op.
// The settings and cache entry pointers are replaced via SharedObject::copyPtr();
// the remaining fields are copied directly.
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
239 
240 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
241 
242 bool
243 RuleBasedCollator::operator==(const Collator& other) const {
244     if(this == &other) { return true; }
245     if(!Collator::operator==(other)) { return false; }
246     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
247     if(*settings != *o.settings) { return false; }
248     if(data == o.data) { return true; }
249     UBool thisIsRoot = data->base == NULL;
250     UBool otherIsRoot = o.data->base == NULL;
251     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
252     if(thisIsRoot != otherIsRoot) { return false; }
253     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
254             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
255         // Shortcut: If both collators have valid rule strings, then compare those.
256         if(tailoring->rules == o.tailoring->rules) { return true; }
257     }
258     // Different rule strings can result in the same or equivalent tailoring.
259     // The rule strings are optional in ICU resource bundles, although included by default.
260     // cloneBinary() drops the rule string.
261     UErrorCode errorCode = U_ZERO_ERROR;
262     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
263     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
264     if(U_FAILURE(errorCode)) { return false; }
265     if(*thisTailored != *otherTailored) { return false; }
266     // For completeness, we should compare all of the mappings;
267     // or we should create a list of strings, sort it with one collator,
268     // and check if both collators compare adjacent strings the same
269     // (order & strength, down to quaternary); or similar.
270     // Testing equality of collators seems unusual.
271     return true;
272 }
273 
274 int32_t
hashCode() const275 RuleBasedCollator::hashCode() const {
276     int32_t h = settings->hashCode();
277     if(data->base == NULL) { return h; }  // root collator
278     // Do not rely on the rule string, see comments in operator==().
279     UErrorCode errorCode = U_ZERO_ERROR;
280     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
281     if(U_FAILURE(errorCode)) { return 0; }
282     UnicodeSetIterator iter(*set);
283     while(iter.next() && !iter.isString()) {
284         h ^= data->getCE32(iter.getCodepoint());
285     }
286     return h;
287 }
288 
289 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)290 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
291                               const Locale &actual) {
292     if(actual == tailoring->actualLocale) {
293         actualLocaleIsSameAsValid = false;
294     } else {
295         U_ASSERT(actual == valid);
296         actualLocaleIsSameAsValid = true;
297     }
298     // Do not modify tailoring.actualLocale:
299     // We cannot be sure that that would be thread-safe.
300     validLocale = valid;
301     (void)requested;  // Ignore, see also ticket #10477.
302 }
303 
304 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const305 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
306     if(U_FAILURE(errorCode)) {
307         return Locale::getRoot();
308     }
309     switch(type) {
310     case ULOC_ACTUAL_LOCALE:
311         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
312     case ULOC_VALID_LOCALE:
313         return validLocale;
314     case ULOC_REQUESTED_LOCALE:
315     default:
316         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
317         return Locale::getRoot();
318     }
319 }
320 
321 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const322 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
323     if(U_FAILURE(errorCode)) {
324         return NULL;
325     }
326     const Locale *result;
327     switch(type) {
328     case ULOC_ACTUAL_LOCALE:
329         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
330         break;
331     case ULOC_VALID_LOCALE:
332         result = &validLocale;
333         break;
334     case ULOC_REQUESTED_LOCALE:
335     default:
336         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
337         return NULL;
338     }
339     if(result->isBogus()) { return NULL; }
340     const char *id = result->getName();
341     return id[0] == 0 ? "root" : id;
342 }
343 
344 const UnicodeString&
getRules() const345 RuleBasedCollator::getRules() const {
346     return tailoring->rules;
347 }
348 
349 void
getRules(UColRuleOption delta,UnicodeString & buffer) const350 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
351     if(delta == UCOL_TAILORING_ONLY) {
352         buffer = tailoring->rules;
353         return;
354     }
355     // UCOL_FULL_RULES
356     buffer.remove();
357     CollationLoader::appendRootRules(buffer);
358     buffer.append(tailoring->rules).getTerminatedBuffer();
359 }
360 
361 void
getVersion(UVersionInfo version) const362 RuleBasedCollator::getVersion(UVersionInfo version) const {
363     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
364     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
365 }
366 
367 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const368 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
369     if(U_FAILURE(errorCode)) { return NULL; }
370     UnicodeSet *tailored = new UnicodeSet();
371     if(tailored == NULL) {
372         errorCode = U_MEMORY_ALLOCATION_ERROR;
373         return NULL;
374     }
375     if(data->base != NULL) {
376         TailoredSet(tailored).forData(data, errorCode);
377         if(U_FAILURE(errorCode)) {
378             delete tailored;
379             return NULL;
380         }
381     }
382     return tailored;
383 }
384 
385 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const386 RuleBasedCollator::internalGetContractionsAndExpansions(
387         UnicodeSet *contractions, UnicodeSet *expansions,
388         UBool addPrefixes, UErrorCode &errorCode) const {
389     if(U_FAILURE(errorCode)) { return; }
390     if(contractions != NULL) {
391         contractions->clear();
392     }
393     if(expansions != NULL) {
394         expansions->clear();
395     }
396     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
397 }
398 
399 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const400 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
401     if(U_FAILURE(errorCode)) { return; }
402     ContractionsAndExpansions(&set, NULL, NULL, false).forCodePoint(data, c, errorCode);
403 }
404 
405 const CollationSettings &
getDefaultSettings() const406 RuleBasedCollator::getDefaultSettings() const {
407     return *tailoring->settings;
408 }
409 
410 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const411 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
412     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
413     int32_t option;
414     switch(attr) {
415     case UCOL_FRENCH_COLLATION:
416         option = CollationSettings::BACKWARD_SECONDARY;
417         break;
418     case UCOL_ALTERNATE_HANDLING:
419         return settings->getAlternateHandling();
420     case UCOL_CASE_FIRST:
421         return settings->getCaseFirst();
422     case UCOL_CASE_LEVEL:
423         option = CollationSettings::CASE_LEVEL;
424         break;
425     case UCOL_NORMALIZATION_MODE:
426         option = CollationSettings::CHECK_FCD;
427         break;
428     case UCOL_STRENGTH:
429         return (UColAttributeValue)settings->getStrength();
430     case UCOL_HIRAGANA_QUATERNARY_MODE:
431         // Deprecated attribute, unsettable.
432         return UCOL_OFF;
433     case UCOL_NUMERIC_COLLATION:
434         option = CollationSettings::NUMERIC;
435         break;
436     default:
437         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
438         return UCOL_DEFAULT;
439     }
440     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
441 }
442 
443 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)444 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
445                                 UErrorCode &errorCode) {
446     UColAttributeValue oldValue = getAttribute(attr, errorCode);
447     if(U_FAILURE(errorCode)) { return; }
448     if(value == oldValue) {
449         setAttributeExplicitly(attr);
450         return;
451     }
452     const CollationSettings &defaultSettings = getDefaultSettings();
453     if(settings == &defaultSettings) {
454         if(value == UCOL_DEFAULT) {
455             setAttributeDefault(attr);
456             return;
457         }
458     }
459     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
460     if(ownedSettings == NULL) {
461         errorCode = U_MEMORY_ALLOCATION_ERROR;
462         return;
463     }
464 
465     switch(attr) {
466     case UCOL_FRENCH_COLLATION:
467         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
468                                defaultSettings.options, errorCode);
469         break;
470     case UCOL_ALTERNATE_HANDLING:
471         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
472         break;
473     case UCOL_CASE_FIRST:
474         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
475         break;
476     case UCOL_CASE_LEVEL:
477         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
478                                defaultSettings.options, errorCode);
479         break;
480     case UCOL_NORMALIZATION_MODE:
481         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
482                                defaultSettings.options, errorCode);
483         break;
484     case UCOL_STRENGTH:
485         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
486         break;
487     case UCOL_HIRAGANA_QUATERNARY_MODE:
488         // Deprecated attribute. Check for valid values but do not change anything.
489         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
490             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
491         }
492         break;
493     case UCOL_NUMERIC_COLLATION:
494         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
495         break;
496     default:
497         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
498         break;
499     }
500     if(U_FAILURE(errorCode)) { return; }
501     setFastLatinOptions(*ownedSettings);
502     if(value == UCOL_DEFAULT) {
503         setAttributeDefault(attr);
504     } else {
505         setAttributeExplicitly(attr);
506     }
507 }
508 
509 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)510 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
511     if(U_FAILURE(errorCode)) { return *this; }
512     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
513     int32_t value;
514     if(group == UCOL_REORDER_CODE_DEFAULT) {
515         value = UCOL_DEFAULT;
516     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
517         value = group - UCOL_REORDER_CODE_FIRST;
518     } else {
519         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
520         return *this;
521     }
522     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
523     if(value == oldValue) {
524         setAttributeExplicitly(ATTR_VARIABLE_TOP);
525         return *this;
526     }
527     const CollationSettings &defaultSettings = getDefaultSettings();
528     if(settings == &defaultSettings) {
529         if(value == UCOL_DEFAULT) {
530             setAttributeDefault(ATTR_VARIABLE_TOP);
531             return *this;
532         }
533     }
534     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
535     if(ownedSettings == NULL) {
536         errorCode = U_MEMORY_ALLOCATION_ERROR;
537         return *this;
538     }
539 
540     if(group == UCOL_REORDER_CODE_DEFAULT) {
541         group = (UColReorderCode)(
542             UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
543     }
544     uint32_t varTop = data->getLastPrimaryForGroup(group);
545     U_ASSERT(varTop != 0);
546     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
547     if(U_FAILURE(errorCode)) { return *this; }
548     ownedSettings->variableTop = varTop;
549     setFastLatinOptions(*ownedSettings);
550     if(value == UCOL_DEFAULT) {
551         setAttributeDefault(ATTR_VARIABLE_TOP);
552     } else {
553         setAttributeExplicitly(ATTR_VARIABLE_TOP);
554     }
555     return *this;
556 }
557 
558 UColReorderCode
getMaxVariable() const559 RuleBasedCollator::getMaxVariable() const {
560     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
561 }
562 
563 uint32_t
getVariableTop(UErrorCode &) const564 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
565     return settings->variableTop;
566 }
567 
568 uint32_t
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & errorCode)569 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
570     if(U_FAILURE(errorCode)) { return 0; }
571     if(varTop == NULL && len !=0) {
572         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
573         return 0;
574     }
575     if(len < 0) { len = u_strlen(varTop); }
576     if(len == 0) {
577         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
578         return 0;
579     }
580     UBool numeric = settings->isNumeric();
581     int64_t ce1, ce2;
582     if(settings->dontCheckFCD()) {
583         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
584         ce1 = ci.nextCE(errorCode);
585         ce2 = ci.nextCE(errorCode);
586     } else {
587         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
588         ce1 = ci.nextCE(errorCode);
589         ce2 = ci.nextCE(errorCode);
590     }
591     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
592         errorCode = U_CE_NOT_FOUND_ERROR;
593         return 0;
594     }
595     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
596     return settings->variableTop;
597 }
598 
599 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)600 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
601     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
602 }
603 
604 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)605 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
606     if(U_FAILURE(errorCode)) { return; }
607     if(varTop != settings->variableTop) {
608         // Pin the variable top to the end of the reordering group which contains it.
609         // Only a few special groups are supported.
610         int32_t group = data->getGroupForPrimary(varTop);
611         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
612             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
613             return;
614         }
615         uint32_t v = data->getLastPrimaryForGroup(group);
616         U_ASSERT(v != 0 && v >= varTop);
617         varTop = v;
618         if(varTop != settings->variableTop) {
619             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
620             if(ownedSettings == NULL) {
621                 errorCode = U_MEMORY_ALLOCATION_ERROR;
622                 return;
623             }
624             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
625                                           getDefaultSettings().options, errorCode);
626             if(U_FAILURE(errorCode)) { return; }
627             ownedSettings->variableTop = varTop;
628             setFastLatinOptions(*ownedSettings);
629         }
630     }
631     if(varTop == getDefaultSettings().variableTop) {
632         setAttributeDefault(ATTR_VARIABLE_TOP);
633     } else {
634         setAttributeExplicitly(ATTR_VARIABLE_TOP);
635     }
636 }
637 
638 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const639 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
640                                    UErrorCode &errorCode) const {
641     if(U_FAILURE(errorCode)) { return 0; }
642     if(capacity < 0 || (dest == NULL && capacity > 0)) {
643         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
644         return 0;
645     }
646     int32_t length = settings->reorderCodesLength;
647     if(length == 0) { return 0; }
648     if(length > capacity) {
649         errorCode = U_BUFFER_OVERFLOW_ERROR;
650         return length;
651     }
652     uprv_memcpy(dest, settings->reorderCodes, length * 4);
653     return length;
654 }
655 
656 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)657 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
658                                    UErrorCode &errorCode) {
659     if(U_FAILURE(errorCode)) { return; }
660     if(length < 0 || (reorderCodes == NULL && length > 0)) {
661         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
662         return;
663     }
664     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
665         length = 0;
666     }
667     if(length == settings->reorderCodesLength &&
668             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
669         return;
670     }
671     const CollationSettings &defaultSettings = getDefaultSettings();
672     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
673         if(settings != &defaultSettings) {
674             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
675             if(ownedSettings == NULL) {
676                 errorCode = U_MEMORY_ALLOCATION_ERROR;
677                 return;
678             }
679             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
680             setFastLatinOptions(*ownedSettings);
681         }
682         return;
683     }
684     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
685     if(ownedSettings == NULL) {
686         errorCode = U_MEMORY_ALLOCATION_ERROR;
687         return;
688     }
689     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
690     setFastLatinOptions(*ownedSettings);
691 }
692 
693 void
setFastLatinOptions(CollationSettings & ownedSettings) const694 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
695     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
696             data, ownedSettings,
697             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
698 }
699 
700 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const701 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
702                            UErrorCode &errorCode) const {
703     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
704     return doCompare(left.getBuffer(), left.length(),
705                      right.getBuffer(), right.length(), errorCode);
706 }
707 
708 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const709 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
710                            int32_t length, UErrorCode &errorCode) const {
711     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
712     if(length < 0) {
713         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
714         return UCOL_EQUAL;
715     }
716     int32_t leftLength = left.length();
717     int32_t rightLength = right.length();
718     if(leftLength > length) { leftLength = length; }
719     if(rightLength > length) { rightLength = length; }
720     return doCompare(left.getBuffer(), leftLength,
721                      right.getBuffer(), rightLength, errorCode);
722 }
723 
724 UCollationResult
compare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const725 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
726                            const UChar *right, int32_t rightLength,
727                            UErrorCode &errorCode) const {
728     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
729     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
730         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
731         return UCOL_EQUAL;
732     }
733     // Make sure both or neither strings have a known length.
734     // We do not optimize for mixed length/termination.
735     if(leftLength >= 0) {
736         if(rightLength < 0) { rightLength = u_strlen(right); }
737     } else {
738         if(rightLength >= 0) { leftLength = u_strlen(left); }
739     }
740     return doCompare(left, leftLength, right, rightLength, errorCode);
741 }
742 
743 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const744 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
745                                UErrorCode &errorCode) const {
746     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
747     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
748     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
749     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
750         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
751         return UCOL_EQUAL;
752     }
753     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
754 }
755 
756 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const757 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
758                                        const char *right, int32_t rightLength,
759                                        UErrorCode &errorCode) const {
760     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
761     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
762         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
763         return UCOL_EQUAL;
764     }
765     // Make sure both or neither strings have a known length.
766     // We do not optimize for mixed length/termination.
767     if(leftLength >= 0) {
768         if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
769     } else {
770         if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
771     }
772     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
773                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
774 }
775 
776 namespace {
777 
778 /**
779  * Abstract iterator for identical-level string comparisons.
780  * Returns FCD code points and handles temporary switching to NFD.
781  */
782 class NFDIterator : public UObject {
783 public:
NFDIterator()784     NFDIterator() : index(-1), length(0) {}
~NFDIterator()785     virtual ~NFDIterator() {}
786     /**
787      * Returns the next code point from the internal normalization buffer,
788      * or else the next text code point.
789      * Returns -1 at the end of the text.
790      */
nextCodePoint()791     UChar32 nextCodePoint() {
792         if(index >= 0) {
793             if(index == length) {
794                 index = -1;
795             } else {
796                 UChar32 c;
797                 U16_NEXT_UNSAFE(decomp, index, c);
798                 return c;
799             }
800         }
801         return nextRawCodePoint();
802     }
803     /**
804      * @param nfcImpl
805      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
806      * @return the first code point in c's decomposition,
807      *         or c itself if it was decomposed already or if it does not decompose
808      */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)809     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
810         if(index >= 0) { return c; }
811         decomp = nfcImpl.getDecomposition(c, buffer, length);
812         if(decomp == NULL) { return c; }
813         index = 0;
814         U16_NEXT_UNSAFE(decomp, index, c);
815         return c;
816     }
817 protected:
818     /**
819      * Returns the next text code point in FCD order.
820      * Returns -1 at the end of the text.
821      */
822     virtual UChar32 nextRawCodePoint() = 0;
823 private:
824     const UChar *decomp;
825     UChar buffer[4];
826     int32_t index;
827     int32_t length;
828 };
829 
830 class UTF16NFDIterator : public NFDIterator {
831 public:
UTF16NFDIterator(const UChar * text,const UChar * textLimit)832     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
833 protected:
nextRawCodePoint()834     virtual UChar32 nextRawCodePoint() override {
835         if(s == limit) { return U_SENTINEL; }
836         UChar32 c = *s++;
837         if(limit == NULL && c == 0) {
838             s = NULL;
839             return U_SENTINEL;
840         }
841         UChar trail;
842         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
843             ++s;
844             c = U16_GET_SUPPLEMENTARY(c, trail);
845         }
846         return c;
847     }
848 
849     const UChar *s;
850     const UChar *limit;
851 };
852 
853 class FCDUTF16NFDIterator : public UTF16NFDIterator {
854 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const UChar * text,const UChar * textLimit)855     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
856             : UTF16NFDIterator(NULL, NULL) {
857         UErrorCode errorCode = U_ZERO_ERROR;
858         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
859         if(U_FAILURE(errorCode)) { return; }
860         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
861             s = text;
862             limit = spanLimit;
863         } else {
864             str.setTo(text, (int32_t)(spanLimit - text));
865             {
866                 ReorderingBuffer r_buffer(nfcImpl, str);
867                 if(r_buffer.init(str.length(), errorCode)) {
868                     nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
869                 }
870             }
871             if(U_SUCCESS(errorCode)) {
872                 s = str.getBuffer();
873                 limit = s + str.length();
874             }
875         }
876     }
877 private:
878     UnicodeString str;
879 };
880 
881 class UTF8NFDIterator : public NFDIterator {
882 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)883     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
884         : s(text), pos(0), length(textLength) {}
885 protected:
nextRawCodePoint()886     virtual UChar32 nextRawCodePoint() override {
887         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
888         UChar32 c;
889         U8_NEXT_OR_FFFD(s, pos, length, c);
890         return c;
891     }
892 
893     const uint8_t *s;
894     int32_t pos;
895     int32_t length;
896 };
897 
898 class FCDUTF8NFDIterator : public NFDIterator {
899 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)900     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
901             : u8ci(data, false, text, 0, textLength) {}
902 protected:
nextRawCodePoint()903     virtual UChar32 nextRawCodePoint() override {
904         UErrorCode errorCode = U_ZERO_ERROR;
905         return u8ci.nextCodePoint(errorCode);
906     }
907 private:
908     FCDUTF8CollationIterator u8ci;
909 };
910 
911 class UIterNFDIterator : public NFDIterator {
912 public:
UIterNFDIterator(UCharIterator & it)913     UIterNFDIterator(UCharIterator &it) : iter(it) {}
914 protected:
nextRawCodePoint()915     virtual UChar32 nextRawCodePoint() override {
916         return uiter_next32(&iter);
917     }
918 private:
919     UCharIterator &iter;
920 };
921 
922 class FCDUIterNFDIterator : public NFDIterator {
923 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)924     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
925             : uici(data, false, it, startIndex) {}
926 protected:
nextRawCodePoint()927     virtual UChar32 nextRawCodePoint() override {
928         UErrorCode errorCode = U_ZERO_ERROR;
929         return uici.nextCodePoint(errorCode);
930     }
931 private:
932     FCDUIterCollationIterator uici;
933 };
934 
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)935 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
936                                 NFDIterator &left, NFDIterator &right) {
937     for(;;) {
938         // Fetch the next FCD code point from each string.
939         UChar32 leftCp = left.nextCodePoint();
940         UChar32 rightCp = right.nextCodePoint();
941         if(leftCp == rightCp) {
942             if(leftCp < 0) { break; }
943             continue;
944         }
945         // If they are different, then decompose each and compare again.
946         if(leftCp < 0) {
947             leftCp = -2;  // end of string
948         } else if(leftCp == 0xfffe) {
949             leftCp = -1;  // U+FFFE: merge separator
950         } else {
951             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
952         }
953         if(rightCp < 0) {
954             rightCp = -2;  // end of string
955         } else if(rightCp == 0xfffe) {
956             rightCp = -1;  // U+FFFE: merge separator
957         } else {
958             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
959         }
960         if(leftCp < rightCp) { return UCOL_LESS; }
961         if(leftCp > rightCp) { return UCOL_GREATER; }
962     }
963     return UCOL_EQUAL;
964 }
965 
966 }  // namespace
967 
968 UCollationResult
doCompare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const969 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
970                              const UChar *right, int32_t rightLength,
971                              UErrorCode &errorCode) const {
972     // U_FAILURE(errorCode) checked by caller.
973     if(left == right && leftLength == rightLength) {
974         return UCOL_EQUAL;
975     }
976 
977     // Identical-prefix test.
978     const UChar *leftLimit;
979     const UChar *rightLimit;
980     int32_t equalPrefixLength = 0;
981     if(leftLength < 0) {
982         leftLimit = NULL;
983         rightLimit = NULL;
984         UChar c;
985         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
986             if(c == 0) { return UCOL_EQUAL; }
987             ++equalPrefixLength;
988         }
989     } else {
990         leftLimit = left + leftLength;
991         rightLimit = right + rightLength;
992         for(;;) {
993             if(equalPrefixLength == leftLength) {
994                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
995                 break;
996             } else if(equalPrefixLength == rightLength ||
997                       left[equalPrefixLength] != right[equalPrefixLength]) {
998                 break;
999             }
1000             ++equalPrefixLength;
1001         }
1002     }
1003 
1004     UBool numeric = settings->isNumeric();
1005     if(equalPrefixLength > 0) {
1006         if((equalPrefixLength != leftLength &&
1007                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1008                 (equalPrefixLength != rightLength &&
1009                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1010             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1011             while(--equalPrefixLength > 0 &&
1012                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1013         }
1014         // Notes:
1015         // - A longer string can compare equal to a prefix of it if only ignorables follow.
1016         // - With a backward level, a longer string can compare less-than a prefix of it.
1017 
1018         // Pass the actual start of each string into the CollationIterators,
1019         // plus the equalPrefixLength position,
1020         // so that prefix matches back into the equal prefix work.
1021     }
1022 
1023     int32_t result;
1024     int32_t fastLatinOptions = settings->fastLatinOptions;
1025     if(fastLatinOptions >= 0 &&
1026             (equalPrefixLength == leftLength ||
1027                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1028             (equalPrefixLength == rightLength ||
1029                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1030         if(leftLength >= 0) {
1031             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1032                                                       settings->fastLatinPrimaries,
1033                                                       fastLatinOptions,
1034                                                       left + equalPrefixLength,
1035                                                       leftLength - equalPrefixLength,
1036                                                       right + equalPrefixLength,
1037                                                       rightLength - equalPrefixLength);
1038         } else {
1039             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1040                                                       settings->fastLatinPrimaries,
1041                                                       fastLatinOptions,
1042                                                       left + equalPrefixLength, -1,
1043                                                       right + equalPrefixLength, -1);
1044         }
1045     } else {
1046         result = CollationFastLatin::BAIL_OUT_RESULT;
1047     }
1048 
1049     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1050         if(settings->dontCheckFCD()) {
1051             UTF16CollationIterator leftIter(data, numeric,
1052                                             left, left + equalPrefixLength, leftLimit);
1053             UTF16CollationIterator rightIter(data, numeric,
1054                                             right, right + equalPrefixLength, rightLimit);
1055             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1056         } else {
1057             FCDUTF16CollationIterator leftIter(data, numeric,
1058                                               left, left + equalPrefixLength, leftLimit);
1059             FCDUTF16CollationIterator rightIter(data, numeric,
1060                                                 right, right + equalPrefixLength, rightLimit);
1061             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1062         }
1063     }
1064     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1065         return (UCollationResult)result;
1066     }
1067 
1068     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1069     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1070     // and the benefit seems unlikely to be measurable.
1071 
1072     // Compare identical level.
1073     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1074     left += equalPrefixLength;
1075     right += equalPrefixLength;
1076     if(settings->dontCheckFCD()) {
1077         UTF16NFDIterator leftIter(left, leftLimit);
1078         UTF16NFDIterator rightIter(right, rightLimit);
1079         return compareNFDIter(nfcImpl, leftIter, rightIter);
1080     } else {
1081         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1082         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1083         return compareNFDIter(nfcImpl, leftIter, rightIter);
1084     }
1085 }
1086 
1087 UCollationResult
doCompare(const uint8_t * left,int32_t leftLength,const uint8_t * right,int32_t rightLength,UErrorCode & errorCode) const1088 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1089                              const uint8_t *right, int32_t rightLength,
1090                              UErrorCode &errorCode) const {
1091     // U_FAILURE(errorCode) checked by caller.
1092     if(left == right && leftLength == rightLength) {
1093         return UCOL_EQUAL;
1094     }
1095 
1096     // Identical-prefix test.
1097     int32_t equalPrefixLength = 0;
1098     if(leftLength < 0) {
1099         uint8_t c;
1100         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1101             if(c == 0) { return UCOL_EQUAL; }
1102             ++equalPrefixLength;
1103         }
1104     } else {
1105         for(;;) {
1106             if(equalPrefixLength == leftLength) {
1107                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1108                 break;
1109             } else if(equalPrefixLength == rightLength ||
1110                       left[equalPrefixLength] != right[equalPrefixLength]) {
1111                 break;
1112             }
1113             ++equalPrefixLength;
1114         }
1115     }
1116     // Back up to the start of a partially-equal code point.
1117     if(equalPrefixLength > 0 &&
1118             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1119             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1120         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1121     }
1122 
1123     UBool numeric = settings->isNumeric();
1124     if(equalPrefixLength > 0) {
1125         UBool unsafe = false;
1126         if(equalPrefixLength != leftLength) {
1127             int32_t i = equalPrefixLength;
1128             UChar32 c;
1129             U8_NEXT_OR_FFFD(left, i, leftLength, c);
1130             unsafe = data->isUnsafeBackward(c, numeric);
1131         }
1132         if(!unsafe && equalPrefixLength != rightLength) {
1133             int32_t i = equalPrefixLength;
1134             UChar32 c;
1135             U8_NEXT_OR_FFFD(right, i, rightLength, c);
1136             unsafe = data->isUnsafeBackward(c, numeric);
1137         }
1138         if(unsafe) {
1139             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1140             UChar32 c;
1141             do {
1142                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1143             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1144         }
1145         // See the notes in the UTF-16 version.
1146 
1147         // Pass the actual start of each string into the CollationIterators,
1148         // plus the equalPrefixLength position,
1149         // so that prefix matches back into the equal prefix work.
1150     }
1151 
1152     int32_t result;
1153     int32_t fastLatinOptions = settings->fastLatinOptions;
1154     if(fastLatinOptions >= 0 &&
1155             (equalPrefixLength == leftLength ||
1156                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1157             (equalPrefixLength == rightLength ||
1158                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1159         if(leftLength >= 0) {
1160             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1161                                                      settings->fastLatinPrimaries,
1162                                                      fastLatinOptions,
1163                                                      left + equalPrefixLength,
1164                                                      leftLength - equalPrefixLength,
1165                                                      right + equalPrefixLength,
1166                                                      rightLength - equalPrefixLength);
1167         } else {
1168             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1169                                                      settings->fastLatinPrimaries,
1170                                                      fastLatinOptions,
1171                                                      left + equalPrefixLength, -1,
1172                                                      right + equalPrefixLength, -1);
1173         }
1174     } else {
1175         result = CollationFastLatin::BAIL_OUT_RESULT;
1176     }
1177 
1178     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1179         if(settings->dontCheckFCD()) {
1180             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1181             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1182             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1183         } else {
1184             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1185             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1186             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1187         }
1188     }
1189     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1190         return (UCollationResult)result;
1191     }
1192 
1193     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1194     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1195     // and the benefit seems unlikely to be measurable.
1196 
1197     // Compare identical level.
1198     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1199     left += equalPrefixLength;
1200     right += equalPrefixLength;
1201     if(leftLength > 0) {
1202         leftLength -= equalPrefixLength;
1203         rightLength -= equalPrefixLength;
1204     }
1205     if(settings->dontCheckFCD()) {
1206         UTF8NFDIterator leftIter(left, leftLength);
1207         UTF8NFDIterator rightIter(right, rightLength);
1208         return compareNFDIter(nfcImpl, leftIter, rightIter);
1209     } else {
1210         FCDUTF8NFDIterator leftIter(data, left, leftLength);
1211         FCDUTF8NFDIterator rightIter(data, right, rightLength);
1212         return compareNFDIter(nfcImpl, leftIter, rightIter);
1213     }
1214 }
1215 
1216 UCollationResult
compare(UCharIterator & left,UCharIterator & right,UErrorCode & errorCode) const1217 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1218                            UErrorCode &errorCode) const {
1219     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1220     UBool numeric = settings->isNumeric();
1221 
1222     // Identical-prefix test.
1223     int32_t equalPrefixLength = 0;
1224     {
1225         UChar32 leftUnit;
1226         UChar32 rightUnit;
1227         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1228             if(leftUnit < 0) { return UCOL_EQUAL; }
1229             ++equalPrefixLength;
1230         }
1231 
1232         // Back out the code units that differed, for the real collation comparison.
1233         if(leftUnit >= 0) { left.previous(&left); }
1234         if(rightUnit >= 0) { right.previous(&right); }
1235 
1236         if(equalPrefixLength > 0) {
1237             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1238                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1239                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1240                 do {
1241                     --equalPrefixLength;
1242                     leftUnit = left.previous(&left);
1243                     right.previous(&right);
1244                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1245             }
1246             // See the notes in the UTF-16 version.
1247         }
1248     }
1249 
1250     UCollationResult result;
1251     if(settings->dontCheckFCD()) {
1252         UIterCollationIterator leftIter(data, numeric, left);
1253         UIterCollationIterator rightIter(data, numeric, right);
1254         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1255     } else {
1256         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1257         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1258         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1259     }
1260     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1261         return result;
1262     }
1263 
1264     // Compare identical level.
1265     left.move(&left, equalPrefixLength, UITER_ZERO);
1266     right.move(&right, equalPrefixLength, UITER_ZERO);
1267     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1268     if(settings->dontCheckFCD()) {
1269         UIterNFDIterator leftIter(left);
1270         UIterNFDIterator rightIter(right);
1271         return compareNFDIter(nfcImpl, leftIter, rightIter);
1272     } else {
1273         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1274         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1275         return compareNFDIter(nfcImpl, leftIter, rightIter);
1276     }
1277 }
1278 
1279 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1280 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1281                                    UErrorCode &errorCode) const {
1282     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1283 }
1284 
1285 CollationKey &
getCollationKey(const UChar * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1286 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1287                                    UErrorCode &errorCode) const {
1288     if(U_FAILURE(errorCode)) {
1289         return key.setToBogus();
1290     }
1291     if(s == NULL && length != 0) {
1292         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1293         return key.setToBogus();
1294     }
1295     key.reset();  // resets the "bogus" state
1296     CollationKeyByteSink sink(key);
1297     writeSortKey(s, length, sink, errorCode);
1298     if(U_FAILURE(errorCode)) {
1299         key.setToBogus();
1300     } else if(key.isBogus()) {
1301         errorCode = U_MEMORY_ALLOCATION_ERROR;
1302     } else {
1303         key.setLength(sink.NumberOfBytesAppended());
1304     }
1305     return key;
1306 }
1307 
1308 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1309 RuleBasedCollator::getSortKey(const UnicodeString &s,
1310                               uint8_t *dest, int32_t capacity) const {
1311     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1312 }
1313 
1314 int32_t
getSortKey(const UChar * s,int32_t length,uint8_t * dest,int32_t capacity) const1315 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1316                               uint8_t *dest, int32_t capacity) const {
1317     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1318         return 0;
1319     }
1320     uint8_t noDest[1] = { 0 };
1321     if(dest == NULL) {
1322         // Distinguish pure preflighting from an allocation error.
1323         dest = noDest;
1324         capacity = 0;
1325     }
1326     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1327     UErrorCode errorCode = U_ZERO_ERROR;
1328     writeSortKey(s, length, sink, errorCode);
1329     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1330 }
1331 
1332 void
writeSortKey(const UChar * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1333 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1334                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1335     if(U_FAILURE(errorCode)) { return; }
1336     const UChar *limit = (length >= 0) ? s + length : NULL;
1337     UBool numeric = settings->isNumeric();
1338     CollationKeys::LevelCallback callback;
1339     if(settings->dontCheckFCD()) {
1340         UTF16CollationIterator iter(data, numeric, s, s, limit);
1341         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1342                                                   sink, Collation::PRIMARY_LEVEL,
1343                                                   callback, true, errorCode);
1344     } else {
1345         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1346         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1347                                                   sink, Collation::PRIMARY_LEVEL,
1348                                                   callback, true, errorCode);
1349     }
1350     if(settings->getStrength() == UCOL_IDENTICAL) {
1351         writeIdenticalLevel(s, limit, sink, errorCode);
1352     }
1353     static const char terminator = 0;  // TERMINATOR_BYTE
1354     sink.Append(&terminator, 1);
1355 }
1356 
1357 void
writeIdenticalLevel(const UChar * s,const UChar * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1358 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1359                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
1360     // NFD quick check
1361     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1362     if(U_FAILURE(errorCode)) { return; }
1363     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1364     UChar32 prev = 0;
1365     if(nfdQCYesLimit != s) {
1366         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1367     }
1368     // Is there non-NFD text?
1369     int32_t destLengthEstimate;
1370     if(limit != NULL) {
1371         if(nfdQCYesLimit == limit) { return; }
1372         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1373     } else {
1374         // s is NUL-terminated
1375         if(*nfdQCYesLimit == 0) { return; }
1376         destLengthEstimate = -1;
1377     }
1378     UnicodeString nfd;
1379     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1380     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1381 }
1382 
1383 namespace {
1384 
1385 /**
1386  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1387  * with an instance of this callback class.
1388  * When another level is about to be written, the callback
1389  * records the level and the number of bytes that will be written until
1390  * the sink (which is actually a FixedSortKeyByteSink) fills up.
1391  *
1392  * When internalNextSortKeyPart() is called again, it restarts with the last level
1393  * and ignores as many bytes as were written previously for that level.
1394  */
1395 class PartLevelCallback : public CollationKeys::LevelCallback {
1396 public:
PartLevelCallback(const SortKeyByteSink & s)1397     PartLevelCallback(const SortKeyByteSink &s)
1398             : sink(s), level(Collation::PRIMARY_LEVEL) {
1399         levelCapacity = sink.GetRemainingCapacity();
1400     }
~PartLevelCallback()1401     virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1402     virtual UBool needToWrite(Collation::Level l) override {
1403         if(!sink.Overflowed()) {
1404             // Remember a level that will be at least partially written.
1405             level = l;
1406             levelCapacity = sink.GetRemainingCapacity();
1407             return true;
1408         } else {
1409             return false;
1410         }
1411     }
getLevel() const1412     Collation::Level getLevel() const { return level; }
getLevelCapacity() const1413     int32_t getLevelCapacity() const { return levelCapacity; }
1414 
1415 private:
1416     const SortKeyByteSink &sink;
1417     Collation::Level level;
1418     int32_t levelCapacity;
1419 };
1420 
1421 }  // namespace
1422 
1423 int32_t
internalNextSortKeyPart(UCharIterator * iter,uint32_t state[2],uint8_t * dest,int32_t count,UErrorCode & errorCode) const1424 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1425                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1426     if(U_FAILURE(errorCode)) { return 0; }
1427     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
1428         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1429         return 0;
1430     }
1431     if(count == 0) { return 0; }
1432 
1433     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1434     sink.IgnoreBytes((int32_t)state[1]);
1435     iter->move(iter, 0, UITER_START);
1436 
1437     Collation::Level level = (Collation::Level)state[0];
1438     if(level <= Collation::QUATERNARY_LEVEL) {
1439         UBool numeric = settings->isNumeric();
1440         PartLevelCallback callback(sink);
1441         if(settings->dontCheckFCD()) {
1442             UIterCollationIterator ci(data, numeric, *iter);
1443             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1444                                                       sink, level, callback, false, errorCode);
1445         } else {
1446             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1447             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1448                                                       sink, level, callback, false, errorCode);
1449         }
1450         if(U_FAILURE(errorCode)) { return 0; }
1451         if(sink.NumberOfBytesAppended() > count) {
1452             state[0] = (uint32_t)callback.getLevel();
1453             state[1] = (uint32_t)callback.getLevelCapacity();
1454             return count;
1455         }
1456         // All of the normal levels are done.
1457         if(settings->getStrength() == UCOL_IDENTICAL) {
1458             level = Collation::IDENTICAL_LEVEL;
1459             iter->move(iter, 0, UITER_START);
1460         }
1461         // else fall through to setting ZERO_LEVEL
1462     }
1463 
1464     if(level == Collation::IDENTICAL_LEVEL) {
1465         int32_t levelCapacity = sink.GetRemainingCapacity();
1466         UnicodeString s;
1467         for(;;) {
1468             UChar32 c = iter->next(iter);
1469             if(c < 0) { break; }
1470             s.append((UChar)c);
1471         }
1472         const UChar *sArray = s.getBuffer();
1473         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1474         if(U_FAILURE(errorCode)) { return 0; }
1475         if(sink.NumberOfBytesAppended() > count) {
1476             state[0] = (uint32_t)level;
1477             state[1] = (uint32_t)levelCapacity;
1478             return count;
1479         }
1480     }
1481 
1482     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1483     state[0] = (uint32_t)Collation::ZERO_LEVEL;
1484     state[1] = 0;
1485     int32_t length = sink.NumberOfBytesAppended();
1486     int32_t i = length;
1487     while(i < count) { dest[i++] = 0; }
1488     return length;
1489 }
1490 
1491 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1492 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1493                                   UErrorCode &errorCode) const {
1494     if(U_FAILURE(errorCode)) { return; }
1495     const UChar *s = str.getBuffer();
1496     const UChar *limit = s + str.length();
1497     UBool numeric = settings->isNumeric();
1498     if(settings->dontCheckFCD()) {
1499         UTF16CollationIterator iter(data, numeric, s, s, limit);
1500         int64_t ce;
1501         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1502             ces.addElement(ce, errorCode);
1503         }
1504     } else {
1505         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1506         int64_t ce;
1507         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1508             ces.addElement(ce, errorCode);
1509         }
1510     }
1511 }
1512 
1513 namespace {
1514 
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1515 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1516                   UErrorCode &errorCode) {
1517     if(U_FAILURE(errorCode) || length == 0) { return; }
1518     if(!s.isEmpty()) {
1519         s.append('_', errorCode);
1520     }
1521     s.append(letter, errorCode);
1522     for(int32_t i = 0; i < length; ++i) {
1523         s.append(uprv_toupper(subtag[i]), errorCode);
1524     }
1525 }
1526 
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1527 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1528                      UErrorCode &errorCode) {
1529     if(U_FAILURE(errorCode)) { return; }
1530     if(!s.isEmpty()) {
1531         s.append('_', errorCode);
1532     }
1533     static const char *valueChars = "1234...........IXO..SN..LU......";
1534     s.append(letter, errorCode);
1535     s.append(valueChars[value], errorCode);
1536 }
1537 
1538 }  // namespace
1539 
1540 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1541 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1542                                                     char *buffer, int32_t capacity,
1543                                                     UErrorCode &errorCode) const {
1544     if(U_FAILURE(errorCode)) { return 0; }
1545     if(buffer == NULL ? capacity != 0 : capacity < 0) {
1546         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1547         return 0;
1548     }
1549     if(locale == NULL) {
1550         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1551     }
1552 
1553     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1554     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1555                                                   "collation", locale,
1556                                                   NULL, &errorCode);
1557     if(U_FAILURE(errorCode)) { return 0; }
1558     resultLocale[length] = 0;
1559 
1560     // Append items in alphabetic order of their short definition letters.
1561     CharString result;
1562     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1563 
1564     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1565         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1566     }
1567     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1568     // See ICU tickets #10372 and #10386.
1569     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1570         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1571     }
1572     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1573         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1574     }
1575     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1576         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1577     }
1578     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1579         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1580     }
1581     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1582     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
1583     appendSubtag(result, 'K', subtag, length, errorCode);
1584     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1585     if (length == 0) {
1586         appendSubtag(result, 'L', "root", 4, errorCode);
1587     } else {
1588         appendSubtag(result, 'L', subtag, length, errorCode);
1589     }
1590     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1591         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1592     }
1593     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1594     appendSubtag(result, 'R', subtag, length, errorCode);
1595     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1596         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1597     }
1598     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1599     appendSubtag(result, 'V', subtag, length, errorCode);
1600     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1601     appendSubtag(result, 'Z', subtag, length, errorCode);
1602 
1603     if(U_FAILURE(errorCode)) { return 0; }
1604     return result.extract(buffer, capacity, errorCode);
1605 }
1606 
1607 UBool
isUnsafe(UChar32 c) const1608 RuleBasedCollator::isUnsafe(UChar32 c) const {
1609     return data->isUnsafeBackward(c, settings->isNumeric());
1610 }
1611 
1612 void U_CALLCONV
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1613 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1614     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1615 }
1616 
1617 UBool
initMaxExpansions(UErrorCode & errorCode) const1618 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1619     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1620     return U_SUCCESS(errorCode);
1621 }
1622 
1623 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1624 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1625     UErrorCode errorCode = U_ZERO_ERROR;
1626     if(!initMaxExpansions(errorCode)) { return NULL; }
1627     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1628     if(U_FAILURE(errorCode)) {
1629         delete cei;
1630         return NULL;
1631     }
1632     return cei;
1633 }
1634 
1635 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1636 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1637     UErrorCode errorCode = U_ZERO_ERROR;
1638     if(!initMaxExpansions(errorCode)) { return NULL; }
1639     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1640     if(U_FAILURE(errorCode)) {
1641         delete cei;
1642         return NULL;
1643     }
1644     return cei;
1645 }
1646 
1647 int32_t
getMaxExpansion(int32_t order) const1648 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1649     UErrorCode errorCode = U_ZERO_ERROR;
1650     (void)initMaxExpansions(errorCode);
1651     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1652 }
1653 
1654 U_NAMESPACE_END
1655 
1656 #endif  // !UCONFIG_NO_COLLATION
1657