• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * rulebasedcollator.cpp
9 *
10 * (replaced the former tblcoll.cpp)
11 *
12 * created on: 2012feb14 with new and old collation code
13 * created by: Markus W. Scherer
14 */
15 
16 #include "unicode/utypes.h"
17 
18 #if !UCONFIG_NO_COLLATION
19 
20 #include "unicode/coll.h"
21 #include "unicode/coleitr.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/locid.h"
24 #include "unicode/sortkey.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/ucol.h"
27 #include "unicode/uiter.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/usetiter.h"
32 #include "unicode/utf8.h"
33 #include "unicode/uversion.h"
34 #include "bocsu.h"
35 #include "charstr.h"
36 #include "cmemory.h"
37 #include "collation.h"
38 #include "collationcompare.h"
39 #include "collationdata.h"
40 #include "collationdatareader.h"
41 #include "collationfastlatin.h"
42 #include "collationiterator.h"
43 #include "collationkeys.h"
44 #include "collationroot.h"
45 #include "collationsets.h"
46 #include "collationsettings.h"
47 #include "collationtailoring.h"
48 #include "cstring.h"
49 #include "uassert.h"
50 #include "ucol_imp.h"
51 #include "uhash.h"
52 #include "uitercollationiterator.h"
53 #include "ulocimp.h"
54 #include "ustr_imp.h"
55 #include "utf16collationiterator.h"
56 #include "utf8collationiterator.h"
57 #include "uvectr64.h"
58 
59 U_NAMESPACE_BEGIN
60 
61 namespace {
62 
63 class FixedSortKeyByteSink : public SortKeyByteSink {
64 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)65     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
66             : SortKeyByteSink(dest, destCapacity) {}
67     virtual ~FixedSortKeyByteSink();
68 
69 private:
70     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
71     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
72 };
73 
~FixedSortKeyByteSink()74 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
75 
76 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)77 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
78     // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
79     // Fill the buffer completely.
80     int32_t available = capacity_ - length;
81     if (available > 0) {
82         uprv_memcpy(buffer_ + length, bytes, available);
83     }
84 }
85 
86 UBool
Resize(int32_t,int32_t)87 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
88     return false;
89 }
90 
91 }  // namespace
92 
93 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
94 class CollationKeyByteSink : public SortKeyByteSink {
95 public:
CollationKeyByteSink(CollationKey & key)96     CollationKeyByteSink(CollationKey &key)
97             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
98               key_(key) {}
99     virtual ~CollationKeyByteSink();
100 
101 private:
102     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
103     virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
104 
105     CollationKey &key_;
106 };
107 
~CollationKeyByteSink()108 CollationKeyByteSink::~CollationKeyByteSink() {}
109 
110 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)111 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
112     // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
113     if (Resize(n, length)) {
114         uprv_memcpy(buffer_ + length, bytes, n);
115     }
116 }
117 
118 UBool
Resize(int32_t appendCapacity,int32_t length)119 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
120     if (buffer_ == nullptr) {
121         return false;  // allocation failed before already
122     }
123     int32_t newCapacity = 2 * capacity_;
124     int32_t altCapacity = length + 2 * appendCapacity;
125     if (newCapacity < altCapacity) {
126         newCapacity = altCapacity;
127     }
128     if (newCapacity < 200) {
129         newCapacity = 200;
130     }
131     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
132     if (newBuffer == nullptr) {
133         SetNotOk();
134         return false;
135     }
136     buffer_ = reinterpret_cast<char *>(newBuffer);
137     capacity_ = newCapacity;
138     return true;
139 }
140 
/**
 * Copy constructor.
 * Copies every member from other and adds one reference each
 * to the settings and cache entry objects that are now also used by this collator.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid)
{
    // The destructor releases exactly these two references.
    cacheEntry->addRef();
    settings->addRef();
}
153 
/**
 * Constructs a collator from a binary tailoring image.
 *
 * @param bin the binary image; must not be nullptr
 * @param length length argument for the image; must not be 0
 * @param base the base collator; must not be nullptr and must use the root tailoring
 * @param errorCode in/out error code; set to
 *        U_ILLEGAL_ARGUMENT_ERROR for a missing argument,
 *        U_UNSUPPORTED_ERROR if base is not the root collator,
 *        U_MEMORY_ALLOCATION_ERROR if the new tailoring cannot be created,
 *        or whatever CollationRoot::getRoot() / CollationDataReader::read() report
 *
 * On any failure all data members keep their null/empty initial values.
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(nullptr),
          settings(nullptr),
          tailoring(nullptr),
          cacheEntry(nullptr),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(false)
{
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(base == nullptr || bin == nullptr || length == 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    // Only a base collator that uses the root tailoring is accepted.
    if(base->tailoring != rootTailoring) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> fresh(new CollationTailoring(base->tailoring->settings));
    if(fresh.isNull() || fresh->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *fresh, errorCode);
    if(U_SUCCESS(errorCode)) {
        fresh->actualLocale.setToBogus();
        // Ownership of the tailoring moves to adoptTailoring().
        adoptTailoring(fresh.orphan(), errorCode);
    }
}
184 
RuleBasedCollator(const CollationCacheEntry * entry)185 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
186         : data(entry->tailoring->data),
187           settings(entry->tailoring->settings),
188           tailoring(entry->tailoring),
189           cacheEntry(entry),
190           validLocale(entry->validLocale),
191           explicitlySetAttributes(0),
192           actualLocaleIsSameAsValid(false) {
193     settings->addRef();
194     cacheEntry->addRef();
195 }
196 
~RuleBasedCollator()197 RuleBasedCollator::~RuleBasedCollator() {
198     SharedObject::clearPtr(settings);
199     SharedObject::clearPtr(cacheEntry);
200 }
201 
202 void
adoptTailoring(CollationTailoring * t,UErrorCode & errorCode)203 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
204     if(U_FAILURE(errorCode)) {
205         t->deleteIfZeroRefCount();
206         return;
207     }
208     U_ASSERT(settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr);
209     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
210     if(cacheEntry == nullptr) {
211         errorCode = U_MEMORY_ALLOCATION_ERROR;
212         t->deleteIfZeroRefCount();
213         return;
214     }
215     data = t->data;
216     settings = t->settings;
217     settings->addRef();
218     tailoring = t;
219     cacheEntry->addRef();
220     validLocale = t->actualLocale;
221     actualLocaleIsSameAsValid = false;
222 }
223 
224 RuleBasedCollator *
clone() const225 RuleBasedCollator::clone() const {
226     return new RuleBasedCollator(*this);
227 }
228 
/**
 * Assignment operator. Self-assignment is a no-op.
 * The ref-counted settings and cache entry are replaced through
 * SharedObject::copyPtr(); all other members are plain copies.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        // tailoring already points at other's tailoring here.
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
240 
241 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
242 
243 bool
244 RuleBasedCollator::operator==(const Collator& other) const {
245     if(this == &other) { return true; }
246     if(!Collator::operator==(other)) { return false; }
247     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
248     if(*settings != *o.settings) { return false; }
249     if(data == o.data) { return true; }
250     UBool thisIsRoot = data->base == nullptr;
251     UBool otherIsRoot = o.data->base == nullptr;
252     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
253     if(thisIsRoot != otherIsRoot) { return false; }
254     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
255             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
256         // Shortcut: If both collators have valid rule strings, then compare those.
257         if(tailoring->rules == o.tailoring->rules) { return true; }
258     }
259     // Different rule strings can result in the same or equivalent tailoring.
260     // The rule strings are optional in ICU resource bundles, although included by default.
261     // cloneBinary() drops the rule string.
262     UErrorCode errorCode = U_ZERO_ERROR;
263     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
264     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
265     if(U_FAILURE(errorCode)) { return false; }
266     if(*thisTailored != *otherTailored) { return false; }
267     // For completeness, we should compare all of the mappings;
268     // or we should create a list of strings, sort it with one collator,
269     // and check if both collators compare adjacent strings the same
270     // (order & strength, down to quaternary); or similar.
271     // Testing equality of collators seems unusual.
272     return true;
273 }
274 
275 int32_t
hashCode() const276 RuleBasedCollator::hashCode() const {
277     int32_t h = settings->hashCode();
278     if(data->base == nullptr) { return h; }  // root collator
279     // Do not rely on the rule string, see comments in operator==().
280     UErrorCode errorCode = U_ZERO_ERROR;
281     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
282     if(U_FAILURE(errorCode)) { return 0; }
283     UnicodeSetIterator iter(*set);
284     while(iter.next() && !iter.isString()) {
285         h ^= data->getCE32(iter.getCodepoint());
286     }
287     return h;
288 }
289 
290 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)291 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
292                               const Locale &actual) {
293     if(actual == tailoring->actualLocale) {
294         actualLocaleIsSameAsValid = false;
295     } else {
296         U_ASSERT(actual == valid);
297         actualLocaleIsSameAsValid = true;
298     }
299     // Do not modify tailoring.actualLocale:
300     // We cannot be sure that that would be thread-safe.
301     validLocale = valid;
302     (void)requested;  // Ignore, see also ticket #10477.
303 }
304 
305 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const306 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
307     if(U_FAILURE(errorCode)) {
308         return Locale::getRoot();
309     }
310     switch(type) {
311     case ULOC_ACTUAL_LOCALE:
312         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
313     case ULOC_VALID_LOCALE:
314         return validLocale;
315     case ULOC_REQUESTED_LOCALE:
316     default:
317         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
318         return Locale::getRoot();
319     }
320 }
321 
322 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const323 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
324     if(U_FAILURE(errorCode)) {
325         return nullptr;
326     }
327     const Locale *result;
328     switch(type) {
329     case ULOC_ACTUAL_LOCALE:
330         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
331         break;
332     case ULOC_VALID_LOCALE:
333         result = &validLocale;
334         break;
335     case ULOC_REQUESTED_LOCALE:
336     default:
337         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
338         return nullptr;
339     }
340     if(result->isBogus()) { return nullptr; }
341     const char *id = result->getName();
342     return id[0] == 0 ? "root" : id;
343 }
344 
345 const UnicodeString&
getRules() const346 RuleBasedCollator::getRules() const {
347     return tailoring->rules;
348 }
349 
350 void
getRules(UColRuleOption delta,UnicodeString & buffer) const351 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
352     if(delta == UCOL_TAILORING_ONLY) {
353         buffer = tailoring->rules;
354         return;
355     }
356     // UCOL_FULL_RULES
357     buffer.remove();
358     CollationLoader::appendRootRules(buffer);
359     buffer.append(tailoring->rules).getTerminatedBuffer();
360 }
361 
362 void
getVersion(UVersionInfo version) const363 RuleBasedCollator::getVersion(UVersionInfo version) const {
364     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
365     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
366 }
367 
368 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const369 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
370     if(U_FAILURE(errorCode)) { return nullptr; }
371     UnicodeSet *tailored = new UnicodeSet();
372     if(tailored == nullptr) {
373         errorCode = U_MEMORY_ALLOCATION_ERROR;
374         return nullptr;
375     }
376     if(data->base != nullptr) {
377         TailoredSet(tailored).forData(data, errorCode);
378         if(U_FAILURE(errorCode)) {
379             delete tailored;
380             return nullptr;
381         }
382     }
383     return tailored;
384 }
385 
386 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const387 RuleBasedCollator::internalGetContractionsAndExpansions(
388         UnicodeSet *contractions, UnicodeSet *expansions,
389         UBool addPrefixes, UErrorCode &errorCode) const {
390     if(U_FAILURE(errorCode)) { return; }
391     if(contractions != nullptr) {
392         contractions->clear();
393     }
394     if(expansions != nullptr) {
395         expansions->clear();
396     }
397     ContractionsAndExpansions(contractions, expansions, nullptr, addPrefixes).forData(data, errorCode);
398 }
399 
400 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const401 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
402     if(U_FAILURE(errorCode)) { return; }
403     ContractionsAndExpansions(&set, nullptr, nullptr, false).forCodePoint(data, c, errorCode);
404 }
405 
406 const CollationSettings &
getDefaultSettings() const407 RuleBasedCollator::getDefaultSettings() const {
408     return *tailoring->settings;
409 }
410 
411 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const412 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
413     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
414     int32_t option;
415     switch(attr) {
416     case UCOL_FRENCH_COLLATION:
417         option = CollationSettings::BACKWARD_SECONDARY;
418         break;
419     case UCOL_ALTERNATE_HANDLING:
420         return settings->getAlternateHandling();
421     case UCOL_CASE_FIRST:
422         return settings->getCaseFirst();
423     case UCOL_CASE_LEVEL:
424         option = CollationSettings::CASE_LEVEL;
425         break;
426     case UCOL_NORMALIZATION_MODE:
427         option = CollationSettings::CHECK_FCD;
428         break;
429     case UCOL_STRENGTH:
430         return (UColAttributeValue)settings->getStrength();
431     case UCOL_HIRAGANA_QUATERNARY_MODE:
432         // Deprecated attribute, unsettable.
433         return UCOL_OFF;
434     case UCOL_NUMERIC_COLLATION:
435         option = CollationSettings::NUMERIC;
436         break;
437     default:
438         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
439         return UCOL_DEFAULT;
440     }
441     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
442 }
443 
444 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)445 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
446                                 UErrorCode &errorCode) {
447     UColAttributeValue oldValue = getAttribute(attr, errorCode);
448     if(U_FAILURE(errorCode)) { return; }
449     if(value == oldValue) {
450         setAttributeExplicitly(attr);
451         return;
452     }
453     const CollationSettings &defaultSettings = getDefaultSettings();
454     if(settings == &defaultSettings) {
455         if(value == UCOL_DEFAULT) {
456             setAttributeDefault(attr);
457             return;
458         }
459     }
460     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
461     if(ownedSettings == nullptr) {
462         errorCode = U_MEMORY_ALLOCATION_ERROR;
463         return;
464     }
465 
466     switch(attr) {
467     case UCOL_FRENCH_COLLATION:
468         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
469                                defaultSettings.options, errorCode);
470         break;
471     case UCOL_ALTERNATE_HANDLING:
472         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
473         break;
474     case UCOL_CASE_FIRST:
475         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
476         break;
477     case UCOL_CASE_LEVEL:
478         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
479                                defaultSettings.options, errorCode);
480         break;
481     case UCOL_NORMALIZATION_MODE:
482         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
483                                defaultSettings.options, errorCode);
484         break;
485     case UCOL_STRENGTH:
486         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
487         break;
488     case UCOL_HIRAGANA_QUATERNARY_MODE:
489         // Deprecated attribute. Check for valid values but do not change anything.
490         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
491             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
492         }
493         break;
494     case UCOL_NUMERIC_COLLATION:
495         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
496         break;
497     default:
498         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
499         break;
500     }
501     if(U_FAILURE(errorCode)) { return; }
502     setFastLatinOptions(*ownedSettings);
503     if(value == UCOL_DEFAULT) {
504         setAttributeDefault(attr);
505     } else {
506         setAttributeExplicitly(attr);
507     }
508 }
509 
510 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)511 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
512     if(U_FAILURE(errorCode)) { return *this; }
513     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
514     int32_t value;
515     if(group == UCOL_REORDER_CODE_DEFAULT) {
516         value = UCOL_DEFAULT;
517     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
518         value = group - UCOL_REORDER_CODE_FIRST;
519     } else {
520         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
521         return *this;
522     }
523     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
524     if(value == oldValue) {
525         setAttributeExplicitly(ATTR_VARIABLE_TOP);
526         return *this;
527     }
528     const CollationSettings &defaultSettings = getDefaultSettings();
529     if(settings == &defaultSettings) {
530         if(value == UCOL_DEFAULT) {
531             setAttributeDefault(ATTR_VARIABLE_TOP);
532             return *this;
533         }
534     }
535     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
536     if(ownedSettings == nullptr) {
537         errorCode = U_MEMORY_ALLOCATION_ERROR;
538         return *this;
539     }
540 
541     if(group == UCOL_REORDER_CODE_DEFAULT) {
542         group = (UColReorderCode)(
543             UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
544     }
545     uint32_t varTop = data->getLastPrimaryForGroup(group);
546     U_ASSERT(varTop != 0);
547     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
548     if(U_FAILURE(errorCode)) { return *this; }
549     ownedSettings->variableTop = varTop;
550     setFastLatinOptions(*ownedSettings);
551     if(value == UCOL_DEFAULT) {
552         setAttributeDefault(ATTR_VARIABLE_TOP);
553     } else {
554         setAttributeExplicitly(ATTR_VARIABLE_TOP);
555     }
556     return *this;
557 }
558 
559 UColReorderCode
getMaxVariable() const560 RuleBasedCollator::getMaxVariable() const {
561     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
562 }
563 
564 uint32_t
getVariableTop(UErrorCode &) const565 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
566     return settings->variableTop;
567 }
568 
569 uint32_t
setVariableTop(const char16_t * varTop,int32_t len,UErrorCode & errorCode)570 RuleBasedCollator::setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &errorCode) {
571     if(U_FAILURE(errorCode)) { return 0; }
572     if(varTop == nullptr && len !=0) {
573         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
574         return 0;
575     }
576     if(len < 0) { len = u_strlen(varTop); }
577     if(len == 0) {
578         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
579         return 0;
580     }
581     UBool numeric = settings->isNumeric();
582     int64_t ce1, ce2;
583     if(settings->dontCheckFCD()) {
584         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
585         ce1 = ci.nextCE(errorCode);
586         ce2 = ci.nextCE(errorCode);
587     } else {
588         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
589         ce1 = ci.nextCE(errorCode);
590         ce2 = ci.nextCE(errorCode);
591     }
592     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
593         errorCode = U_CE_NOT_FOUND_ERROR;
594         return 0;
595     }
596     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
597     return settings->variableTop;
598 }
599 
600 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)601 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
602     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
603 }
604 
605 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)606 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
607     if(U_FAILURE(errorCode)) { return; }
608     if(varTop != settings->variableTop) {
609         // Pin the variable top to the end of the reordering group which contains it.
610         // Only a few special groups are supported.
611         int32_t group = data->getGroupForPrimary(varTop);
612         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
613             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
614             return;
615         }
616         uint32_t v = data->getLastPrimaryForGroup(group);
617         U_ASSERT(v != 0 && v >= varTop);
618         varTop = v;
619         if(varTop != settings->variableTop) {
620             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
621             if(ownedSettings == nullptr) {
622                 errorCode = U_MEMORY_ALLOCATION_ERROR;
623                 return;
624             }
625             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
626                                           getDefaultSettings().options, errorCode);
627             if(U_FAILURE(errorCode)) { return; }
628             ownedSettings->variableTop = varTop;
629             setFastLatinOptions(*ownedSettings);
630         }
631     }
632     if(varTop == getDefaultSettings().variableTop) {
633         setAttributeDefault(ATTR_VARIABLE_TOP);
634     } else {
635         setAttributeExplicitly(ATTR_VARIABLE_TOP);
636     }
637 }
638 
639 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const640 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
641                                    UErrorCode &errorCode) const {
642     if(U_FAILURE(errorCode)) { return 0; }
643     if(capacity < 0 || (dest == nullptr && capacity > 0)) {
644         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
645         return 0;
646     }
647     int32_t length = settings->reorderCodesLength;
648     if(length == 0) { return 0; }
649     if(length > capacity) {
650         errorCode = U_BUFFER_OVERFLOW_ERROR;
651         return length;
652     }
653     uprv_memcpy(dest, settings->reorderCodes, length * 4);
654     return length;
655 }
656 
657 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)658 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
659                                    UErrorCode &errorCode) {
660     if(U_FAILURE(errorCode)) { return; }
661     if(length < 0 || (reorderCodes == nullptr && length > 0)) {
662         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
663         return;
664     }
665     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
666         length = 0;
667     }
668     if(length == settings->reorderCodesLength &&
669             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
670         return;
671     }
672     const CollationSettings &defaultSettings = getDefaultSettings();
673     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
674         if(settings != &defaultSettings) {
675             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
676             if(ownedSettings == nullptr) {
677                 errorCode = U_MEMORY_ALLOCATION_ERROR;
678                 return;
679             }
680             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
681             setFastLatinOptions(*ownedSettings);
682         }
683         return;
684     }
685     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
686     if(ownedSettings == nullptr) {
687         errorCode = U_MEMORY_ALLOCATION_ERROR;
688         return;
689     }
690     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
691     setFastLatinOptions(*ownedSettings);
692 }
693 
694 void
setFastLatinOptions(CollationSettings & ownedSettings) const695 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
696     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
697             data, ownedSettings,
698             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
699 }
700 
701 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const702 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
703                            UErrorCode &errorCode) const {
704     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
705     return doCompare(left.getBuffer(), left.length(),
706                      right.getBuffer(), right.length(), errorCode);
707 }
708 
709 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const710 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
711                            int32_t length, UErrorCode &errorCode) const {
712     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
713     if(length < 0) {
714         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
715         return UCOL_EQUAL;
716     }
717     int32_t leftLength = left.length();
718     int32_t rightLength = right.length();
719     if(leftLength > length) { leftLength = length; }
720     if(rightLength > length) { rightLength = length; }
721     return doCompare(left.getBuffer(), leftLength,
722                      right.getBuffer(), rightLength, errorCode);
723 }
724 
725 UCollationResult
compare(const char16_t * left,int32_t leftLength,const char16_t * right,int32_t rightLength,UErrorCode & errorCode) const726 RuleBasedCollator::compare(const char16_t *left, int32_t leftLength,
727                            const char16_t *right, int32_t rightLength,
728                            UErrorCode &errorCode) const {
729     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
730     if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
731         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
732         return UCOL_EQUAL;
733     }
734     // Make sure both or neither strings have a known length.
735     // We do not optimize for mixed length/termination.
736     if(leftLength >= 0) {
737         if(rightLength < 0) { rightLength = u_strlen(right); }
738     } else {
739         if(rightLength >= 0) { leftLength = u_strlen(left); }
740     }
741     return doCompare(left, leftLength, right, rightLength, errorCode);
742 }
743 
744 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const745 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
746                                UErrorCode &errorCode) const {
747     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
748     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
749     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
750     if((leftBytes == nullptr && !left.empty()) || (rightBytes == nullptr && !right.empty())) {
751         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
752         return UCOL_EQUAL;
753     }
754     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
755 }
756 
757 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const758 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
759                                        const char *right, int32_t rightLength,
760                                        UErrorCode &errorCode) const {
761     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
762     if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
763         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
764         return UCOL_EQUAL;
765     }
766     // Make sure both or neither strings have a known length.
767     // We do not optimize for mixed length/termination.
768     if(leftLength >= 0) {
769         if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
770     } else {
771         if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
772     }
773     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
774                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
775 }
776 
777 namespace {
778 
779 /**
780  * Abstract iterator for identical-level string comparisons.
781  * Returns FCD code points and handles temporary switching to NFD.
782  */
783 class NFDIterator : public UObject {
784 public:
NFDIterator()785     NFDIterator() : index(-1), length(0) {}
~NFDIterator()786     virtual ~NFDIterator() {}
787     /**
788      * Returns the next code point from the internal normalization buffer,
789      * or else the next text code point.
790      * Returns -1 at the end of the text.
791      */
nextCodePoint()792     UChar32 nextCodePoint() {
793         if(index >= 0) {
794             if(index == length) {
795                 index = -1;
796             } else {
797                 UChar32 c;
798                 U16_NEXT_UNSAFE(decomp, index, c);
799                 return c;
800             }
801         }
802         return nextRawCodePoint();
803     }
804     /**
805      * @param nfcImpl
806      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
807      * @return the first code point in c's decomposition,
808      *         or c itself if it was decomposed already or if it does not decompose
809      */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)810     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
811         if(index >= 0) { return c; }
812         decomp = nfcImpl.getDecomposition(c, buffer, length);
813         if(decomp == nullptr) { return c; }
814         index = 0;
815         U16_NEXT_UNSAFE(decomp, index, c);
816         return c;
817     }
818 protected:
819     /**
820      * Returns the next text code point in FCD order.
821      * Returns -1 at the end of the text.
822      */
823     virtual UChar32 nextRawCodePoint() = 0;
824 private:
825     const char16_t *decomp;
826     char16_t buffer[4];
827     int32_t index;
828     int32_t length;
829 };
830 
831 class UTF16NFDIterator : public NFDIterator {
832 public:
UTF16NFDIterator(const char16_t * text,const char16_t * textLimit)833     UTF16NFDIterator(const char16_t *text, const char16_t *textLimit) : s(text), limit(textLimit) {}
834 protected:
nextRawCodePoint()835     virtual UChar32 nextRawCodePoint() override {
836         if(s == limit) { return U_SENTINEL; }
837         UChar32 c = *s++;
838         if(limit == nullptr && c == 0) {
839             s = nullptr;
840             return U_SENTINEL;
841         }
842         char16_t trail;
843         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
844             ++s;
845             c = U16_GET_SUPPLEMENTARY(c, trail);
846         }
847         return c;
848     }
849 
850     const char16_t *s;
851     const char16_t *limit;
852 };
853 
854 class FCDUTF16NFDIterator : public UTF16NFDIterator {
855 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const char16_t * text,const char16_t * textLimit)856     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const char16_t *text, const char16_t *textLimit)
857             : UTF16NFDIterator(nullptr, nullptr) {
858         UErrorCode errorCode = U_ZERO_ERROR;
859         const char16_t *spanLimit = nfcImpl.makeFCD(text, textLimit, nullptr, errorCode);
860         if(U_FAILURE(errorCode)) { return; }
861         if(spanLimit == textLimit || (textLimit == nullptr && *spanLimit == 0)) {
862             s = text;
863             limit = spanLimit;
864         } else {
865             str.setTo(text, (int32_t)(spanLimit - text));
866             {
867                 ReorderingBuffer r_buffer(nfcImpl, str);
868                 if(r_buffer.init(str.length(), errorCode)) {
869                     nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
870                 }
871             }
872             if(U_SUCCESS(errorCode)) {
873                 s = str.getBuffer();
874                 limit = s + str.length();
875             }
876         }
877     }
878 private:
879     UnicodeString str;
880 };
881 
882 class UTF8NFDIterator : public NFDIterator {
883 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)884     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
885         : s(text), pos(0), length(textLength) {}
886 protected:
nextRawCodePoint()887     virtual UChar32 nextRawCodePoint() override {
888         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
889         UChar32 c;
890         U8_NEXT_OR_FFFD(s, pos, length, c);
891         return c;
892     }
893 
894     const uint8_t *s;
895     int32_t pos;
896     int32_t length;
897 };
898 
899 class FCDUTF8NFDIterator : public NFDIterator {
900 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)901     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
902             : u8ci(data, false, text, 0, textLength) {}
903 protected:
nextRawCodePoint()904     virtual UChar32 nextRawCodePoint() override {
905         UErrorCode errorCode = U_ZERO_ERROR;
906         return u8ci.nextCodePoint(errorCode);
907     }
908 private:
909     FCDUTF8CollationIterator u8ci;
910 };
911 
912 class UIterNFDIterator : public NFDIterator {
913 public:
UIterNFDIterator(UCharIterator & it)914     UIterNFDIterator(UCharIterator &it) : iter(it) {}
915 protected:
nextRawCodePoint()916     virtual UChar32 nextRawCodePoint() override {
917         return uiter_next32(&iter);
918     }
919 private:
920     UCharIterator &iter;
921 };
922 
923 class FCDUIterNFDIterator : public NFDIterator {
924 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)925     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
926             : uici(data, false, it, startIndex) {}
927 protected:
nextRawCodePoint()928     virtual UChar32 nextRawCodePoint() override {
929         UErrorCode errorCode = U_ZERO_ERROR;
930         return uici.nextCodePoint(errorCode);
931     }
932 private:
933     FCDUIterCollationIterator uici;
934 };
935 
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)936 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
937                                 NFDIterator &left, NFDIterator &right) {
938     for(;;) {
939         // Fetch the next FCD code point from each string.
940         UChar32 leftCp = left.nextCodePoint();
941         UChar32 rightCp = right.nextCodePoint();
942         if(leftCp == rightCp) {
943             if(leftCp < 0) { break; }
944             continue;
945         }
946         // If they are different, then decompose each and compare again.
947         if(leftCp < 0) {
948             leftCp = -2;  // end of string
949         } else if(leftCp == 0xfffe) {
950             leftCp = -1;  // U+FFFE: merge separator
951         } else {
952             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
953         }
954         if(rightCp < 0) {
955             rightCp = -2;  // end of string
956         } else if(rightCp == 0xfffe) {
957             rightCp = -1;  // U+FFFE: merge separator
958         } else {
959             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
960         }
961         if(leftCp < rightCp) { return UCOL_LESS; }
962         if(leftCp > rightCp) { return UCOL_GREATER; }
963     }
964     return UCOL_EQUAL;
965 }
966 
967 }  // namespace
968 
969 UCollationResult
doCompare(const char16_t * left,int32_t leftLength,const char16_t * right,int32_t rightLength,UErrorCode & errorCode) const970 RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength,
971                              const char16_t *right, int32_t rightLength,
972                              UErrorCode &errorCode) const {
973     // U_FAILURE(errorCode) checked by caller.
974     if(left == right && leftLength == rightLength) {
975         return UCOL_EQUAL;
976     }
977 
978     // Identical-prefix test.
979     const char16_t *leftLimit;
980     const char16_t *rightLimit;
981     int32_t equalPrefixLength = 0;
982     if(leftLength < 0) {
983         leftLimit = nullptr;
984         rightLimit = nullptr;
985         char16_t c;
986         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
987             if(c == 0) { return UCOL_EQUAL; }
988             ++equalPrefixLength;
989         }
990     } else {
991         leftLimit = left + leftLength;
992         rightLimit = right + rightLength;
993         for(;;) {
994             if(equalPrefixLength == leftLength) {
995                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
996                 break;
997             } else if(equalPrefixLength == rightLength ||
998                       left[equalPrefixLength] != right[equalPrefixLength]) {
999                 break;
1000             }
1001             ++equalPrefixLength;
1002         }
1003     }
1004 
1005     UBool numeric = settings->isNumeric();
1006     if(equalPrefixLength > 0) {
1007         if((equalPrefixLength != leftLength &&
1008                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1009                 (equalPrefixLength != rightLength &&
1010                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1011             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1012             while(--equalPrefixLength > 0 &&
1013                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1014         }
1015         // Notes:
1016         // - A longer string can compare equal to a prefix of it if only ignorables follow.
1017         // - With a backward level, a longer string can compare less-than a prefix of it.
1018 
1019         // Pass the actual start of each string into the CollationIterators,
1020         // plus the equalPrefixLength position,
1021         // so that prefix matches back into the equal prefix work.
1022     }
1023 
1024     int32_t result;
1025     int32_t fastLatinOptions = settings->fastLatinOptions;
1026     if(fastLatinOptions >= 0 &&
1027             (equalPrefixLength == leftLength ||
1028                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1029             (equalPrefixLength == rightLength ||
1030                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1031         if(leftLength >= 0) {
1032             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1033                                                       settings->fastLatinPrimaries,
1034                                                       fastLatinOptions,
1035                                                       left + equalPrefixLength,
1036                                                       leftLength - equalPrefixLength,
1037                                                       right + equalPrefixLength,
1038                                                       rightLength - equalPrefixLength);
1039         } else {
1040             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1041                                                       settings->fastLatinPrimaries,
1042                                                       fastLatinOptions,
1043                                                       left + equalPrefixLength, -1,
1044                                                       right + equalPrefixLength, -1);
1045         }
1046     } else {
1047         result = CollationFastLatin::BAIL_OUT_RESULT;
1048     }
1049 
1050     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1051         if(settings->dontCheckFCD()) {
1052             UTF16CollationIterator leftIter(data, numeric,
1053                                             left, left + equalPrefixLength, leftLimit);
1054             UTF16CollationIterator rightIter(data, numeric,
1055                                             right, right + equalPrefixLength, rightLimit);
1056             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1057         } else {
1058             FCDUTF16CollationIterator leftIter(data, numeric,
1059                                               left, left + equalPrefixLength, leftLimit);
1060             FCDUTF16CollationIterator rightIter(data, numeric,
1061                                                 right, right + equalPrefixLength, rightLimit);
1062             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1063         }
1064     }
1065     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1066         return (UCollationResult)result;
1067     }
1068 
1069     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1070     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1071     // and the benefit seems unlikely to be measurable.
1072 
1073     // Compare identical level.
1074     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1075     left += equalPrefixLength;
1076     right += equalPrefixLength;
1077     if(settings->dontCheckFCD()) {
1078         UTF16NFDIterator leftIter(left, leftLimit);
1079         UTF16NFDIterator rightIter(right, rightLimit);
1080         return compareNFDIter(nfcImpl, leftIter, rightIter);
1081     } else {
1082         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1083         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1084         return compareNFDIter(nfcImpl, leftIter, rightIter);
1085     }
1086 }
1087 
1088 UCollationResult
doCompare(const uint8_t * left,int32_t leftLength,const uint8_t * right,int32_t rightLength,UErrorCode & errorCode) const1089 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1090                              const uint8_t *right, int32_t rightLength,
1091                              UErrorCode &errorCode) const {
1092     // U_FAILURE(errorCode) checked by caller.
1093     if(left == right && leftLength == rightLength) {
1094         return UCOL_EQUAL;
1095     }
1096 
1097     // Identical-prefix test.
1098     int32_t equalPrefixLength = 0;
1099     if(leftLength < 0) {
1100         uint8_t c;
1101         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1102             if(c == 0) { return UCOL_EQUAL; }
1103             ++equalPrefixLength;
1104         }
1105     } else {
1106         for(;;) {
1107             if(equalPrefixLength == leftLength) {
1108                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1109                 break;
1110             } else if(equalPrefixLength == rightLength ||
1111                       left[equalPrefixLength] != right[equalPrefixLength]) {
1112                 break;
1113             }
1114             ++equalPrefixLength;
1115         }
1116     }
1117     // Back up to the start of a partially-equal code point.
1118     if(equalPrefixLength > 0 &&
1119             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1120             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1121         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1122     }
1123 
1124     UBool numeric = settings->isNumeric();
1125     if(equalPrefixLength > 0) {
1126         UBool unsafe = false;
1127         if(equalPrefixLength != leftLength) {
1128             int32_t i = equalPrefixLength;
1129             UChar32 c;
1130             U8_NEXT_OR_FFFD(left, i, leftLength, c);
1131             unsafe = data->isUnsafeBackward(c, numeric);
1132         }
1133         if(!unsafe && equalPrefixLength != rightLength) {
1134             int32_t i = equalPrefixLength;
1135             UChar32 c;
1136             U8_NEXT_OR_FFFD(right, i, rightLength, c);
1137             unsafe = data->isUnsafeBackward(c, numeric);
1138         }
1139         if(unsafe) {
1140             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1141             UChar32 c;
1142             do {
1143                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1144             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1145         }
1146         // See the notes in the UTF-16 version.
1147 
1148         // Pass the actual start of each string into the CollationIterators,
1149         // plus the equalPrefixLength position,
1150         // so that prefix matches back into the equal prefix work.
1151     }
1152 
1153     int32_t result;
1154     int32_t fastLatinOptions = settings->fastLatinOptions;
1155     if(fastLatinOptions >= 0 &&
1156             (equalPrefixLength == leftLength ||
1157                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1158             (equalPrefixLength == rightLength ||
1159                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1160         if(leftLength >= 0) {
1161             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1162                                                      settings->fastLatinPrimaries,
1163                                                      fastLatinOptions,
1164                                                      left + equalPrefixLength,
1165                                                      leftLength - equalPrefixLength,
1166                                                      right + equalPrefixLength,
1167                                                      rightLength - equalPrefixLength);
1168         } else {
1169             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1170                                                      settings->fastLatinPrimaries,
1171                                                      fastLatinOptions,
1172                                                      left + equalPrefixLength, -1,
1173                                                      right + equalPrefixLength, -1);
1174         }
1175     } else {
1176         result = CollationFastLatin::BAIL_OUT_RESULT;
1177     }
1178 
1179     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1180         if(settings->dontCheckFCD()) {
1181             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1182             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1183             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1184         } else {
1185             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1186             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1187             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1188         }
1189     }
1190     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1191         return (UCollationResult)result;
1192     }
1193 
1194     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1195     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1196     // and the benefit seems unlikely to be measurable.
1197 
1198     // Compare identical level.
1199     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1200     left += equalPrefixLength;
1201     right += equalPrefixLength;
1202     if(leftLength > 0) {
1203         leftLength -= equalPrefixLength;
1204         rightLength -= equalPrefixLength;
1205     }
1206     if(settings->dontCheckFCD()) {
1207         UTF8NFDIterator leftIter(left, leftLength);
1208         UTF8NFDIterator rightIter(right, rightLength);
1209         return compareNFDIter(nfcImpl, leftIter, rightIter);
1210     } else {
1211         FCDUTF8NFDIterator leftIter(data, left, leftLength);
1212         FCDUTF8NFDIterator rightIter(data, right, rightLength);
1213         return compareNFDIter(nfcImpl, leftIter, rightIter);
1214     }
1215 }
1216 
1217 UCollationResult
compare(UCharIterator & left,UCharIterator & right,UErrorCode & errorCode) const1218 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1219                            UErrorCode &errorCode) const {
1220     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1221     UBool numeric = settings->isNumeric();
1222 
1223     // Identical-prefix test.
1224     int32_t equalPrefixLength = 0;
1225     {
1226         UChar32 leftUnit;
1227         UChar32 rightUnit;
1228         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1229             if(leftUnit < 0) { return UCOL_EQUAL; }
1230             ++equalPrefixLength;
1231         }
1232 
1233         // Back out the code units that differed, for the real collation comparison.
1234         if(leftUnit >= 0) { left.previous(&left); }
1235         if(rightUnit >= 0) { right.previous(&right); }
1236 
1237         if(equalPrefixLength > 0) {
1238             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1239                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1240                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1241                 do {
1242                     --equalPrefixLength;
1243                     leftUnit = left.previous(&left);
1244                     right.previous(&right);
1245                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1246             }
1247             // See the notes in the UTF-16 version.
1248         }
1249     }
1250 
1251     UCollationResult result;
1252     if(settings->dontCheckFCD()) {
1253         UIterCollationIterator leftIter(data, numeric, left);
1254         UIterCollationIterator rightIter(data, numeric, right);
1255         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1256     } else {
1257         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1258         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1259         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1260     }
1261     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1262         return result;
1263     }
1264 
1265     // Compare identical level.
1266     left.move(&left, equalPrefixLength, UITER_ZERO);
1267     right.move(&right, equalPrefixLength, UITER_ZERO);
1268     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1269     if(settings->dontCheckFCD()) {
1270         UIterNFDIterator leftIter(left);
1271         UIterNFDIterator rightIter(right);
1272         return compareNFDIter(nfcImpl, leftIter, rightIter);
1273     } else {
1274         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1275         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1276         return compareNFDIter(nfcImpl, leftIter, rightIter);
1277     }
1278 }
1279 
1280 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1281 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1282                                    UErrorCode &errorCode) const {
1283     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1284 }
1285 
1286 CollationKey &
getCollationKey(const char16_t * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1287 RuleBasedCollator::getCollationKey(const char16_t *s, int32_t length, CollationKey& key,
1288                                    UErrorCode &errorCode) const {
1289     if(U_FAILURE(errorCode)) {
1290         return key.setToBogus();
1291     }
1292     if(s == nullptr && length != 0) {
1293         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1294         return key.setToBogus();
1295     }
1296     key.reset();  // resets the "bogus" state
1297     CollationKeyByteSink sink(key);
1298     writeSortKey(s, length, sink, errorCode);
1299     if(U_FAILURE(errorCode)) {
1300         key.setToBogus();
1301     } else if(key.isBogus()) {
1302         errorCode = U_MEMORY_ALLOCATION_ERROR;
1303     } else {
1304         key.setLength(sink.NumberOfBytesAppended());
1305     }
1306     return key;
1307 }
1308 
1309 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1310 RuleBasedCollator::getSortKey(const UnicodeString &s,
1311                               uint8_t *dest, int32_t capacity) const {
1312     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1313 }
1314 
1315 int32_t
getSortKey(const char16_t * s,int32_t length,uint8_t * dest,int32_t capacity) const1316 RuleBasedCollator::getSortKey(const char16_t *s, int32_t length,
1317                               uint8_t *dest, int32_t capacity) const {
1318     if((s == nullptr && length != 0) || capacity < 0 || (dest == nullptr && capacity > 0)) {
1319         return 0;
1320     }
1321     uint8_t noDest[1] = { 0 };
1322     if(dest == nullptr) {
1323         // Distinguish pure preflighting from an allocation error.
1324         dest = noDest;
1325         capacity = 0;
1326     }
1327     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1328     UErrorCode errorCode = U_ZERO_ERROR;
1329     writeSortKey(s, length, sink, errorCode);
1330     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1331 }
1332 
1333 void
writeSortKey(const char16_t * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1334 RuleBasedCollator::writeSortKey(const char16_t *s, int32_t length,
1335                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1336     if(U_FAILURE(errorCode)) { return; }
1337     const char16_t *limit = (length >= 0) ? s + length : nullptr;
1338     UBool numeric = settings->isNumeric();
1339     CollationKeys::LevelCallback callback;
1340     if(settings->dontCheckFCD()) {
1341         UTF16CollationIterator iter(data, numeric, s, s, limit);
1342         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1343                                                   sink, Collation::PRIMARY_LEVEL,
1344                                                   callback, true, errorCode);
1345     } else {
1346         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1347         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1348                                                   sink, Collation::PRIMARY_LEVEL,
1349                                                   callback, true, errorCode);
1350     }
1351     if(settings->getStrength() == UCOL_IDENTICAL) {
1352         writeIdenticalLevel(s, limit, sink, errorCode);
1353     }
1354     static const char terminator = 0;  // TERMINATOR_BYTE
1355     sink.Append(&terminator, 1);
1356 }
1357 
1358 void
writeIdenticalLevel(const char16_t * s,const char16_t * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1359 RuleBasedCollator::writeIdenticalLevel(const char16_t *s, const char16_t *limit,
1360                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
1361     // NFD quick check
1362     const char16_t *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, nullptr, errorCode);
1363     if(U_FAILURE(errorCode)) { return; }
1364     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1365     UChar32 prev = 0;
1366     if(nfdQCYesLimit != s) {
1367         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1368     }
1369     // Is there non-NFD text?
1370     int32_t destLengthEstimate;
1371     if(limit != nullptr) {
1372         if(nfdQCYesLimit == limit) { return; }
1373         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1374     } else {
1375         // s is NUL-terminated
1376         if(*nfdQCYesLimit == 0) { return; }
1377         destLengthEstimate = -1;
1378     }
1379     UnicodeString nfd;
1380     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1381     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1382 }
1383 
1384 namespace {
1385 
1386 /**
1387  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1388  * with an instance of this callback class.
1389  * When another level is about to be written, the callback
1390  * records the level and the number of bytes that will be written until
1391  * the sink (which is actually a FixedSortKeyByteSink) fills up.
1392  *
1393  * When internalNextSortKeyPart() is called again, it restarts with the last level
1394  * and ignores as many bytes as were written previously for that level.
1395  */
1396 class PartLevelCallback : public CollationKeys::LevelCallback {
1397 public:
PartLevelCallback(const SortKeyByteSink & s)1398     PartLevelCallback(const SortKeyByteSink &s)
1399             : sink(s), level(Collation::PRIMARY_LEVEL) {
1400         levelCapacity = sink.GetRemainingCapacity();
1401     }
~PartLevelCallback()1402     virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1403     virtual UBool needToWrite(Collation::Level l) override {
1404         if(!sink.Overflowed()) {
1405             // Remember a level that will be at least partially written.
1406             level = l;
1407             levelCapacity = sink.GetRemainingCapacity();
1408             return true;
1409         } else {
1410             return false;
1411         }
1412     }
getLevel() const1413     Collation::Level getLevel() const { return level; }
getLevelCapacity() const1414     int32_t getLevelCapacity() const { return levelCapacity; }
1415 
1416 private:
1417     const SortKeyByteSink &sink;
1418     Collation::Level level;
1419     int32_t levelCapacity;
1420 };
1421 
1422 }  // namespace
1423 
1424 int32_t
internalNextSortKeyPart(UCharIterator * iter,uint32_t state[2],uint8_t * dest,int32_t count,UErrorCode & errorCode) const1425 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1426                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1427     if(U_FAILURE(errorCode)) { return 0; }
1428     if(iter == nullptr || state == nullptr || count < 0 || (count > 0 && dest == nullptr)) {
1429         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1430         return 0;
1431     }
1432     if(count == 0) { return 0; }
1433 
1434     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1435     sink.IgnoreBytes((int32_t)state[1]);
1436     iter->move(iter, 0, UITER_START);
1437 
1438     Collation::Level level = (Collation::Level)state[0];
1439     if(level <= Collation::QUATERNARY_LEVEL) {
1440         UBool numeric = settings->isNumeric();
1441         PartLevelCallback callback(sink);
1442         if(settings->dontCheckFCD()) {
1443             UIterCollationIterator ci(data, numeric, *iter);
1444             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1445                                                       sink, level, callback, false, errorCode);
1446         } else {
1447             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1448             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1449                                                       sink, level, callback, false, errorCode);
1450         }
1451         if(U_FAILURE(errorCode)) { return 0; }
1452         if(sink.NumberOfBytesAppended() > count) {
1453             state[0] = (uint32_t)callback.getLevel();
1454             state[1] = (uint32_t)callback.getLevelCapacity();
1455             return count;
1456         }
1457         // All of the normal levels are done.
1458         if(settings->getStrength() == UCOL_IDENTICAL) {
1459             level = Collation::IDENTICAL_LEVEL;
1460             iter->move(iter, 0, UITER_START);
1461         }
1462         // else fall through to setting ZERO_LEVEL
1463     }
1464 
1465     if(level == Collation::IDENTICAL_LEVEL) {
1466         int32_t levelCapacity = sink.GetRemainingCapacity();
1467         UnicodeString s;
1468         for(;;) {
1469             UChar32 c = iter->next(iter);
1470             if(c < 0) { break; }
1471             s.append((char16_t)c);
1472         }
1473         const char16_t *sArray = s.getBuffer();
1474         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1475         if(U_FAILURE(errorCode)) { return 0; }
1476         if(sink.NumberOfBytesAppended() > count) {
1477             state[0] = (uint32_t)level;
1478             state[1] = (uint32_t)levelCapacity;
1479             return count;
1480         }
1481     }
1482 
1483     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1484     state[0] = (uint32_t)Collation::ZERO_LEVEL;
1485     state[1] = 0;
1486     int32_t length = sink.NumberOfBytesAppended();
1487     int32_t i = length;
1488     while(i < count) { dest[i++] = 0; }
1489     return length;
1490 }
1491 
1492 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1493 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1494                                   UErrorCode &errorCode) const {
1495     if(U_FAILURE(errorCode)) { return; }
1496     const char16_t *s = str.getBuffer();
1497     const char16_t *limit = s + str.length();
1498     UBool numeric = settings->isNumeric();
1499     if(settings->dontCheckFCD()) {
1500         UTF16CollationIterator iter(data, numeric, s, s, limit);
1501         int64_t ce;
1502         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1503             ces.addElement(ce, errorCode);
1504         }
1505     } else {
1506         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1507         int64_t ce;
1508         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1509             ces.addElement(ce, errorCode);
1510         }
1511     }
1512 }
1513 
1514 namespace {
1515 
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1516 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1517                   UErrorCode &errorCode) {
1518     if(U_FAILURE(errorCode) || length == 0) { return; }
1519     if(!s.isEmpty()) {
1520         s.append('_', errorCode);
1521     }
1522     s.append(letter, errorCode);
1523     for(int32_t i = 0; i < length; ++i) {
1524         s.append(uprv_toupper(subtag[i]), errorCode);
1525     }
1526 }
1527 
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1528 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1529                      UErrorCode &errorCode) {
1530     if(U_FAILURE(errorCode)) { return; }
1531     if(!s.isEmpty()) {
1532         s.append('_', errorCode);
1533     }
1534     static const char *valueChars = "1234...........IXO..SN..LU......";
1535     s.append(letter, errorCode);
1536     s.append(valueChars[value], errorCode);
1537 }
1538 
1539 }  // namespace
1540 
1541 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1542 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1543                                                     char *buffer, int32_t capacity,
1544                                                     UErrorCode &errorCode) const {
1545     if(U_FAILURE(errorCode)) { return 0; }
1546     if(buffer == nullptr ? capacity != 0 : capacity < 0) {
1547         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1548         return 0;
1549     }
1550     if(locale == nullptr) {
1551         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1552     }
1553 
1554     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1555     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1556                                                   "collation", locale,
1557                                                   nullptr, &errorCode);
1558     if(U_FAILURE(errorCode)) { return 0; }
1559     resultLocale[length] = 0;
1560 
1561     // Append items in alphabetic order of their short definition letters.
1562     CharString result;
1563 
1564     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1565         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1566     }
1567     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1568     // See ICU tickets #10372 and #10386.
1569     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1570         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1571     }
1572     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1573         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1574     }
1575     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1576         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1577     }
1578     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1579         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1580     }
1581     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1582     CharString collation = ulocimp_getKeywordValue(resultLocale, "collation", errorCode);
1583     appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
1584     CharString language;
1585     CharString script;
1586     CharString region;
1587     CharString variant;
1588     ulocimp_getSubtags(resultLocale, &language, &script, &region, &variant, nullptr, errorCode);
1589     if (language.isEmpty()) {
1590         appendSubtag(result, 'L', "root", 4, errorCode);
1591     } else {
1592         appendSubtag(result, 'L', language.data(), language.length(), errorCode);
1593     }
1594     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1595         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1596     }
1597     appendSubtag(result, 'R', region.data(), region.length(), errorCode);
1598     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1599         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1600     }
1601     appendSubtag(result, 'V', variant.data(), variant.length(), errorCode);
1602     appendSubtag(result, 'Z', script.data(), script.length(), errorCode);
1603 
1604     if(U_FAILURE(errorCode)) { return 0; }
1605     return result.extract(buffer, capacity, errorCode);
1606 }
1607 
1608 UBool
isUnsafe(UChar32 c) const1609 RuleBasedCollator::isUnsafe(UChar32 c) const {
1610     return data->isUnsafeBackward(c, settings->isNumeric());
1611 }
1612 
1613 void U_CALLCONV
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1614 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1615     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1616 }
1617 
1618 UBool
initMaxExpansions(UErrorCode & errorCode) const1619 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1620     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1621     return U_SUCCESS(errorCode);
1622 }
1623 
1624 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1625 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1626     UErrorCode errorCode = U_ZERO_ERROR;
1627     if(!initMaxExpansions(errorCode)) { return nullptr; }
1628     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1629     if(U_FAILURE(errorCode)) {
1630         delete cei;
1631         return nullptr;
1632     }
1633     return cei;
1634 }
1635 
1636 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1637 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1638     UErrorCode errorCode = U_ZERO_ERROR;
1639     if(!initMaxExpansions(errorCode)) { return nullptr; }
1640     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1641     if(U_FAILURE(errorCode)) {
1642         delete cei;
1643         return nullptr;
1644     }
1645     return cei;
1646 }
1647 
1648 int32_t
getMaxExpansion(int32_t order) const1649 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1650     UErrorCode errorCode = U_ZERO_ERROR;
1651     (void)initMaxExpansions(errorCode);
1652     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1653 }
1654 
1655 U_NAMESPACE_END
1656 
1657 #endif  // !UCONFIG_NO_COLLATION
1658