• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * rulebasedcollator.cpp
7 *
8 * (replaced the former tblcoll.cpp)
9 *
10 * created on: 2012feb14 with new and old collation code
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_COLLATION
17 
18 #include "unicode/coll.h"
19 #include "unicode/coleitr.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/locid.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/tblcoll.h"
24 #include "unicode/ucol.h"
25 #include "unicode/uiter.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uniset.h"
28 #include "unicode/unistr.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/utf8.h"
31 #include "unicode/uversion.h"
32 #include "bocsu.h"
33 #include "charstr.h"
34 #include "cmemory.h"
35 #include "collation.h"
36 #include "collationcompare.h"
37 #include "collationdata.h"
38 #include "collationdatareader.h"
39 #include "collationfastlatin.h"
40 #include "collationiterator.h"
41 #include "collationkeys.h"
42 #include "collationroot.h"
43 #include "collationsets.h"
44 #include "collationsettings.h"
45 #include "collationtailoring.h"
46 #include "cstring.h"
47 #include "uassert.h"
48 #include "ucol_imp.h"
49 #include "uhash.h"
50 #include "uitercollationiterator.h"
51 #include "ustr_imp.h"
52 #include "utf16collationiterator.h"
53 #include "utf8collationiterator.h"
54 #include "uvectr64.h"
55 
56 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
57 
58 U_NAMESPACE_BEGIN
59 
60 namespace {
61 
62 class FixedSortKeyByteSink : public SortKeyByteSink {
63 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)64     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65             : SortKeyByteSink(dest, destCapacity) {}
66     virtual ~FixedSortKeyByteSink();
67 
68 private:
69     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
70     virtual UBool Resize(int32_t appendCapacity, int32_t length);
71 };
72 
~FixedSortKeyByteSink()73 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
74 
75 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)76 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78     // Fill the buffer completely.
79     int32_t available = capacity_ - length;
80     if (available > 0) {
81         uprv_memcpy(buffer_ + length, bytes, available);
82     }
83 }
84 
85 UBool
Resize(int32_t,int32_t)86 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
87     return FALSE;
88 }
89 
90 }  // namespace
91 
92 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
93 class CollationKeyByteSink : public SortKeyByteSink {
94 public:
CollationKeyByteSink(CollationKey & key)95     CollationKeyByteSink(CollationKey &key)
96             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97               key_(key) {}
98     virtual ~CollationKeyByteSink();
99 
100 private:
101     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
102     virtual UBool Resize(int32_t appendCapacity, int32_t length);
103 
104     CollationKey &key_;
105 };
106 
~CollationKeyByteSink()107 CollationKeyByteSink::~CollationKeyByteSink() {}
108 
109 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)110 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112     if (Resize(n, length)) {
113         uprv_memcpy(buffer_ + length, bytes, n);
114     }
115 }
116 
117 UBool
Resize(int32_t appendCapacity,int32_t length)118 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
119     if (buffer_ == NULL) {
120         return FALSE;  // allocation failed before already
121     }
122     int32_t newCapacity = 2 * capacity_;
123     int32_t altCapacity = length + 2 * appendCapacity;
124     if (newCapacity < altCapacity) {
125         newCapacity = altCapacity;
126     }
127     if (newCapacity < 200) {
128         newCapacity = 200;
129     }
130     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
131     if (newBuffer == NULL) {
132         SetNotOk();
133         return FALSE;
134     }
135     buffer_ = reinterpret_cast<char *>(newBuffer);
136     capacity_ = newCapacity;
137     return TRUE;
138 }
139 
/**
 * Copy constructor.
 * The new collator points at the same data, settings and tailoring as `other`
 * and takes its own reference on the shared settings and tailoring objects.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    // Both objects are shared with `other`; count this collator as another user.
    settings->addRef();
    tailoring->addRef();
}
151 
/**
 * Builds a collator from binary tailoring data.
 *
 * @param bin       the binary data; must not be NULL
 * @param length    number of bytes in bin; must be positive
 * @param base      the base collator; must not be NULL and must use the root tailoring
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                  U_UNSUPPORTED_ERROR for a non-root base, U_MEMORY_ALLOCATION_ERROR
 *                  if the tailoring cannot be created, or whatever the reader reports
 *
 * On any failure the collator is left with NULL data, settings and tailoring.
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(NULL),
          settings(NULL),
          tailoring(NULL),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(FALSE) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    const UBool argsAreUsable = bin != NULL && length > 0 && base != NULL;
    if(!argsAreUsable) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    // Only a base collator that uses the root tailoring is accepted.
    if(base->tailoring != root) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> newTailoring(
            new CollationTailoring(base->tailoring->settings));
    if(newTailoring.isNull() || newTailoring->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *newTailoring, errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    newTailoring->actualLocale.setToBogus();
    adoptTailoring(newTailoring.orphan());
}
181 
RuleBasedCollator(const CollationTailoring * t,const Locale & vl)182 RuleBasedCollator::RuleBasedCollator(const CollationTailoring *t, const Locale &vl)
183         : data(t->data),
184           settings(t->settings),
185           tailoring(t),
186           validLocale(vl),
187           explicitlySetAttributes(0),
188           actualLocaleIsSameAsValid(FALSE) {
189     settings->addRef();
190     tailoring->addRef();
191 }
192 
~RuleBasedCollator()193 RuleBasedCollator::~RuleBasedCollator() {
194     SharedObject::clearPtr(settings);
195     SharedObject::clearPtr(tailoring);
196 }
197 
198 void
adoptTailoring(CollationTailoring * t)199 RuleBasedCollator::adoptTailoring(CollationTailoring *t) {
200     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL);
201     data = t->data;
202     settings = t->settings;
203     settings->addRef();
204     t->addRef();
205     tailoring = t;
206     validLocale = t->actualLocale;
207     actualLocaleIsSameAsValid = FALSE;
208 }
209 
210 Collator *
clone() const211 RuleBasedCollator::clone() const {
212     return new RuleBasedCollator(*this);
213 }
214 
/**
 * Assignment operator.
 * Switches this collator over to the settings and tailoring of `other`
 * (through SharedObject::copyPtr()) and copies the locale and attribute state.
 * Self-assignment is a no-op.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        SharedObject::copyPtr(other.tailoring, tailoring);
        // Read the data from the tailoring that was just installed.
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
225 
226 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
227 
228 UBool
229 RuleBasedCollator::operator==(const Collator& other) const {
230     if(this == &other) { return TRUE; }
231     if(!Collator::operator==(other)) { return FALSE; }
232     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
233     if(*settings != *o.settings) { return FALSE; }
234     if(data == o.data) { return TRUE; }
235     UBool thisIsRoot = data->base == NULL;
236     UBool otherIsRoot = o.data->base == NULL;
237     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
238     if(thisIsRoot != otherIsRoot) { return FALSE; }
239     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
240             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
241         // Shortcut: If both collators have valid rule strings, then compare those.
242         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
243     }
244     // Different rule strings can result in the same or equivalent tailoring.
245     // The rule strings are optional in ICU resource bundles, although included by default.
246     // cloneBinary() drops the rule string.
247     UErrorCode errorCode = U_ZERO_ERROR;
248     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
249     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
250     if(U_FAILURE(errorCode)) { return FALSE; }
251     if(*thisTailored != *otherTailored) { return FALSE; }
252     // For completeness, we should compare all of the mappings;
253     // or we should create a list of strings, sort it with one collator,
254     // and check if both collators compare adjacent strings the same
255     // (order & strength, down to quaternary); or similar.
256     // Testing equality of collators seems unusual.
257     return TRUE;
258 }
259 
260 int32_t
hashCode() const261 RuleBasedCollator::hashCode() const {
262     int32_t h = settings->hashCode();
263     if(data->base == NULL) { return h; }  // root collator
264     // Do not rely on the rule string, see comments in operator==().
265     UErrorCode errorCode = U_ZERO_ERROR;
266     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
267     if(U_FAILURE(errorCode)) { return 0; }
268     UnicodeSetIterator iter(*set);
269     while(iter.next() && !iter.isString()) {
270         h ^= data->getCE32(iter.getCodepoint());
271     }
272     return h;
273 }
274 
275 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)276 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
277                               const Locale &actual) {
278     if(actual == tailoring->actualLocale) {
279         actualLocaleIsSameAsValid = FALSE;
280     } else {
281         U_ASSERT(actual == valid);
282         actualLocaleIsSameAsValid = TRUE;
283     }
284     // Do not modify tailoring.actualLocale:
285     // We cannot be sure that that would be thread-safe.
286     validLocale = valid;
287     (void)requested;  // Ignore, see also ticket #10477.
288 }
289 
290 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const291 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
292     if(U_FAILURE(errorCode)) {
293         return Locale::getRoot();
294     }
295     switch(type) {
296     case ULOC_ACTUAL_LOCALE:
297         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
298     case ULOC_VALID_LOCALE:
299     case ULOC_REQUESTED_LOCALE:  // TODO: Drop this, see ticket #10477.
300         return validLocale;
301     default:
302         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
303         return Locale::getRoot();
304     }
305 }
306 
307 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const308 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
309     if(U_FAILURE(errorCode)) {
310         return NULL;
311     }
312     const Locale *result;
313     switch(type) {
314     case ULOC_ACTUAL_LOCALE:
315         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
316         break;
317     case ULOC_VALID_LOCALE:
318     case ULOC_REQUESTED_LOCALE:  // TODO: Drop this, see ticket #10477.
319         result = &validLocale;
320         break;
321     default:
322         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
323         return NULL;
324     }
325     if(result->isBogus()) { return NULL; }
326     const char *id = result->getName();
327     return id[0] == 0 ? "root" : id;
328 }
329 
330 const UnicodeString&
getRules() const331 RuleBasedCollator::getRules() const {
332     return tailoring->rules;
333 }
334 
335 void
getRules(UColRuleOption delta,UnicodeString & buffer) const336 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
337     if(delta == UCOL_TAILORING_ONLY) {
338         buffer = tailoring->rules;
339         return;
340     }
341     // UCOL_FULL_RULES
342     buffer.remove();
343     CollationLoader::appendRootRules(buffer);
344     buffer.append(tailoring->rules).getTerminatedBuffer();
345 }
346 
347 void
getVersion(UVersionInfo version) const348 RuleBasedCollator::getVersion(UVersionInfo version) const {
349     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
350     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
351 }
352 
353 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const354 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
355     if(U_FAILURE(errorCode)) { return NULL; }
356     UnicodeSet *tailored = new UnicodeSet();
357     if(tailored == NULL) {
358         errorCode = U_MEMORY_ALLOCATION_ERROR;
359         return NULL;
360     }
361     if(data->base != NULL) {
362         TailoredSet(tailored).forData(data, errorCode);
363         if(U_FAILURE(errorCode)) {
364             delete tailored;
365             return NULL;
366         }
367     }
368     return tailored;
369 }
370 
371 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const372 RuleBasedCollator::internalGetContractionsAndExpansions(
373         UnicodeSet *contractions, UnicodeSet *expansions,
374         UBool addPrefixes, UErrorCode &errorCode) const {
375     if(U_FAILURE(errorCode)) { return; }
376     if(contractions != NULL) {
377         contractions->clear();
378     }
379     if(expansions != NULL) {
380         expansions->clear();
381     }
382     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
383 }
384 
385 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const386 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
387     if(U_FAILURE(errorCode)) { return; }
388     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
389 }
390 
391 const CollationSettings &
getDefaultSettings() const392 RuleBasedCollator::getDefaultSettings() const {
393     return *tailoring->settings;
394 }
395 
396 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const397 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
398     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
399     int32_t option;
400     switch(attr) {
401     case UCOL_FRENCH_COLLATION:
402         option = CollationSettings::BACKWARD_SECONDARY;
403         break;
404     case UCOL_ALTERNATE_HANDLING:
405         return settings->getAlternateHandling();
406     case UCOL_CASE_FIRST:
407         return settings->getCaseFirst();
408     case UCOL_CASE_LEVEL:
409         option = CollationSettings::CASE_LEVEL;
410         break;
411     case UCOL_NORMALIZATION_MODE:
412         option = CollationSettings::CHECK_FCD;
413         break;
414     case UCOL_STRENGTH:
415         return (UColAttributeValue)settings->getStrength();
416     case UCOL_HIRAGANA_QUATERNARY_MODE:
417         // Deprecated attribute, unsettable.
418         return UCOL_OFF;
419     case UCOL_NUMERIC_COLLATION:
420         option = CollationSettings::NUMERIC;
421         break;
422     default:
423         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
424         return UCOL_DEFAULT;
425     }
426     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
427 }
428 
429 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)430 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
431                                 UErrorCode &errorCode) {
432     UColAttributeValue oldValue = getAttribute(attr, errorCode);
433     if(U_FAILURE(errorCode)) { return; }
434     if(value == oldValue) {
435         setAttributeExplicitly(attr);
436         return;
437     }
438     const CollationSettings &defaultSettings = getDefaultSettings();
439     if(settings == &defaultSettings) {
440         if(value == UCOL_DEFAULT) {
441             setAttributeDefault(attr);
442             return;
443         }
444     }
445     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
446     if(ownedSettings == NULL) {
447         errorCode = U_MEMORY_ALLOCATION_ERROR;
448         return;
449     }
450 
451     switch(attr) {
452     case UCOL_FRENCH_COLLATION:
453         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
454                                defaultSettings.options, errorCode);
455         break;
456     case UCOL_ALTERNATE_HANDLING:
457         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
458         break;
459     case UCOL_CASE_FIRST:
460         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
461         break;
462     case UCOL_CASE_LEVEL:
463         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
464                                defaultSettings.options, errorCode);
465         break;
466     case UCOL_NORMALIZATION_MODE:
467         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
468                                defaultSettings.options, errorCode);
469         break;
470     case UCOL_STRENGTH:
471         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
472         break;
473     case UCOL_HIRAGANA_QUATERNARY_MODE:
474         // Deprecated attribute. Check for valid values but do not change anything.
475         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
476             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
477         }
478         break;
479     case UCOL_NUMERIC_COLLATION:
480         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
481         break;
482     default:
483         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
484         break;
485     }
486     if(U_FAILURE(errorCode)) { return; }
487     setFastLatinOptions(*ownedSettings);
488     if(value == UCOL_DEFAULT) {
489         setAttributeDefault(attr);
490     } else {
491         setAttributeExplicitly(attr);
492     }
493 }
494 
495 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)496 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
497     if(U_FAILURE(errorCode)) { return *this; }
498     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
499     int32_t value;
500     if(group == UCOL_REORDER_CODE_DEFAULT) {
501         value = UCOL_DEFAULT;
502     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
503         value = group - UCOL_REORDER_CODE_FIRST;
504     } else {
505         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
506         return *this;
507     }
508     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
509     if(value == oldValue) {
510         setAttributeExplicitly(ATTR_VARIABLE_TOP);
511         return *this;
512     }
513     const CollationSettings &defaultSettings = getDefaultSettings();
514     if(settings == &defaultSettings) {
515         if(value == UCOL_DEFAULT) {
516             setAttributeDefault(ATTR_VARIABLE_TOP);
517             return *this;
518         }
519     }
520     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
521     if(ownedSettings == NULL) {
522         errorCode = U_MEMORY_ALLOCATION_ERROR;
523         return *this;
524     }
525 
526     if(group == UCOL_REORDER_CODE_DEFAULT) {
527         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
528     }
529     uint32_t varTop = data->getLastPrimaryForGroup(group);
530     U_ASSERT(varTop != 0);
531     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
532     if(U_FAILURE(errorCode)) { return *this; }
533     ownedSettings->variableTop = varTop;
534     setFastLatinOptions(*ownedSettings);
535     if(value == UCOL_DEFAULT) {
536         setAttributeDefault(ATTR_VARIABLE_TOP);
537     } else {
538         setAttributeExplicitly(ATTR_VARIABLE_TOP);
539     }
540     return *this;
541 }
542 
543 UColReorderCode
getMaxVariable() const544 RuleBasedCollator::getMaxVariable() const {
545     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
546 }
547 
548 uint32_t
getVariableTop(UErrorCode &) const549 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
550     return settings->variableTop;
551 }
552 
553 uint32_t
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & errorCode)554 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
555     if(U_FAILURE(errorCode)) { return 0; }
556     if(varTop == NULL && len !=0) {
557         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
558         return 0;
559     }
560     if(len < 0) { len = u_strlen(varTop); }
561     if(len == 0) {
562         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
563         return 0;
564     }
565     UBool numeric = settings->isNumeric();
566     int64_t ce1, ce2;
567     if(settings->dontCheckFCD()) {
568         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
569         ce1 = ci.nextCE(errorCode);
570         ce2 = ci.nextCE(errorCode);
571     } else {
572         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
573         ce1 = ci.nextCE(errorCode);
574         ce2 = ci.nextCE(errorCode);
575     }
576     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
577         errorCode = U_CE_NOT_FOUND_ERROR;
578         return 0;
579     }
580     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
581     return settings->variableTop;
582 }
583 
584 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)585 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
586     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
587 }
588 
589 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)590 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
591     if(U_FAILURE(errorCode)) { return; }
592     if(varTop != settings->variableTop) {
593         // Pin the variable top to the end of the reordering group which contains it.
594         // Only a few special groups are supported.
595         int32_t group = data->getGroupForPrimary(varTop);
596         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
597             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
598             return;
599         }
600         uint32_t v = data->getLastPrimaryForGroup(group);
601         U_ASSERT(v != 0 && v >= varTop);
602         varTop = v;
603         if(varTop != settings->variableTop) {
604             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
605             if(ownedSettings == NULL) {
606                 errorCode = U_MEMORY_ALLOCATION_ERROR;
607                 return;
608             }
609             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
610                                           getDefaultSettings().options, errorCode);
611             if(U_FAILURE(errorCode)) { return; }
612             ownedSettings->variableTop = varTop;
613             setFastLatinOptions(*ownedSettings);
614         }
615     }
616     if(varTop == getDefaultSettings().variableTop) {
617         setAttributeDefault(ATTR_VARIABLE_TOP);
618     } else {
619         setAttributeExplicitly(ATTR_VARIABLE_TOP);
620     }
621 }
622 
623 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const624 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
625                                    UErrorCode &errorCode) const {
626     if(U_FAILURE(errorCode)) { return 0; }
627     if(capacity < 0 || (dest == NULL && capacity > 0)) {
628         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
629         return 0;
630     }
631     int32_t length = settings->reorderCodesLength;
632     if(length == 0) { return 0; }
633     if(length > capacity) {
634         errorCode = U_BUFFER_OVERFLOW_ERROR;
635         return length;
636     }
637     uprv_memcpy(dest, settings->reorderCodes, length * 4);
638     return length;
639 }
640 
641 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)642 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
643                                    UErrorCode &errorCode) {
644     if(U_FAILURE(errorCode)) { return; }
645     if(length < 0 || (reorderCodes == NULL && length > 0)) {
646         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
647         return;
648     }
649     if(length == settings->reorderCodesLength &&
650             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
651         return;
652     }
653     const CollationSettings &defaultSettings = getDefaultSettings();
654     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
655         if(settings != &defaultSettings) {
656             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
657             if(ownedSettings == NULL) {
658                 errorCode = U_MEMORY_ALLOCATION_ERROR;
659                 return;
660             }
661             ownedSettings->aliasReordering(defaultSettings.reorderCodes,
662                                            defaultSettings.reorderCodesLength,
663                                            defaultSettings.reorderTable);
664             setFastLatinOptions(*ownedSettings);
665         }
666         return;
667     }
668     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
669     if(ownedSettings == NULL) {
670         errorCode = U_MEMORY_ALLOCATION_ERROR;
671         return;
672     }
673     if(length == 0) {
674         ownedSettings->resetReordering();
675     } else {
676         uint8_t reorderTable[256];
677         data->makeReorderTable(reorderCodes, length, reorderTable, errorCode);
678         if(U_FAILURE(errorCode)) { return; }
679         if(!ownedSettings->setReordering(reorderCodes, length, reorderTable)) {
680             errorCode = U_MEMORY_ALLOCATION_ERROR;
681             return;
682         }
683     }
684     setFastLatinOptions(*ownedSettings);
685 }
686 
687 void
setFastLatinOptions(CollationSettings & ownedSettings) const688 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
689     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
690             data, ownedSettings,
691             ownedSettings.fastLatinPrimaries, LENGTHOF(ownedSettings.fastLatinPrimaries));
692 }
693 
694 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const695 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
696                            UErrorCode &errorCode) const {
697     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
698     return doCompare(left.getBuffer(), left.length(),
699                      right.getBuffer(), right.length(), errorCode);
700 }
701 
702 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const703 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
704                            int32_t length, UErrorCode &errorCode) const {
705     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
706     if(length < 0) {
707         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
708         return UCOL_EQUAL;
709     }
710     int32_t leftLength = left.length();
711     int32_t rightLength = right.length();
712     if(leftLength > length) { leftLength = length; }
713     if(rightLength > length) { rightLength = length; }
714     return doCompare(left.getBuffer(), leftLength,
715                      right.getBuffer(), rightLength, errorCode);
716 }
717 
718 UCollationResult
compare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const719 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
720                            const UChar *right, int32_t rightLength,
721                            UErrorCode &errorCode) const {
722     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
723     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
724         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
725         return UCOL_EQUAL;
726     }
727     // Make sure both or neither strings have a known length.
728     // We do not optimize for mixed length/termination.
729     if(leftLength >= 0) {
730         if(rightLength < 0) { rightLength = u_strlen(right); }
731     } else {
732         if(rightLength >= 0) { leftLength = u_strlen(left); }
733     }
734     return doCompare(left, leftLength, right, rightLength, errorCode);
735 }
736 
737 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const738 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
739                                UErrorCode &errorCode) const {
740     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
741     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
742     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
743     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
744         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
745         return UCOL_EQUAL;
746     }
747     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
748 }
749 
750 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const751 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
752                                        const char *right, int32_t rightLength,
753                                        UErrorCode &errorCode) const {
754     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
755     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
756         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
757         return UCOL_EQUAL;
758     }
759     // Make sure both or neither strings have a known length.
760     // We do not optimize for mixed length/termination.
761     if(leftLength >= 0) {
762         if(rightLength < 0) { rightLength = uprv_strlen(right); }
763     } else {
764         if(rightLength >= 0) { leftLength = uprv_strlen(left); }
765     }
766     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
767                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
768 }
769 
770 namespace {
771 
772 /**
773  * Abstract iterator for identical-level string comparisons.
774  * Returns FCD code points and handles temporary switching to NFD.
775  */
776 class NFDIterator {
777 public:
NFDIterator()778     NFDIterator() : index(-1), length(0) {}
~NFDIterator()779     virtual ~NFDIterator() {}
780     /**
781      * Returns the next code point from the internal normalization buffer,
782      * or else the next text code point.
783      * Returns -1 at the end of the text.
784      */
nextCodePoint()785     UChar32 nextCodePoint() {
786         if(index >= 0) {
787             if(index == length) {
788                 index = -1;
789             } else {
790                 UChar32 c;
791                 U16_NEXT_UNSAFE(decomp, index, c);
792                 return c;
793             }
794         }
795         return nextRawCodePoint();
796     }
797     /**
798      * @param nfcImpl
799      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
800      * @return the first code point in c's decomposition,
801      *         or c itself if it was decomposed already or if it does not decompose
802      */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)803     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
804         if(index >= 0) { return c; }
805         decomp = nfcImpl.getDecomposition(c, buffer, length);
806         if(decomp == NULL) { return c; }
807         index = 0;
808         U16_NEXT_UNSAFE(decomp, index, c);
809         return c;
810     }
811 protected:
812     /**
813      * Returns the next text code point in FCD order.
814      * Returns -1 at the end of the text.
815      */
816     virtual UChar32 nextRawCodePoint() = 0;
817 private:
818     const UChar *decomp;
819     UChar buffer[4];
820     int32_t index;
821     int32_t length;
822 };
823 
824 class UTF16NFDIterator : public NFDIterator {
825 public:
UTF16NFDIterator(const UChar * text,const UChar * textLimit)826     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
827 protected:
nextRawCodePoint()828     virtual UChar32 nextRawCodePoint() {
829         if(s == limit) { return U_SENTINEL; }
830         UChar32 c = *s++;
831         if(limit == NULL && c == 0) {
832             s = NULL;
833             return U_SENTINEL;
834         }
835         UChar trail;
836         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
837             ++s;
838             c = U16_GET_SUPPLEMENTARY(c, trail);
839         }
840         return c;
841     }
842 
843     const UChar *s;
844     const UChar *limit;
845 };
846 
847 class FCDUTF16NFDIterator : public UTF16NFDIterator {
848 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const UChar * text,const UChar * textLimit)849     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
850             : UTF16NFDIterator(NULL, NULL) {
851         UErrorCode errorCode = U_ZERO_ERROR;
852         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
853         if(U_FAILURE(errorCode)) { return; }
854         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
855             s = text;
856             limit = spanLimit;
857         } else {
858             str.setTo(text, (int32_t)(spanLimit - text));
859             {
860                 ReorderingBuffer buffer(nfcImpl, str);
861                 if(buffer.init(str.length(), errorCode)) {
862                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
863                 }
864             }
865             if(U_SUCCESS(errorCode)) {
866                 s = str.getBuffer();
867                 limit = s + str.length();
868             }
869         }
870     }
871 private:
872     UnicodeString str;
873 };
874 
875 class UTF8NFDIterator : public NFDIterator {
876 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)877     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
878         : s(text), pos(0), length(textLength) {}
879 protected:
nextRawCodePoint()880     virtual UChar32 nextRawCodePoint() {
881         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
882         UChar32 c;
883         U8_NEXT_OR_FFFD(s, pos, length, c);
884         return c;
885     }
886 
887     const uint8_t *s;
888     int32_t pos;
889     int32_t length;
890 };
891 
892 class FCDUTF8NFDIterator : public NFDIterator {
893 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)894     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
895             : u8ci(data, FALSE, text, 0, textLength) {}
896 protected:
nextRawCodePoint()897     virtual UChar32 nextRawCodePoint() {
898         UErrorCode errorCode = U_ZERO_ERROR;
899         return u8ci.nextCodePoint(errorCode);
900     }
901 private:
902     FCDUTF8CollationIterator u8ci;
903 };
904 
905 class UIterNFDIterator : public NFDIterator {
906 public:
UIterNFDIterator(UCharIterator & it)907     UIterNFDIterator(UCharIterator &it) : iter(it) {}
908 protected:
nextRawCodePoint()909     virtual UChar32 nextRawCodePoint() {
910         return uiter_next32(&iter);
911     }
912 private:
913     UCharIterator &iter;
914 };
915 
916 class FCDUIterNFDIterator : public NFDIterator {
917 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)918     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
919             : uici(data, FALSE, it, startIndex) {}
920 protected:
nextRawCodePoint()921     virtual UChar32 nextRawCodePoint() {
922         UErrorCode errorCode = U_ZERO_ERROR;
923         return uici.nextCodePoint(errorCode);
924     }
925 private:
926     FCDUIterCollationIterator uici;
927 };
928 
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)929 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
930                                 NFDIterator &left, NFDIterator &right) {
931     for(;;) {
932         // Fetch the next FCD code point from each string.
933         UChar32 leftCp = left.nextCodePoint();
934         UChar32 rightCp = right.nextCodePoint();
935         if(leftCp == rightCp) {
936             if(leftCp < 0) { break; }
937             continue;
938         }
939         // If they are different, then decompose each and compare again.
940         if(leftCp < 0) {
941             leftCp = -2;  // end of string
942         } else if(leftCp == 0xfffe) {
943             leftCp = -1;  // U+FFFE: merge separator
944         } else {
945             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
946         }
947         if(rightCp < 0) {
948             rightCp = -2;  // end of string
949         } else if(rightCp == 0xfffe) {
950             rightCp = -1;  // U+FFFE: merge separator
951         } else {
952             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
953         }
954         if(leftCp < rightCp) { return UCOL_LESS; }
955         if(leftCp > rightCp) { return UCOL_GREATER; }
956     }
957     return UCOL_EQUAL;
958 }
959 
960 }  // namespace
961 
962 UCollationResult
doCompare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const963 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
964                              const UChar *right, int32_t rightLength,
965                              UErrorCode &errorCode) const {
966     // U_FAILURE(errorCode) checked by caller.
967     if(left == right && leftLength == rightLength) {
968         return UCOL_EQUAL;
969     }
970 
971     // Identical-prefix test.
972     const UChar *leftLimit;
973     const UChar *rightLimit;
974     int32_t equalPrefixLength = 0;
975     if(leftLength < 0) {
976         leftLimit = NULL;
977         rightLimit = NULL;
978         UChar c;
979         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
980             if(c == 0) { return UCOL_EQUAL; }
981             ++equalPrefixLength;
982         }
983     } else {
984         leftLimit = left + leftLength;
985         rightLimit = right + rightLength;
986         for(;;) {
987             if(equalPrefixLength == leftLength) {
988                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
989                 break;
990             } else if(equalPrefixLength == rightLength ||
991                       left[equalPrefixLength] != right[equalPrefixLength]) {
992                 break;
993             }
994             ++equalPrefixLength;
995         }
996     }
997 
998     UBool numeric = settings->isNumeric();
999     if(equalPrefixLength > 0) {
1000         if((equalPrefixLength != leftLength &&
1001                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1002                 (equalPrefixLength != rightLength &&
1003                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1004             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1005             while(--equalPrefixLength > 0 &&
1006                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1007         }
1008         // Notes:
1009         // - A longer string can compare equal to a prefix of it if only ignorables follow.
1010         // - With a backward level, a longer string can compare less-than a prefix of it.
1011 
1012         // Pass the actual start of each string into the CollationIterators,
1013         // plus the equalPrefixLength position,
1014         // so that prefix matches back into the equal prefix work.
1015     }
1016 
1017     int32_t result;
1018     int32_t fastLatinOptions = settings->fastLatinOptions;
1019     if(fastLatinOptions >= 0 &&
1020             (equalPrefixLength == leftLength ||
1021                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1022             (equalPrefixLength == rightLength ||
1023                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1024         if(leftLength >= 0) {
1025             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1026                                                       settings->fastLatinPrimaries,
1027                                                       fastLatinOptions,
1028                                                       left + equalPrefixLength,
1029                                                       leftLength - equalPrefixLength,
1030                                                       right + equalPrefixLength,
1031                                                       rightLength - equalPrefixLength);
1032         } else {
1033             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1034                                                       settings->fastLatinPrimaries,
1035                                                       fastLatinOptions,
1036                                                       left + equalPrefixLength, -1,
1037                                                       right + equalPrefixLength, -1);
1038         }
1039     } else {
1040         result = CollationFastLatin::BAIL_OUT_RESULT;
1041     }
1042 
1043     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1044         if(settings->dontCheckFCD()) {
1045             UTF16CollationIterator leftIter(data, numeric,
1046                                             left, left + equalPrefixLength, leftLimit);
1047             UTF16CollationIterator rightIter(data, numeric,
1048                                             right, right + equalPrefixLength, rightLimit);
1049             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1050         } else {
1051             FCDUTF16CollationIterator leftIter(data, numeric,
1052                                               left, left + equalPrefixLength, leftLimit);
1053             FCDUTF16CollationIterator rightIter(data, numeric,
1054                                                 right, right + equalPrefixLength, rightLimit);
1055             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1056         }
1057     }
1058     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1059         return (UCollationResult)result;
1060     }
1061 
1062     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1063     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1064     // and the benefit seems unlikely to be measurable.
1065 
1066     // Compare identical level.
1067     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1068     left += equalPrefixLength;
1069     right += equalPrefixLength;
1070     if(settings->dontCheckFCD()) {
1071         UTF16NFDIterator leftIter(left, leftLimit);
1072         UTF16NFDIterator rightIter(right, rightLimit);
1073         return compareNFDIter(nfcImpl, leftIter, rightIter);
1074     } else {
1075         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1076         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1077         return compareNFDIter(nfcImpl, leftIter, rightIter);
1078     }
1079 }
1080 
1081 UCollationResult
doCompare(const uint8_t * left,int32_t leftLength,const uint8_t * right,int32_t rightLength,UErrorCode & errorCode) const1082 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1083                              const uint8_t *right, int32_t rightLength,
1084                              UErrorCode &errorCode) const {
1085     // U_FAILURE(errorCode) checked by caller.
1086     if(left == right && leftLength == rightLength) {
1087         return UCOL_EQUAL;
1088     }
1089 
1090     // Identical-prefix test.
1091     int32_t equalPrefixLength = 0;
1092     if(leftLength < 0) {
1093         uint8_t c;
1094         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1095             if(c == 0) { return UCOL_EQUAL; }
1096             ++equalPrefixLength;
1097         }
1098     } else {
1099         for(;;) {
1100             if(equalPrefixLength == leftLength) {
1101                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1102                 break;
1103             } else if(equalPrefixLength == rightLength ||
1104                       left[equalPrefixLength] != right[equalPrefixLength]) {
1105                 break;
1106             }
1107             ++equalPrefixLength;
1108         }
1109     }
1110     // Back up to the start of a partially-equal code point.
1111     if(equalPrefixLength > 0 &&
1112             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1113             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1114         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1115     }
1116 
1117     UBool numeric = settings->isNumeric();
1118     if(equalPrefixLength > 0) {
1119         UBool unsafe = FALSE;
1120         if(equalPrefixLength != leftLength) {
1121             int32_t i = equalPrefixLength;
1122             UChar32 c;
1123             U8_NEXT_OR_FFFD(left, i, leftLength, c);
1124             unsafe = data->isUnsafeBackward(c, numeric);
1125         }
1126         if(!unsafe && equalPrefixLength != rightLength) {
1127             int32_t i = equalPrefixLength;
1128             UChar32 c;
1129             U8_NEXT_OR_FFFD(right, i, rightLength, c);
1130             unsafe = data->isUnsafeBackward(c, numeric);
1131         }
1132         if(unsafe) {
1133             // Identical prefix: Back up to the start of a contraction or reordering sequence.
1134             UChar32 c;
1135             do {
1136                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1137             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1138         }
1139         // See the notes in the UTF-16 version.
1140 
1141         // Pass the actual start of each string into the CollationIterators,
1142         // plus the equalPrefixLength position,
1143         // so that prefix matches back into the equal prefix work.
1144     }
1145 
1146     int32_t result;
1147     int32_t fastLatinOptions = settings->fastLatinOptions;
1148     if(fastLatinOptions >= 0 &&
1149             (equalPrefixLength == leftLength ||
1150                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1151             (equalPrefixLength == rightLength ||
1152                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1153         if(leftLength >= 0) {
1154             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1155                                                      settings->fastLatinPrimaries,
1156                                                      fastLatinOptions,
1157                                                      left + equalPrefixLength,
1158                                                      leftLength - equalPrefixLength,
1159                                                      right + equalPrefixLength,
1160                                                      rightLength - equalPrefixLength);
1161         } else {
1162             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1163                                                      settings->fastLatinPrimaries,
1164                                                      fastLatinOptions,
1165                                                      left + equalPrefixLength, -1,
1166                                                      right + equalPrefixLength, -1);
1167         }
1168     } else {
1169         result = CollationFastLatin::BAIL_OUT_RESULT;
1170     }
1171 
1172     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1173         if(settings->dontCheckFCD()) {
1174             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1175             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1176             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1177         } else {
1178             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1179             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1180             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1181         }
1182     }
1183     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1184         return (UCollationResult)result;
1185     }
1186 
1187     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1188     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1189     // and the benefit seems unlikely to be measurable.
1190 
1191     // Compare identical level.
1192     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1193     left += equalPrefixLength;
1194     right += equalPrefixLength;
1195     if(leftLength > 0) {
1196         leftLength -= equalPrefixLength;
1197         rightLength -= equalPrefixLength;
1198     }
1199     if(settings->dontCheckFCD()) {
1200         UTF8NFDIterator leftIter(left, leftLength);
1201         UTF8NFDIterator rightIter(right, rightLength);
1202         return compareNFDIter(nfcImpl, leftIter, rightIter);
1203     } else {
1204         FCDUTF8NFDIterator leftIter(data, left, leftLength);
1205         FCDUTF8NFDIterator rightIter(data, right, rightLength);
1206         return compareNFDIter(nfcImpl, leftIter, rightIter);
1207     }
1208 }
1209 
1210 UCollationResult
compare(UCharIterator & left,UCharIterator & right,UErrorCode & errorCode) const1211 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1212                            UErrorCode &errorCode) const {
1213     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1214     UBool numeric = settings->isNumeric();
1215 
1216     // Identical-prefix test.
1217     int32_t equalPrefixLength = 0;
1218     {
1219         UChar32 leftUnit;
1220         UChar32 rightUnit;
1221         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1222             if(leftUnit < 0) { return UCOL_EQUAL; }
1223             ++equalPrefixLength;
1224         }
1225 
1226         // Back out the code units that differed, for the real collation comparison.
1227         if(leftUnit >= 0) { left.previous(&left); }
1228         if(rightUnit >= 0) { right.previous(&right); }
1229 
1230         if(equalPrefixLength > 0) {
1231             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1232                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1233                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1234                 do {
1235                     --equalPrefixLength;
1236                     leftUnit = left.previous(&left);
1237                     right.previous(&right);
1238                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1239             }
1240             // See the notes in the UTF-16 version.
1241         }
1242     }
1243 
1244     UCollationResult result;
1245     if(settings->dontCheckFCD()) {
1246         UIterCollationIterator leftIter(data, numeric, left);
1247         UIterCollationIterator rightIter(data, numeric, right);
1248         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1249     } else {
1250         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1251         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1252         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1253     }
1254     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1255         return result;
1256     }
1257 
1258     // Compare identical level.
1259     left.move(&left, equalPrefixLength, UITER_ZERO);
1260     right.move(&right, equalPrefixLength, UITER_ZERO);
1261     const Normalizer2Impl &nfcImpl = data->nfcImpl;
1262     if(settings->dontCheckFCD()) {
1263         UIterNFDIterator leftIter(left);
1264         UIterNFDIterator rightIter(right);
1265         return compareNFDIter(nfcImpl, leftIter, rightIter);
1266     } else {
1267         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1268         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1269         return compareNFDIter(nfcImpl, leftIter, rightIter);
1270     }
1271 }
1272 
1273 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1274 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1275                                    UErrorCode &errorCode) const {
1276     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1277 }
1278 
1279 CollationKey &
getCollationKey(const UChar * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1280 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1281                                    UErrorCode &errorCode) const {
1282     if(U_FAILURE(errorCode)) {
1283         return key.setToBogus();
1284     }
1285     if(s == NULL && length != 0) {
1286         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1287         return key.setToBogus();
1288     }
1289     key.reset();  // resets the "bogus" state
1290     CollationKeyByteSink sink(key);
1291     writeSortKey(s, length, sink, errorCode);
1292     if(U_FAILURE(errorCode)) {
1293         key.setToBogus();
1294     } else if(key.isBogus()) {
1295         errorCode = U_MEMORY_ALLOCATION_ERROR;
1296     } else {
1297         key.setLength(sink.NumberOfBytesAppended());
1298     }
1299     return key;
1300 }
1301 
1302 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1303 RuleBasedCollator::getSortKey(const UnicodeString &s,
1304                               uint8_t *dest, int32_t capacity) const {
1305     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1306 }
1307 
1308 int32_t
getSortKey(const UChar * s,int32_t length,uint8_t * dest,int32_t capacity) const1309 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1310                               uint8_t *dest, int32_t capacity) const {
1311     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1312         return 0;
1313     }
1314     uint8_t noDest[1] = { 0 };
1315     if(dest == NULL) {
1316         // Distinguish pure preflighting from an allocation error.
1317         dest = noDest;
1318         capacity = 0;
1319     }
1320     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1321     UErrorCode errorCode = U_ZERO_ERROR;
1322     writeSortKey(s, length, sink, errorCode);
1323     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1324 }
1325 
1326 void
writeSortKey(const UChar * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1327 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1328                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1329     if(U_FAILURE(errorCode)) { return; }
1330     const UChar *limit = (length >= 0) ? s + length : NULL;
1331     UBool numeric = settings->isNumeric();
1332     CollationKeys::LevelCallback callback;
1333     if(settings->dontCheckFCD()) {
1334         UTF16CollationIterator iter(data, numeric, s, s, limit);
1335         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1336                                                   sink, Collation::PRIMARY_LEVEL,
1337                                                   callback, TRUE, errorCode);
1338     } else {
1339         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1340         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1341                                                   sink, Collation::PRIMARY_LEVEL,
1342                                                   callback, TRUE, errorCode);
1343     }
1344     if(settings->getStrength() == UCOL_IDENTICAL) {
1345         writeIdenticalLevel(s, limit, sink, errorCode);
1346     }
1347     static const char terminator = 0;  // TERMINATOR_BYTE
1348     sink.Append(&terminator, 1);
1349 }
1350 
1351 void
writeIdenticalLevel(const UChar * s,const UChar * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1352 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1353                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
1354     // NFD quick check
1355     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1356     if(U_FAILURE(errorCode)) { return; }
1357     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1358     UChar32 prev = 0;
1359     if(nfdQCYesLimit != s) {
1360         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1361     }
1362     // Is there non-NFD text?
1363     int32_t destLengthEstimate;
1364     if(limit != NULL) {
1365         if(nfdQCYesLimit == limit) { return; }
1366         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1367     } else {
1368         // s is NUL-terminated
1369         if(*nfdQCYesLimit == 0) { return; }
1370         destLengthEstimate = -1;
1371     }
1372     UnicodeString nfd;
1373     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1374     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1375 }
1376 
1377 namespace {
1378 
1379 /**
1380  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1381  * with an instance of this callback class.
1382  * When another level is about to be written, the callback
1383  * records the level and the number of bytes that will be written until
1384  * the sink (which is actually a FixedSortKeyByteSink) fills up.
1385  *
1386  * When internalNextSortKeyPart() is called again, it restarts with the last level
1387  * and ignores as many bytes as were written previously for that level.
1388  */
1389 class PartLevelCallback : public CollationKeys::LevelCallback {
1390 public:
PartLevelCallback(const SortKeyByteSink & s)1391     PartLevelCallback(const SortKeyByteSink &s)
1392             : sink(s), level(Collation::PRIMARY_LEVEL) {
1393         levelCapacity = sink.GetRemainingCapacity();
1394     }
~PartLevelCallback()1395     virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1396     virtual UBool needToWrite(Collation::Level l) {
1397         if(!sink.Overflowed()) {
1398             // Remember a level that will be at least partially written.
1399             level = l;
1400             levelCapacity = sink.GetRemainingCapacity();
1401             return TRUE;
1402         } else {
1403             return FALSE;
1404         }
1405     }
getLevel() const1406     Collation::Level getLevel() const { return level; }
getLevelCapacity() const1407     int32_t getLevelCapacity() const { return levelCapacity; }
1408 
1409 private:
1410     const SortKeyByteSink &sink;
1411     Collation::Level level;
1412     int32_t levelCapacity;
1413 };
1414 
1415 }  // namespace
1416 
1417 int32_t
internalNextSortKeyPart(UCharIterator * iter,uint32_t state[2],uint8_t * dest,int32_t count,UErrorCode & errorCode) const1418 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1419                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1420     if(U_FAILURE(errorCode)) { return 0; }
1421     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
1422         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1423         return 0;
1424     }
1425     if(count == 0) { return 0; }
1426 
1427     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1428     sink.IgnoreBytes((int32_t)state[1]);
1429     iter->move(iter, 0, UITER_START);
1430 
1431     Collation::Level level = (Collation::Level)state[0];
1432     if(level <= Collation::QUATERNARY_LEVEL) {
1433         UBool numeric = settings->isNumeric();
1434         PartLevelCallback callback(sink);
1435         if(settings->dontCheckFCD()) {
1436             UIterCollationIterator ci(data, numeric, *iter);
1437             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1438                                                       sink, level, callback, FALSE, errorCode);
1439         } else {
1440             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1441             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1442                                                       sink, level, callback, FALSE, errorCode);
1443         }
1444         if(U_FAILURE(errorCode)) { return 0; }
1445         if(sink.NumberOfBytesAppended() > count) {
1446             state[0] = (uint32_t)callback.getLevel();
1447             state[1] = (uint32_t)callback.getLevelCapacity();
1448             return count;
1449         }
1450         // All of the normal levels are done.
1451         if(settings->getStrength() == UCOL_IDENTICAL) {
1452             level = Collation::IDENTICAL_LEVEL;
1453             iter->move(iter, 0, UITER_START);
1454         }
1455         // else fall through to setting ZERO_LEVEL
1456     }
1457 
1458     if(level == Collation::IDENTICAL_LEVEL) {
1459         int32_t levelCapacity = sink.GetRemainingCapacity();
1460         UnicodeString s;
1461         for(;;) {
1462             UChar32 c = iter->next(iter);
1463             if(c < 0) { break; }
1464             s.append((UChar)c);
1465         }
1466         const UChar *sArray = s.getBuffer();
1467         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1468         if(U_FAILURE(errorCode)) { return 0; }
1469         if(sink.NumberOfBytesAppended() > count) {
1470             state[0] = (uint32_t)level;
1471             state[1] = (uint32_t)levelCapacity;
1472             return count;
1473         }
1474     }
1475 
1476     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1477     state[0] = (uint32_t)Collation::ZERO_LEVEL;
1478     state[1] = 0;
1479     int32_t length = sink.NumberOfBytesAppended();
1480     int32_t i = length;
1481     while(i < count) { dest[i++] = 0; }
1482     return length;
1483 }
1484 
1485 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1486 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1487                                   UErrorCode &errorCode) const {
1488     if(U_FAILURE(errorCode)) { return; }
1489     const UChar *s = str.getBuffer();
1490     const UChar *limit = s + str.length();
1491     UBool numeric = settings->isNumeric();
1492     if(settings->dontCheckFCD()) {
1493         UTF16CollationIterator iter(data, numeric, s, s, limit);
1494         int64_t ce;
1495         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1496             ces.addElement(ce, errorCode);
1497         }
1498     } else {
1499         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1500         int64_t ce;
1501         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1502             ces.addElement(ce, errorCode);
1503         }
1504     }
1505 }
1506 
1507 namespace {
1508 
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1509 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1510                   UErrorCode &errorCode) {
1511     if(U_FAILURE(errorCode) || length == 0) { return; }
1512     if(!s.isEmpty()) {
1513         s.append('_', errorCode);
1514     }
1515     s.append(letter, errorCode);
1516     for(int32_t i = 0; i < length; ++i) {
1517         s.append(uprv_toupper(subtag[i]), errorCode);
1518     }
1519 }
1520 
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1521 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1522                      UErrorCode &errorCode) {
1523     if(U_FAILURE(errorCode)) { return; }
1524     if(!s.isEmpty()) {
1525         s.append('_', errorCode);
1526     }
1527     static const char *valueChars = "1234...........IXO..SN..LU......";
1528     s.append(letter, errorCode);
1529     s.append(valueChars[value], errorCode);
1530 }
1531 
1532 }  // namespace
1533 
1534 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1535 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1536                                                     char *buffer, int32_t capacity,
1537                                                     UErrorCode &errorCode) const {
1538     if(U_FAILURE(errorCode)) { return 0; }
1539     if(buffer == NULL ? capacity != 0 : capacity < 0) {
1540         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1541         return 0;
1542     }
1543     if(locale == NULL) {
1544         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1545     }
1546 
1547     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1548     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1549                                                   "collation", locale,
1550                                                   NULL, &errorCode);
1551     if(U_FAILURE(errorCode)) { return 0; }
1552     if(length == 0) {
1553         uprv_strcpy(resultLocale, "root");
1554     } else {
1555         resultLocale[length] = 0;
1556     }
1557 
1558     // Append items in alphabetic order of their short definition letters.
1559     CharString result;
1560     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561 
1562     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1563         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1564     }
1565     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1566     // See ICU tickets #10372 and #10386.
1567     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1568         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1569     }
1570     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1571         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1572     }
1573     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1574         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1575     }
1576     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1577         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1578     }
1579     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1580     length = uloc_getKeywordValue(resultLocale, "collation", subtag, LENGTHOF(subtag), &errorCode);
1581     appendSubtag(result, 'K', subtag, length, errorCode);
1582     length = uloc_getLanguage(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1583     appendSubtag(result, 'L', subtag, length, errorCode);
1584     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1585         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1586     }
1587     length = uloc_getCountry(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1588     appendSubtag(result, 'R', subtag, length, errorCode);
1589     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1590         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1591     }
1592     length = uloc_getVariant(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1593     appendSubtag(result, 'V', subtag, length, errorCode);
1594     length = uloc_getScript(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1595     appendSubtag(result, 'Z', subtag, length, errorCode);
1596 
1597     if(U_FAILURE(errorCode)) { return 0; }
1598     if(result.length() <= capacity) {
1599         uprv_memcpy(buffer, result.data(), result.length());
1600     }
1601     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
1602 }
1603 
1604 UBool
isUnsafe(UChar32 c) const1605 RuleBasedCollator::isUnsafe(UChar32 c) const {
1606     return data->isUnsafeBackward(c, settings->isNumeric());
1607 }
1608 
1609 void
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1610 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1611     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1612 }
1613 
1614 UBool
initMaxExpansions(UErrorCode & errorCode) const1615 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1616     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1617     return U_SUCCESS(errorCode);
1618 }
1619 
1620 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1621 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1622     UErrorCode errorCode = U_ZERO_ERROR;
1623     if(!initMaxExpansions(errorCode)) { return NULL; }
1624     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1625     if(U_FAILURE(errorCode)) {
1626         delete cei;
1627         return NULL;
1628     }
1629     return cei;
1630 }
1631 
1632 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1633 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1634     UErrorCode errorCode = U_ZERO_ERROR;
1635     if(!initMaxExpansions(errorCode)) { return NULL; }
1636     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1637     if(U_FAILURE(errorCode)) {
1638         delete cei;
1639         return NULL;
1640     }
1641     return cei;
1642 }
1643 
1644 int32_t
getMaxExpansion(int32_t order) const1645 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1646     UErrorCode errorCode = U_ZERO_ERROR;
1647     (void)initMaxExpansions(errorCode);
1648     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1649 }
1650 
1651 U_NAMESPACE_END
1652 
1653 #endif  // !UCONFIG_NO_COLLATION
1654