1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * rulebasedcollator.cpp
7 *
8 * (replaced the former tblcoll.cpp)
9 *
10 * created on: 2012feb14 with new and old collation code
11 * created by: Markus W. Scherer
12 */
13
14 #include "unicode/utypes.h"
15
16 #if !UCONFIG_NO_COLLATION
17
18 #include "unicode/coll.h"
19 #include "unicode/coleitr.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/locid.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/tblcoll.h"
24 #include "unicode/ucol.h"
25 #include "unicode/uiter.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uniset.h"
28 #include "unicode/unistr.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/utf8.h"
31 #include "unicode/uversion.h"
32 #include "bocsu.h"
33 #include "charstr.h"
34 #include "cmemory.h"
35 #include "collation.h"
36 #include "collationcompare.h"
37 #include "collationdata.h"
38 #include "collationdatareader.h"
39 #include "collationfastlatin.h"
40 #include "collationiterator.h"
41 #include "collationkeys.h"
42 #include "collationroot.h"
43 #include "collationsets.h"
44 #include "collationsettings.h"
45 #include "collationtailoring.h"
46 #include "cstring.h"
47 #include "uassert.h"
48 #include "ucol_imp.h"
49 #include "uhash.h"
50 #include "uitercollationiterator.h"
51 #include "ustr_imp.h"
52 #include "utf16collationiterator.h"
53 #include "utf8collationiterator.h"
54 #include "uvectr64.h"
55
// Number of elements in a fixed-size array, converted to int32_t.
// The argument must be a real array: for a pointer, sizeof(array) is the
// pointer size and the result is meaningless.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
57
58 U_NAMESPACE_BEGIN
59
60 namespace {
61
62 class FixedSortKeyByteSink : public SortKeyByteSink {
63 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)64 FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65 : SortKeyByteSink(dest, destCapacity) {}
66 virtual ~FixedSortKeyByteSink();
67
68 private:
69 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
70 virtual UBool Resize(int32_t appendCapacity, int32_t length);
71 };
72
~FixedSortKeyByteSink()73 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
74
75 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)76 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78 // Fill the buffer completely.
79 int32_t available = capacity_ - length;
80 if (available > 0) {
81 uprv_memcpy(buffer_ + length, bytes, available);
82 }
83 }
84
85 UBool
Resize(int32_t,int32_t)86 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
87 return FALSE;
88 }
89
90 } // namespace
91
92 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
93 class CollationKeyByteSink : public SortKeyByteSink {
94 public:
CollationKeyByteSink(CollationKey & key)95 CollationKeyByteSink(CollationKey &key)
96 : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97 key_(key) {}
98 virtual ~CollationKeyByteSink();
99
100 private:
101 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
102 virtual UBool Resize(int32_t appendCapacity, int32_t length);
103
104 CollationKey &key_;
105 };
106
~CollationKeyByteSink()107 CollationKeyByteSink::~CollationKeyByteSink() {}
108
109 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)110 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112 if (Resize(n, length)) {
113 uprv_memcpy(buffer_ + length, bytes, n);
114 }
115 }
116
117 UBool
Resize(int32_t appendCapacity,int32_t length)118 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
119 if (buffer_ == NULL) {
120 return FALSE; // allocation failed before already
121 }
122 int32_t newCapacity = 2 * capacity_;
123 int32_t altCapacity = length + 2 * appendCapacity;
124 if (newCapacity < altCapacity) {
125 newCapacity = altCapacity;
126 }
127 if (newCapacity < 200) {
128 newCapacity = 200;
129 }
130 uint8_t *newBuffer = key_.reallocate(newCapacity, length);
131 if (newBuffer == NULL) {
132 SetNotOk();
133 return FALSE;
134 }
135 buffer_ = reinterpret_cast<char *>(newBuffer);
136 capacity_ = newCapacity;
137 return TRUE;
138 }
139
/**
 * Copy constructor. Copies the other collator's pointers and flags, and adds
 * a reference to the settings and to the tailoring that both collators now use.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid)
{
    // One more holder of each of the two objects.
    settings->addRef();
    tailoring->addRef();
}
151
/**
 * Builds a collator from binary tailoring data.
 *
 * On any failure the collator is left with NULL data, settings and tailoring.
 *
 * @param bin the binary data; must not be NULL
 * @param length number of bytes in bin; must be positive
 * @param base the base collator; must not be NULL and must use the root tailoring
 * @param errorCode in/out status. Set to U_ILLEGAL_ARGUMENT_ERROR for bad
 *        arguments, U_UNSUPPORTED_ERROR if base is not built on the root
 *        tailoring, U_MEMORY_ALLOCATION_ERROR if the new tailoring cannot be
 *        created, or whatever CollationRoot::getRoot() or
 *        CollationDataReader::read() report.
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(NULL),
          settings(NULL),
          tailoring(NULL),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(FALSE)
{
    if(U_FAILURE(errorCode)) {
        return;
    }
    const UBool badArguments = (bin == NULL || length <= 0 || base == NULL);
    if(badArguments) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    // Only the root collator is accepted as a base.
    if(rootTailoring != base->tailoring) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> newTailoring(
            new CollationTailoring(base->tailoring->settings));
    if(newTailoring.isNull() || newTailoring->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *newTailoring, errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    newTailoring->actualLocale.setToBogus();
    // Release the LocalPointer's ownership and hand the tailoring to adoptTailoring().
    adoptTailoring(newTailoring.orphan());
}
181
RuleBasedCollator(const CollationTailoring * t,const Locale & vl)182 RuleBasedCollator::RuleBasedCollator(const CollationTailoring *t, const Locale &vl)
183 : data(t->data),
184 settings(t->settings),
185 tailoring(t),
186 validLocale(vl),
187 explicitlySetAttributes(0),
188 actualLocaleIsSameAsValid(FALSE) {
189 settings->addRef();
190 tailoring->addRef();
191 }
192
~RuleBasedCollator()193 RuleBasedCollator::~RuleBasedCollator() {
194 SharedObject::clearPtr(settings);
195 SharedObject::clearPtr(tailoring);
196 }
197
198 void
adoptTailoring(CollationTailoring * t)199 RuleBasedCollator::adoptTailoring(CollationTailoring *t) {
200 U_ASSERT(settings == NULL && data == NULL && tailoring == NULL);
201 data = t->data;
202 settings = t->settings;
203 settings->addRef();
204 t->addRef();
205 tailoring = t;
206 validLocale = t->actualLocale;
207 actualLocaleIsSameAsValid = FALSE;
208 }
209
210 Collator *
clone() const211 RuleBasedCollator::clone() const {
212 return new RuleBasedCollator(*this);
213 }
214
/**
 * Assignment. Self-assignment is a no-op. Otherwise the settings and tailoring
 * pointers are replaced via SharedObject::copyPtr(), data is taken from the
 * newly assigned tailoring, and the remaining fields are copied.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        SharedObject::copyPtr(other.tailoring, tailoring);
        // tailoring now points to other's tailoring.
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
225
// Expands the ICU RTTI boilerplate macro for RuleBasedCollator.
// NOTE(review): presumably this defines the class-ID functions for this class;
// confirm against the macro definition, which is not part of this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
227
228 UBool
229 RuleBasedCollator::operator==(const Collator& other) const {
230 if(this == &other) { return TRUE; }
231 if(!Collator::operator==(other)) { return FALSE; }
232 const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
233 if(*settings != *o.settings) { return FALSE; }
234 if(data == o.data) { return TRUE; }
235 UBool thisIsRoot = data->base == NULL;
236 UBool otherIsRoot = o.data->base == NULL;
237 U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be ==
238 if(thisIsRoot != otherIsRoot) { return FALSE; }
239 if((thisIsRoot || !tailoring->rules.isEmpty()) &&
240 (otherIsRoot || !o.tailoring->rules.isEmpty())) {
241 // Shortcut: If both collators have valid rule strings, then compare those.
242 if(tailoring->rules == o.tailoring->rules) { return TRUE; }
243 }
244 // Different rule strings can result in the same or equivalent tailoring.
245 // The rule strings are optional in ICU resource bundles, although included by default.
246 // cloneBinary() drops the rule string.
247 UErrorCode errorCode = U_ZERO_ERROR;
248 LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
249 LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
250 if(U_FAILURE(errorCode)) { return FALSE; }
251 if(*thisTailored != *otherTailored) { return FALSE; }
252 // For completeness, we should compare all of the mappings;
253 // or we should create a list of strings, sort it with one collator,
254 // and check if both collators compare adjacent strings the same
255 // (order & strength, down to quaternary); or similar.
256 // Testing equality of collators seems unusual.
257 return TRUE;
258 }
259
260 int32_t
hashCode() const261 RuleBasedCollator::hashCode() const {
262 int32_t h = settings->hashCode();
263 if(data->base == NULL) { return h; } // root collator
264 // Do not rely on the rule string, see comments in operator==().
265 UErrorCode errorCode = U_ZERO_ERROR;
266 LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
267 if(U_FAILURE(errorCode)) { return 0; }
268 UnicodeSetIterator iter(*set);
269 while(iter.next() && !iter.isString()) {
270 h ^= data->getCE32(iter.getCodepoint());
271 }
272 return h;
273 }
274
275 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)276 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
277 const Locale &actual) {
278 if(actual == tailoring->actualLocale) {
279 actualLocaleIsSameAsValid = FALSE;
280 } else {
281 U_ASSERT(actual == valid);
282 actualLocaleIsSameAsValid = TRUE;
283 }
284 // Do not modify tailoring.actualLocale:
285 // We cannot be sure that that would be thread-safe.
286 validLocale = valid;
287 (void)requested; // Ignore, see also ticket #10477.
288 }
289
290 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const291 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
292 if(U_FAILURE(errorCode)) {
293 return Locale::getRoot();
294 }
295 switch(type) {
296 case ULOC_ACTUAL_LOCALE:
297 return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
298 case ULOC_VALID_LOCALE:
299 case ULOC_REQUESTED_LOCALE: // TODO: Drop this, see ticket #10477.
300 return validLocale;
301 default:
302 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
303 return Locale::getRoot();
304 }
305 }
306
307 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const308 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
309 if(U_FAILURE(errorCode)) {
310 return NULL;
311 }
312 const Locale *result;
313 switch(type) {
314 case ULOC_ACTUAL_LOCALE:
315 result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
316 break;
317 case ULOC_VALID_LOCALE:
318 case ULOC_REQUESTED_LOCALE: // TODO: Drop this, see ticket #10477.
319 result = &validLocale;
320 break;
321 default:
322 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
323 return NULL;
324 }
325 if(result->isBogus()) { return NULL; }
326 const char *id = result->getName();
327 return id[0] == 0 ? "root" : id;
328 }
329
330 const UnicodeString&
getRules() const331 RuleBasedCollator::getRules() const {
332 return tailoring->rules;
333 }
334
335 void
getRules(UColRuleOption delta,UnicodeString & buffer) const336 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
337 if(delta == UCOL_TAILORING_ONLY) {
338 buffer = tailoring->rules;
339 return;
340 }
341 // UCOL_FULL_RULES
342 buffer.remove();
343 CollationLoader::appendRootRules(buffer);
344 buffer.append(tailoring->rules).getTerminatedBuffer();
345 }
346
347 void
getVersion(UVersionInfo version) const348 RuleBasedCollator::getVersion(UVersionInfo version) const {
349 uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
350 version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
351 }
352
353 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const354 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
355 if(U_FAILURE(errorCode)) { return NULL; }
356 UnicodeSet *tailored = new UnicodeSet();
357 if(tailored == NULL) {
358 errorCode = U_MEMORY_ALLOCATION_ERROR;
359 return NULL;
360 }
361 if(data->base != NULL) {
362 TailoredSet(tailored).forData(data, errorCode);
363 if(U_FAILURE(errorCode)) {
364 delete tailored;
365 return NULL;
366 }
367 }
368 return tailored;
369 }
370
371 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const372 RuleBasedCollator::internalGetContractionsAndExpansions(
373 UnicodeSet *contractions, UnicodeSet *expansions,
374 UBool addPrefixes, UErrorCode &errorCode) const {
375 if(U_FAILURE(errorCode)) { return; }
376 if(contractions != NULL) {
377 contractions->clear();
378 }
379 if(expansions != NULL) {
380 expansions->clear();
381 }
382 ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
383 }
384
385 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const386 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
387 if(U_FAILURE(errorCode)) { return; }
388 ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
389 }
390
391 const CollationSettings &
getDefaultSettings() const392 RuleBasedCollator::getDefaultSettings() const {
393 return *tailoring->settings;
394 }
395
396 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const397 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
398 if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
399 int32_t option;
400 switch(attr) {
401 case UCOL_FRENCH_COLLATION:
402 option = CollationSettings::BACKWARD_SECONDARY;
403 break;
404 case UCOL_ALTERNATE_HANDLING:
405 return settings->getAlternateHandling();
406 case UCOL_CASE_FIRST:
407 return settings->getCaseFirst();
408 case UCOL_CASE_LEVEL:
409 option = CollationSettings::CASE_LEVEL;
410 break;
411 case UCOL_NORMALIZATION_MODE:
412 option = CollationSettings::CHECK_FCD;
413 break;
414 case UCOL_STRENGTH:
415 return (UColAttributeValue)settings->getStrength();
416 case UCOL_HIRAGANA_QUATERNARY_MODE:
417 // Deprecated attribute, unsettable.
418 return UCOL_OFF;
419 case UCOL_NUMERIC_COLLATION:
420 option = CollationSettings::NUMERIC;
421 break;
422 default:
423 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
424 return UCOL_DEFAULT;
425 }
426 return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
427 }
428
429 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)430 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
431 UErrorCode &errorCode) {
432 UColAttributeValue oldValue = getAttribute(attr, errorCode);
433 if(U_FAILURE(errorCode)) { return; }
434 if(value == oldValue) {
435 setAttributeExplicitly(attr);
436 return;
437 }
438 const CollationSettings &defaultSettings = getDefaultSettings();
439 if(settings == &defaultSettings) {
440 if(value == UCOL_DEFAULT) {
441 setAttributeDefault(attr);
442 return;
443 }
444 }
445 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
446 if(ownedSettings == NULL) {
447 errorCode = U_MEMORY_ALLOCATION_ERROR;
448 return;
449 }
450
451 switch(attr) {
452 case UCOL_FRENCH_COLLATION:
453 ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
454 defaultSettings.options, errorCode);
455 break;
456 case UCOL_ALTERNATE_HANDLING:
457 ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
458 break;
459 case UCOL_CASE_FIRST:
460 ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
461 break;
462 case UCOL_CASE_LEVEL:
463 ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
464 defaultSettings.options, errorCode);
465 break;
466 case UCOL_NORMALIZATION_MODE:
467 ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
468 defaultSettings.options, errorCode);
469 break;
470 case UCOL_STRENGTH:
471 ownedSettings->setStrength(value, defaultSettings.options, errorCode);
472 break;
473 case UCOL_HIRAGANA_QUATERNARY_MODE:
474 // Deprecated attribute. Check for valid values but do not change anything.
475 if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
476 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
477 }
478 break;
479 case UCOL_NUMERIC_COLLATION:
480 ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
481 break;
482 default:
483 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
484 break;
485 }
486 if(U_FAILURE(errorCode)) { return; }
487 setFastLatinOptions(*ownedSettings);
488 if(value == UCOL_DEFAULT) {
489 setAttributeDefault(attr);
490 } else {
491 setAttributeExplicitly(attr);
492 }
493 }
494
495 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)496 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
497 if(U_FAILURE(errorCode)) { return *this; }
498 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
499 int32_t value;
500 if(group == UCOL_REORDER_CODE_DEFAULT) {
501 value = UCOL_DEFAULT;
502 } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
503 value = group - UCOL_REORDER_CODE_FIRST;
504 } else {
505 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
506 return *this;
507 }
508 CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
509 if(value == oldValue) {
510 setAttributeExplicitly(ATTR_VARIABLE_TOP);
511 return *this;
512 }
513 const CollationSettings &defaultSettings = getDefaultSettings();
514 if(settings == &defaultSettings) {
515 if(value == UCOL_DEFAULT) {
516 setAttributeDefault(ATTR_VARIABLE_TOP);
517 return *this;
518 }
519 }
520 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
521 if(ownedSettings == NULL) {
522 errorCode = U_MEMORY_ALLOCATION_ERROR;
523 return *this;
524 }
525
526 if(group == UCOL_REORDER_CODE_DEFAULT) {
527 group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
528 }
529 uint32_t varTop = data->getLastPrimaryForGroup(group);
530 U_ASSERT(varTop != 0);
531 ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
532 if(U_FAILURE(errorCode)) { return *this; }
533 ownedSettings->variableTop = varTop;
534 setFastLatinOptions(*ownedSettings);
535 if(value == UCOL_DEFAULT) {
536 setAttributeDefault(ATTR_VARIABLE_TOP);
537 } else {
538 setAttributeExplicitly(ATTR_VARIABLE_TOP);
539 }
540 return *this;
541 }
542
543 UColReorderCode
getMaxVariable() const544 RuleBasedCollator::getMaxVariable() const {
545 return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
546 }
547
548 uint32_t
getVariableTop(UErrorCode &) const549 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
550 return settings->variableTop;
551 }
552
553 uint32_t
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & errorCode)554 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
555 if(U_FAILURE(errorCode)) { return 0; }
556 if(varTop == NULL && len !=0) {
557 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
558 return 0;
559 }
560 if(len < 0) { len = u_strlen(varTop); }
561 if(len == 0) {
562 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
563 return 0;
564 }
565 UBool numeric = settings->isNumeric();
566 int64_t ce1, ce2;
567 if(settings->dontCheckFCD()) {
568 UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
569 ce1 = ci.nextCE(errorCode);
570 ce2 = ci.nextCE(errorCode);
571 } else {
572 FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
573 ce1 = ci.nextCE(errorCode);
574 ce2 = ci.nextCE(errorCode);
575 }
576 if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
577 errorCode = U_CE_NOT_FOUND_ERROR;
578 return 0;
579 }
580 setVariableTop((uint32_t)(ce1 >> 32), errorCode);
581 return settings->variableTop;
582 }
583
584 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)585 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
586 return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
587 }
588
589 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)590 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
591 if(U_FAILURE(errorCode)) { return; }
592 if(varTop != settings->variableTop) {
593 // Pin the variable top to the end of the reordering group which contains it.
594 // Only a few special groups are supported.
595 int32_t group = data->getGroupForPrimary(varTop);
596 if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
597 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
598 return;
599 }
600 uint32_t v = data->getLastPrimaryForGroup(group);
601 U_ASSERT(v != 0 && v >= varTop);
602 varTop = v;
603 if(varTop != settings->variableTop) {
604 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
605 if(ownedSettings == NULL) {
606 errorCode = U_MEMORY_ALLOCATION_ERROR;
607 return;
608 }
609 ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
610 getDefaultSettings().options, errorCode);
611 if(U_FAILURE(errorCode)) { return; }
612 ownedSettings->variableTop = varTop;
613 setFastLatinOptions(*ownedSettings);
614 }
615 }
616 if(varTop == getDefaultSettings().variableTop) {
617 setAttributeDefault(ATTR_VARIABLE_TOP);
618 } else {
619 setAttributeExplicitly(ATTR_VARIABLE_TOP);
620 }
621 }
622
623 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const624 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
625 UErrorCode &errorCode) const {
626 if(U_FAILURE(errorCode)) { return 0; }
627 if(capacity < 0 || (dest == NULL && capacity > 0)) {
628 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
629 return 0;
630 }
631 int32_t length = settings->reorderCodesLength;
632 if(length == 0) { return 0; }
633 if(length > capacity) {
634 errorCode = U_BUFFER_OVERFLOW_ERROR;
635 return length;
636 }
637 uprv_memcpy(dest, settings->reorderCodes, length * 4);
638 return length;
639 }
640
641 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)642 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
643 UErrorCode &errorCode) {
644 if(U_FAILURE(errorCode)) { return; }
645 if(length < 0 || (reorderCodes == NULL && length > 0)) {
646 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
647 return;
648 }
649 if(length == settings->reorderCodesLength &&
650 uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
651 return;
652 }
653 const CollationSettings &defaultSettings = getDefaultSettings();
654 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
655 if(settings != &defaultSettings) {
656 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
657 if(ownedSettings == NULL) {
658 errorCode = U_MEMORY_ALLOCATION_ERROR;
659 return;
660 }
661 ownedSettings->aliasReordering(defaultSettings.reorderCodes,
662 defaultSettings.reorderCodesLength,
663 defaultSettings.reorderTable);
664 setFastLatinOptions(*ownedSettings);
665 }
666 return;
667 }
668 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
669 if(ownedSettings == NULL) {
670 errorCode = U_MEMORY_ALLOCATION_ERROR;
671 return;
672 }
673 if(length == 0) {
674 ownedSettings->resetReordering();
675 } else {
676 uint8_t reorderTable[256];
677 data->makeReorderTable(reorderCodes, length, reorderTable, errorCode);
678 if(U_FAILURE(errorCode)) { return; }
679 if(!ownedSettings->setReordering(reorderCodes, length, reorderTable)) {
680 errorCode = U_MEMORY_ALLOCATION_ERROR;
681 return;
682 }
683 }
684 setFastLatinOptions(*ownedSettings);
685 }
686
687 void
setFastLatinOptions(CollationSettings & ownedSettings) const688 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
689 ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
690 data, ownedSettings,
691 ownedSettings.fastLatinPrimaries, LENGTHOF(ownedSettings.fastLatinPrimaries));
692 }
693
694 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const695 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
696 UErrorCode &errorCode) const {
697 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
698 return doCompare(left.getBuffer(), left.length(),
699 right.getBuffer(), right.length(), errorCode);
700 }
701
702 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const703 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
704 int32_t length, UErrorCode &errorCode) const {
705 if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
706 if(length < 0) {
707 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
708 return UCOL_EQUAL;
709 }
710 int32_t leftLength = left.length();
711 int32_t rightLength = right.length();
712 if(leftLength > length) { leftLength = length; }
713 if(rightLength > length) { rightLength = length; }
714 return doCompare(left.getBuffer(), leftLength,
715 right.getBuffer(), rightLength, errorCode);
716 }
717
718 UCollationResult
compare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const719 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
720 const UChar *right, int32_t rightLength,
721 UErrorCode &errorCode) const {
722 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
723 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
724 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
725 return UCOL_EQUAL;
726 }
727 // Make sure both or neither strings have a known length.
728 // We do not optimize for mixed length/termination.
729 if(leftLength >= 0) {
730 if(rightLength < 0) { rightLength = u_strlen(right); }
731 } else {
732 if(rightLength >= 0) { leftLength = u_strlen(left); }
733 }
734 return doCompare(left, leftLength, right, rightLength, errorCode);
735 }
736
737 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const738 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
739 UErrorCode &errorCode) const {
740 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
741 const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
742 const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
743 if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
744 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
745 return UCOL_EQUAL;
746 }
747 return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
748 }
749
750 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const751 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
752 const char *right, int32_t rightLength,
753 UErrorCode &errorCode) const {
754 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
755 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
756 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
757 return UCOL_EQUAL;
758 }
759 // Make sure both or neither strings have a known length.
760 // We do not optimize for mixed length/termination.
761 if(leftLength >= 0) {
762 if(rightLength < 0) { rightLength = uprv_strlen(right); }
763 } else {
764 if(rightLength >= 0) { leftLength = uprv_strlen(left); }
765 }
766 return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
767 reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
768 }
769
770 namespace {
771
772 /**
773 * Abstract iterator for identical-level string comparisons.
774 * Returns FCD code points and handles temporary switching to NFD.
775 */
776 class NFDIterator {
777 public:
NFDIterator()778 NFDIterator() : index(-1), length(0) {}
~NFDIterator()779 virtual ~NFDIterator() {}
780 /**
781 * Returns the next code point from the internal normalization buffer,
782 * or else the next text code point.
783 * Returns -1 at the end of the text.
784 */
nextCodePoint()785 UChar32 nextCodePoint() {
786 if(index >= 0) {
787 if(index == length) {
788 index = -1;
789 } else {
790 UChar32 c;
791 U16_NEXT_UNSAFE(decomp, index, c);
792 return c;
793 }
794 }
795 return nextRawCodePoint();
796 }
797 /**
798 * @param nfcImpl
799 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
800 * @return the first code point in c's decomposition,
801 * or c itself if it was decomposed already or if it does not decompose
802 */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)803 UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
804 if(index >= 0) { return c; }
805 decomp = nfcImpl.getDecomposition(c, buffer, length);
806 if(decomp == NULL) { return c; }
807 index = 0;
808 U16_NEXT_UNSAFE(decomp, index, c);
809 return c;
810 }
811 protected:
812 /**
813 * Returns the next text code point in FCD order.
814 * Returns -1 at the end of the text.
815 */
816 virtual UChar32 nextRawCodePoint() = 0;
817 private:
818 const UChar *decomp;
819 UChar buffer[4];
820 int32_t index;
821 int32_t length;
822 };
823
824 class UTF16NFDIterator : public NFDIterator {
825 public:
UTF16NFDIterator(const UChar * text,const UChar * textLimit)826 UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
827 protected:
nextRawCodePoint()828 virtual UChar32 nextRawCodePoint() {
829 if(s == limit) { return U_SENTINEL; }
830 UChar32 c = *s++;
831 if(limit == NULL && c == 0) {
832 s = NULL;
833 return U_SENTINEL;
834 }
835 UChar trail;
836 if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
837 ++s;
838 c = U16_GET_SUPPLEMENTARY(c, trail);
839 }
840 return c;
841 }
842
843 const UChar *s;
844 const UChar *limit;
845 };
846
847 class FCDUTF16NFDIterator : public UTF16NFDIterator {
848 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const UChar * text,const UChar * textLimit)849 FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
850 : UTF16NFDIterator(NULL, NULL) {
851 UErrorCode errorCode = U_ZERO_ERROR;
852 const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
853 if(U_FAILURE(errorCode)) { return; }
854 if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
855 s = text;
856 limit = spanLimit;
857 } else {
858 str.setTo(text, (int32_t)(spanLimit - text));
859 {
860 ReorderingBuffer buffer(nfcImpl, str);
861 if(buffer.init(str.length(), errorCode)) {
862 nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
863 }
864 }
865 if(U_SUCCESS(errorCode)) {
866 s = str.getBuffer();
867 limit = s + str.length();
868 }
869 }
870 }
871 private:
872 UnicodeString str;
873 };
874
875 class UTF8NFDIterator : public NFDIterator {
876 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)877 UTF8NFDIterator(const uint8_t *text, int32_t textLength)
878 : s(text), pos(0), length(textLength) {}
879 protected:
nextRawCodePoint()880 virtual UChar32 nextRawCodePoint() {
881 if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
882 UChar32 c;
883 U8_NEXT_OR_FFFD(s, pos, length, c);
884 return c;
885 }
886
887 const uint8_t *s;
888 int32_t pos;
889 int32_t length;
890 };
891
892 class FCDUTF8NFDIterator : public NFDIterator {
893 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)894 FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
895 : u8ci(data, FALSE, text, 0, textLength) {}
896 protected:
nextRawCodePoint()897 virtual UChar32 nextRawCodePoint() {
898 UErrorCode errorCode = U_ZERO_ERROR;
899 return u8ci.nextCodePoint(errorCode);
900 }
901 private:
902 FCDUTF8CollationIterator u8ci;
903 };
904
905 class UIterNFDIterator : public NFDIterator {
906 public:
UIterNFDIterator(UCharIterator & it)907 UIterNFDIterator(UCharIterator &it) : iter(it) {}
908 protected:
nextRawCodePoint()909 virtual UChar32 nextRawCodePoint() {
910 return uiter_next32(&iter);
911 }
912 private:
913 UCharIterator &iter;
914 };
915
916 class FCDUIterNFDIterator : public NFDIterator {
917 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)918 FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
919 : uici(data, FALSE, it, startIndex) {}
920 protected:
nextRawCodePoint()921 virtual UChar32 nextRawCodePoint() {
922 UErrorCode errorCode = U_ZERO_ERROR;
923 return uici.nextCodePoint(errorCode);
924 }
925 private:
926 FCDUIterCollationIterator uici;
927 };
928
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)929 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
930 NFDIterator &left, NFDIterator &right) {
931 for(;;) {
932 // Fetch the next FCD code point from each string.
933 UChar32 leftCp = left.nextCodePoint();
934 UChar32 rightCp = right.nextCodePoint();
935 if(leftCp == rightCp) {
936 if(leftCp < 0) { break; }
937 continue;
938 }
939 // If they are different, then decompose each and compare again.
940 if(leftCp < 0) {
941 leftCp = -2; // end of string
942 } else if(leftCp == 0xfffe) {
943 leftCp = -1; // U+FFFE: merge separator
944 } else {
945 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
946 }
947 if(rightCp < 0) {
948 rightCp = -2; // end of string
949 } else if(rightCp == 0xfffe) {
950 rightCp = -1; // U+FFFE: merge separator
951 } else {
952 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
953 }
954 if(leftCp < rightCp) { return UCOL_LESS; }
955 if(leftCp > rightCp) { return UCOL_GREATER; }
956 }
957 return UCOL_EQUAL;
958 }
959
960 } // namespace
961
962 UCollationResult
doCompare(const UChar * left,int32_t leftLength,const UChar * right,int32_t rightLength,UErrorCode & errorCode) const963 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
964 const UChar *right, int32_t rightLength,
965 UErrorCode &errorCode) const {
966 // U_FAILURE(errorCode) checked by caller.
967 if(left == right && leftLength == rightLength) {
968 return UCOL_EQUAL;
969 }
970
971 // Identical-prefix test.
972 const UChar *leftLimit;
973 const UChar *rightLimit;
974 int32_t equalPrefixLength = 0;
975 if(leftLength < 0) {
976 leftLimit = NULL;
977 rightLimit = NULL;
978 UChar c;
979 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
980 if(c == 0) { return UCOL_EQUAL; }
981 ++equalPrefixLength;
982 }
983 } else {
984 leftLimit = left + leftLength;
985 rightLimit = right + rightLength;
986 for(;;) {
987 if(equalPrefixLength == leftLength) {
988 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
989 break;
990 } else if(equalPrefixLength == rightLength ||
991 left[equalPrefixLength] != right[equalPrefixLength]) {
992 break;
993 }
994 ++equalPrefixLength;
995 }
996 }
997
998 UBool numeric = settings->isNumeric();
999 if(equalPrefixLength > 0) {
1000 if((equalPrefixLength != leftLength &&
1001 data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1002 (equalPrefixLength != rightLength &&
1003 data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1004 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1005 while(--equalPrefixLength > 0 &&
1006 data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1007 }
1008 // Notes:
1009 // - A longer string can compare equal to a prefix of it if only ignorables follow.
1010 // - With a backward level, a longer string can compare less-than a prefix of it.
1011
1012 // Pass the actual start of each string into the CollationIterators,
1013 // plus the equalPrefixLength position,
1014 // so that prefix matches back into the equal prefix work.
1015 }
1016
1017 int32_t result;
1018 int32_t fastLatinOptions = settings->fastLatinOptions;
1019 if(fastLatinOptions >= 0 &&
1020 (equalPrefixLength == leftLength ||
1021 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1022 (equalPrefixLength == rightLength ||
1023 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1024 if(leftLength >= 0) {
1025 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1026 settings->fastLatinPrimaries,
1027 fastLatinOptions,
1028 left + equalPrefixLength,
1029 leftLength - equalPrefixLength,
1030 right + equalPrefixLength,
1031 rightLength - equalPrefixLength);
1032 } else {
1033 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1034 settings->fastLatinPrimaries,
1035 fastLatinOptions,
1036 left + equalPrefixLength, -1,
1037 right + equalPrefixLength, -1);
1038 }
1039 } else {
1040 result = CollationFastLatin::BAIL_OUT_RESULT;
1041 }
1042
1043 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1044 if(settings->dontCheckFCD()) {
1045 UTF16CollationIterator leftIter(data, numeric,
1046 left, left + equalPrefixLength, leftLimit);
1047 UTF16CollationIterator rightIter(data, numeric,
1048 right, right + equalPrefixLength, rightLimit);
1049 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1050 } else {
1051 FCDUTF16CollationIterator leftIter(data, numeric,
1052 left, left + equalPrefixLength, leftLimit);
1053 FCDUTF16CollationIterator rightIter(data, numeric,
1054 right, right + equalPrefixLength, rightLimit);
1055 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1056 }
1057 }
1058 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1059 return (UCollationResult)result;
1060 }
1061
1062 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1063 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1064 // and the benefit seems unlikely to be measurable.
1065
1066 // Compare identical level.
1067 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1068 left += equalPrefixLength;
1069 right += equalPrefixLength;
1070 if(settings->dontCheckFCD()) {
1071 UTF16NFDIterator leftIter(left, leftLimit);
1072 UTF16NFDIterator rightIter(right, rightLimit);
1073 return compareNFDIter(nfcImpl, leftIter, rightIter);
1074 } else {
1075 FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1076 FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1077 return compareNFDIter(nfcImpl, leftIter, rightIter);
1078 }
1079 }
1080
1081 UCollationResult
doCompare(const uint8_t * left,int32_t leftLength,const uint8_t * right,int32_t rightLength,UErrorCode & errorCode) const1082 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1083 const uint8_t *right, int32_t rightLength,
1084 UErrorCode &errorCode) const {
1085 // U_FAILURE(errorCode) checked by caller.
1086 if(left == right && leftLength == rightLength) {
1087 return UCOL_EQUAL;
1088 }
1089
1090 // Identical-prefix test.
1091 int32_t equalPrefixLength = 0;
1092 if(leftLength < 0) {
1093 uint8_t c;
1094 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1095 if(c == 0) { return UCOL_EQUAL; }
1096 ++equalPrefixLength;
1097 }
1098 } else {
1099 for(;;) {
1100 if(equalPrefixLength == leftLength) {
1101 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1102 break;
1103 } else if(equalPrefixLength == rightLength ||
1104 left[equalPrefixLength] != right[equalPrefixLength]) {
1105 break;
1106 }
1107 ++equalPrefixLength;
1108 }
1109 }
1110 // Back up to the start of a partially-equal code point.
1111 if(equalPrefixLength > 0 &&
1112 ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1113 (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1114 while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1115 }
1116
1117 UBool numeric = settings->isNumeric();
1118 if(equalPrefixLength > 0) {
1119 UBool unsafe = FALSE;
1120 if(equalPrefixLength != leftLength) {
1121 int32_t i = equalPrefixLength;
1122 UChar32 c;
1123 U8_NEXT_OR_FFFD(left, i, leftLength, c);
1124 unsafe = data->isUnsafeBackward(c, numeric);
1125 }
1126 if(!unsafe && equalPrefixLength != rightLength) {
1127 int32_t i = equalPrefixLength;
1128 UChar32 c;
1129 U8_NEXT_OR_FFFD(right, i, rightLength, c);
1130 unsafe = data->isUnsafeBackward(c, numeric);
1131 }
1132 if(unsafe) {
1133 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1134 UChar32 c;
1135 do {
1136 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1137 } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1138 }
1139 // See the notes in the UTF-16 version.
1140
1141 // Pass the actual start of each string into the CollationIterators,
1142 // plus the equalPrefixLength position,
1143 // so that prefix matches back into the equal prefix work.
1144 }
1145
1146 int32_t result;
1147 int32_t fastLatinOptions = settings->fastLatinOptions;
1148 if(fastLatinOptions >= 0 &&
1149 (equalPrefixLength == leftLength ||
1150 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1151 (equalPrefixLength == rightLength ||
1152 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1153 if(leftLength >= 0) {
1154 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1155 settings->fastLatinPrimaries,
1156 fastLatinOptions,
1157 left + equalPrefixLength,
1158 leftLength - equalPrefixLength,
1159 right + equalPrefixLength,
1160 rightLength - equalPrefixLength);
1161 } else {
1162 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1163 settings->fastLatinPrimaries,
1164 fastLatinOptions,
1165 left + equalPrefixLength, -1,
1166 right + equalPrefixLength, -1);
1167 }
1168 } else {
1169 result = CollationFastLatin::BAIL_OUT_RESULT;
1170 }
1171
1172 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1173 if(settings->dontCheckFCD()) {
1174 UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1175 UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1176 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1177 } else {
1178 FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1179 FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1180 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1181 }
1182 }
1183 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1184 return (UCollationResult)result;
1185 }
1186
1187 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1188 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1189 // and the benefit seems unlikely to be measurable.
1190
1191 // Compare identical level.
1192 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1193 left += equalPrefixLength;
1194 right += equalPrefixLength;
1195 if(leftLength > 0) {
1196 leftLength -= equalPrefixLength;
1197 rightLength -= equalPrefixLength;
1198 }
1199 if(settings->dontCheckFCD()) {
1200 UTF8NFDIterator leftIter(left, leftLength);
1201 UTF8NFDIterator rightIter(right, rightLength);
1202 return compareNFDIter(nfcImpl, leftIter, rightIter);
1203 } else {
1204 FCDUTF8NFDIterator leftIter(data, left, leftLength);
1205 FCDUTF8NFDIterator rightIter(data, right, rightLength);
1206 return compareNFDIter(nfcImpl, leftIter, rightIter);
1207 }
1208 }
1209
1210 UCollationResult
compare(UCharIterator & left,UCharIterator & right,UErrorCode & errorCode) const1211 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1212 UErrorCode &errorCode) const {
1213 if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1214 UBool numeric = settings->isNumeric();
1215
1216 // Identical-prefix test.
1217 int32_t equalPrefixLength = 0;
1218 {
1219 UChar32 leftUnit;
1220 UChar32 rightUnit;
1221 while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1222 if(leftUnit < 0) { return UCOL_EQUAL; }
1223 ++equalPrefixLength;
1224 }
1225
1226 // Back out the code units that differed, for the real collation comparison.
1227 if(leftUnit >= 0) { left.previous(&left); }
1228 if(rightUnit >= 0) { right.previous(&right); }
1229
1230 if(equalPrefixLength > 0) {
1231 if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1232 (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1233 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1234 do {
1235 --equalPrefixLength;
1236 leftUnit = left.previous(&left);
1237 right.previous(&right);
1238 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1239 }
1240 // See the notes in the UTF-16 version.
1241 }
1242 }
1243
1244 UCollationResult result;
1245 if(settings->dontCheckFCD()) {
1246 UIterCollationIterator leftIter(data, numeric, left);
1247 UIterCollationIterator rightIter(data, numeric, right);
1248 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1249 } else {
1250 FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1251 FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1252 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1253 }
1254 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1255 return result;
1256 }
1257
1258 // Compare identical level.
1259 left.move(&left, equalPrefixLength, UITER_ZERO);
1260 right.move(&right, equalPrefixLength, UITER_ZERO);
1261 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1262 if(settings->dontCheckFCD()) {
1263 UIterNFDIterator leftIter(left);
1264 UIterNFDIterator rightIter(right);
1265 return compareNFDIter(nfcImpl, leftIter, rightIter);
1266 } else {
1267 FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1268 FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1269 return compareNFDIter(nfcImpl, leftIter, rightIter);
1270 }
1271 }
1272
1273 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1274 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1275 UErrorCode &errorCode) const {
1276 return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1277 }
1278
1279 CollationKey &
getCollationKey(const UChar * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1280 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1281 UErrorCode &errorCode) const {
1282 if(U_FAILURE(errorCode)) {
1283 return key.setToBogus();
1284 }
1285 if(s == NULL && length != 0) {
1286 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1287 return key.setToBogus();
1288 }
1289 key.reset(); // resets the "bogus" state
1290 CollationKeyByteSink sink(key);
1291 writeSortKey(s, length, sink, errorCode);
1292 if(U_FAILURE(errorCode)) {
1293 key.setToBogus();
1294 } else if(key.isBogus()) {
1295 errorCode = U_MEMORY_ALLOCATION_ERROR;
1296 } else {
1297 key.setLength(sink.NumberOfBytesAppended());
1298 }
1299 return key;
1300 }
1301
1302 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1303 RuleBasedCollator::getSortKey(const UnicodeString &s,
1304 uint8_t *dest, int32_t capacity) const {
1305 return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1306 }
1307
1308 int32_t
getSortKey(const UChar * s,int32_t length,uint8_t * dest,int32_t capacity) const1309 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1310 uint8_t *dest, int32_t capacity) const {
1311 if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1312 return 0;
1313 }
1314 uint8_t noDest[1] = { 0 };
1315 if(dest == NULL) {
1316 // Distinguish pure preflighting from an allocation error.
1317 dest = noDest;
1318 capacity = 0;
1319 }
1320 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1321 UErrorCode errorCode = U_ZERO_ERROR;
1322 writeSortKey(s, length, sink, errorCode);
1323 return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1324 }
1325
1326 void
writeSortKey(const UChar * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1327 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1328 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1329 if(U_FAILURE(errorCode)) { return; }
1330 const UChar *limit = (length >= 0) ? s + length : NULL;
1331 UBool numeric = settings->isNumeric();
1332 CollationKeys::LevelCallback callback;
1333 if(settings->dontCheckFCD()) {
1334 UTF16CollationIterator iter(data, numeric, s, s, limit);
1335 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1336 sink, Collation::PRIMARY_LEVEL,
1337 callback, TRUE, errorCode);
1338 } else {
1339 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1340 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1341 sink, Collation::PRIMARY_LEVEL,
1342 callback, TRUE, errorCode);
1343 }
1344 if(settings->getStrength() == UCOL_IDENTICAL) {
1345 writeIdenticalLevel(s, limit, sink, errorCode);
1346 }
1347 static const char terminator = 0; // TERMINATOR_BYTE
1348 sink.Append(&terminator, 1);
1349 }
1350
1351 void
writeIdenticalLevel(const UChar * s,const UChar * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1352 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1353 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1354 // NFD quick check
1355 const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1356 if(U_FAILURE(errorCode)) { return; }
1357 sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1358 UChar32 prev = 0;
1359 if(nfdQCYesLimit != s) {
1360 prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1361 }
1362 // Is there non-NFD text?
1363 int32_t destLengthEstimate;
1364 if(limit != NULL) {
1365 if(nfdQCYesLimit == limit) { return; }
1366 destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1367 } else {
1368 // s is NUL-terminated
1369 if(*nfdQCYesLimit == 0) { return; }
1370 destLengthEstimate = -1;
1371 }
1372 UnicodeString nfd;
1373 data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1374 u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1375 }
1376
1377 namespace {
1378
1379 /**
1380 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1381 * with an instance of this callback class.
1382 * When another level is about to be written, the callback
1383 * records the level and the number of bytes that will be written until
1384 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1385 *
1386 * When internalNextSortKeyPart() is called again, it restarts with the last level
1387 * and ignores as many bytes as were written previously for that level.
1388 */
1389 class PartLevelCallback : public CollationKeys::LevelCallback {
1390 public:
PartLevelCallback(const SortKeyByteSink & s)1391 PartLevelCallback(const SortKeyByteSink &s)
1392 : sink(s), level(Collation::PRIMARY_LEVEL) {
1393 levelCapacity = sink.GetRemainingCapacity();
1394 }
~PartLevelCallback()1395 virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1396 virtual UBool needToWrite(Collation::Level l) {
1397 if(!sink.Overflowed()) {
1398 // Remember a level that will be at least partially written.
1399 level = l;
1400 levelCapacity = sink.GetRemainingCapacity();
1401 return TRUE;
1402 } else {
1403 return FALSE;
1404 }
1405 }
getLevel() const1406 Collation::Level getLevel() const { return level; }
getLevelCapacity() const1407 int32_t getLevelCapacity() const { return levelCapacity; }
1408
1409 private:
1410 const SortKeyByteSink &sink;
1411 Collation::Level level;
1412 int32_t levelCapacity;
1413 };
1414
1415 } // namespace
1416
1417 int32_t
internalNextSortKeyPart(UCharIterator * iter,uint32_t state[2],uint8_t * dest,int32_t count,UErrorCode & errorCode) const1418 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1419 uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1420 if(U_FAILURE(errorCode)) { return 0; }
1421 if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
1422 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1423 return 0;
1424 }
1425 if(count == 0) { return 0; }
1426
1427 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1428 sink.IgnoreBytes((int32_t)state[1]);
1429 iter->move(iter, 0, UITER_START);
1430
1431 Collation::Level level = (Collation::Level)state[0];
1432 if(level <= Collation::QUATERNARY_LEVEL) {
1433 UBool numeric = settings->isNumeric();
1434 PartLevelCallback callback(sink);
1435 if(settings->dontCheckFCD()) {
1436 UIterCollationIterator ci(data, numeric, *iter);
1437 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1438 sink, level, callback, FALSE, errorCode);
1439 } else {
1440 FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1441 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1442 sink, level, callback, FALSE, errorCode);
1443 }
1444 if(U_FAILURE(errorCode)) { return 0; }
1445 if(sink.NumberOfBytesAppended() > count) {
1446 state[0] = (uint32_t)callback.getLevel();
1447 state[1] = (uint32_t)callback.getLevelCapacity();
1448 return count;
1449 }
1450 // All of the normal levels are done.
1451 if(settings->getStrength() == UCOL_IDENTICAL) {
1452 level = Collation::IDENTICAL_LEVEL;
1453 iter->move(iter, 0, UITER_START);
1454 }
1455 // else fall through to setting ZERO_LEVEL
1456 }
1457
1458 if(level == Collation::IDENTICAL_LEVEL) {
1459 int32_t levelCapacity = sink.GetRemainingCapacity();
1460 UnicodeString s;
1461 for(;;) {
1462 UChar32 c = iter->next(iter);
1463 if(c < 0) { break; }
1464 s.append((UChar)c);
1465 }
1466 const UChar *sArray = s.getBuffer();
1467 writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1468 if(U_FAILURE(errorCode)) { return 0; }
1469 if(sink.NumberOfBytesAppended() > count) {
1470 state[0] = (uint32_t)level;
1471 state[1] = (uint32_t)levelCapacity;
1472 return count;
1473 }
1474 }
1475
1476 // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1477 state[0] = (uint32_t)Collation::ZERO_LEVEL;
1478 state[1] = 0;
1479 int32_t length = sink.NumberOfBytesAppended();
1480 int32_t i = length;
1481 while(i < count) { dest[i++] = 0; }
1482 return length;
1483 }
1484
1485 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1486 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1487 UErrorCode &errorCode) const {
1488 if(U_FAILURE(errorCode)) { return; }
1489 const UChar *s = str.getBuffer();
1490 const UChar *limit = s + str.length();
1491 UBool numeric = settings->isNumeric();
1492 if(settings->dontCheckFCD()) {
1493 UTF16CollationIterator iter(data, numeric, s, s, limit);
1494 int64_t ce;
1495 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1496 ces.addElement(ce, errorCode);
1497 }
1498 } else {
1499 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1500 int64_t ce;
1501 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1502 ces.addElement(ce, errorCode);
1503 }
1504 }
1505 }
1506
1507 namespace {
1508
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1509 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1510 UErrorCode &errorCode) {
1511 if(U_FAILURE(errorCode) || length == 0) { return; }
1512 if(!s.isEmpty()) {
1513 s.append('_', errorCode);
1514 }
1515 s.append(letter, errorCode);
1516 for(int32_t i = 0; i < length; ++i) {
1517 s.append(uprv_toupper(subtag[i]), errorCode);
1518 }
1519 }
1520
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1521 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1522 UErrorCode &errorCode) {
1523 if(U_FAILURE(errorCode)) { return; }
1524 if(!s.isEmpty()) {
1525 s.append('_', errorCode);
1526 }
1527 static const char *valueChars = "1234...........IXO..SN..LU......";
1528 s.append(letter, errorCode);
1529 s.append(valueChars[value], errorCode);
1530 }
1531
1532 } // namespace
1533
1534 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1535 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1536 char *buffer, int32_t capacity,
1537 UErrorCode &errorCode) const {
1538 if(U_FAILURE(errorCode)) { return 0; }
1539 if(buffer == NULL ? capacity != 0 : capacity < 0) {
1540 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1541 return 0;
1542 }
1543 if(locale == NULL) {
1544 locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1545 }
1546
1547 char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1548 int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1549 "collation", locale,
1550 NULL, &errorCode);
1551 if(U_FAILURE(errorCode)) { return 0; }
1552 if(length == 0) {
1553 uprv_strcpy(resultLocale, "root");
1554 } else {
1555 resultLocale[length] = 0;
1556 }
1557
1558 // Append items in alphabetic order of their short definition letters.
1559 CharString result;
1560 char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1561
1562 if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1563 appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1564 }
1565 // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1566 // See ICU tickets #10372 and #10386.
1567 if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1568 appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1569 }
1570 if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1571 appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1572 }
1573 if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1574 appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1575 }
1576 if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1577 appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1578 }
1579 // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1580 length = uloc_getKeywordValue(resultLocale, "collation", subtag, LENGTHOF(subtag), &errorCode);
1581 appendSubtag(result, 'K', subtag, length, errorCode);
1582 length = uloc_getLanguage(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1583 appendSubtag(result, 'L', subtag, length, errorCode);
1584 if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1585 appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1586 }
1587 length = uloc_getCountry(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1588 appendSubtag(result, 'R', subtag, length, errorCode);
1589 if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1590 appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1591 }
1592 length = uloc_getVariant(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1593 appendSubtag(result, 'V', subtag, length, errorCode);
1594 length = uloc_getScript(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
1595 appendSubtag(result, 'Z', subtag, length, errorCode);
1596
1597 if(U_FAILURE(errorCode)) { return 0; }
1598 if(result.length() <= capacity) {
1599 uprv_memcpy(buffer, result.data(), result.length());
1600 }
1601 return u_terminateChars(buffer, capacity, result.length(), &errorCode);
1602 }
1603
1604 UBool
isUnsafe(UChar32 c) const1605 RuleBasedCollator::isUnsafe(UChar32 c) const {
1606 return data->isUnsafeBackward(c, settings->isNumeric());
1607 }
1608
1609 void
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1610 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1611 t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1612 }
1613
1614 UBool
initMaxExpansions(UErrorCode & errorCode) const1615 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1616 umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1617 return U_SUCCESS(errorCode);
1618 }
1619
1620 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1621 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1622 UErrorCode errorCode = U_ZERO_ERROR;
1623 if(!initMaxExpansions(errorCode)) { return NULL; }
1624 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1625 if(U_FAILURE(errorCode)) {
1626 delete cei;
1627 return NULL;
1628 }
1629 return cei;
1630 }
1631
1632 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1633 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1634 UErrorCode errorCode = U_ZERO_ERROR;
1635 if(!initMaxExpansions(errorCode)) { return NULL; }
1636 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1637 if(U_FAILURE(errorCode)) {
1638 delete cei;
1639 return NULL;
1640 }
1641 return cei;
1642 }
1643
1644 int32_t
getMaxExpansion(int32_t order) const1645 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1646 UErrorCode errorCode = U_ZERO_ERROR;
1647 (void)initMaxExpansions(errorCode);
1648 return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1649 }
1650
1651 U_NAMESPACE_END
1652
1653 #endif // !UCONFIG_NO_COLLATION
1654