1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION
13
14 #include "unicode/alphaindex.h"
15 #include "unicode/coll.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/normalizer2.h"
18 #include "unicode/tblcoll.h"
19 #include "unicode/uchar.h"
20 #include "unicode/ulocdata.h"
21 #include "unicode/uniset.h"
22 #include "unicode/uobject.h"
23 #include "unicode/usetiter.h"
24 #include "unicode/utf16.h"
25
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "uassert.h"
29 #include "uvector.h"
30 #include "uvectr64.h"
31
32 //#include <string>
33 //#include <iostream>
34
35 U_NAMESPACE_BEGIN
36
37 namespace {
38
39 /**
40 * Prefix string for Chinese index buckets.
41 * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42 */
43 const UChar BASE[1] = { 0xFDD0 };
44 const int32_t BASE_LENGTH = 1;
45
46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47 const UnicodeString &one, const UnicodeString &other);
48
49 } // namespace
50
51 static int32_t U_CALLCONV
52 collatorComparator(const void *context, const void *left, const void *right);
53
54 static int32_t U_CALLCONV
55 recordCompareFn(const void *context, const void *left, const void *right);
56
57 // UVector<Record *> support function, delete a Record.
58 static void U_CALLCONV
alphaIndex_deleteRecord(void * obj)59 alphaIndex_deleteRecord(void *obj) {
60 delete static_cast<AlphabeticIndex::Record *>(obj);
61 }
62
63 namespace {
64
ownedString(const UnicodeString & s,LocalPointer<UnicodeString> & owned,UErrorCode & errorCode)65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66 UErrorCode &errorCode) {
67 if (U_FAILURE(errorCode)) { return NULL; }
68 if (owned.isValid()) {
69 return owned.orphan();
70 }
71 UnicodeString *p = new UnicodeString(s);
72 if (p == NULL) {
73 errorCode = U_MEMORY_ALLOCATION_ERROR;
74 }
75 return p;
76 }
77
getString(const UVector & list,int32_t i)78 inline UnicodeString *getString(const UVector &list, int32_t i) {
79 return static_cast<UnicodeString *>(list[i]);
80 }
81
getBucket(const UVector & list,int32_t i)82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83 return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84 }
85
getRecord(const UVector & list,int32_t i)86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87 return static_cast<AlphabeticIndex::Record *>(list[i]);
88 }
89
90 /**
91 * Like Java Collections.binarySearch(List, String, Comparator).
92 *
93 * @return the index>=0 where the item was found,
94 * or the index<0 for inserting the string at ~index in sorted order
95 */
binarySearch(const UVector & list,const UnicodeString & s,const Collator & coll)96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97 if (list.size() == 0) { return ~0; }
98 int32_t start = 0;
99 int32_t limit = list.size();
100 for (;;) {
101 int32_t i = (start + limit) / 2;
102 const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103 UErrorCode errorCode = U_ZERO_ERROR;
104 UCollationResult cmp = coll.compare(s, *si, errorCode);
105 if (cmp == UCOL_EQUAL) {
106 return i;
107 } else if (cmp < 0) {
108 if (i == start) {
109 return ~start; // insert s before *si
110 }
111 limit = i;
112 } else {
113 if (i == start) {
114 return ~(start + 1); // insert s after *si
115 }
116 start = i;
117 }
118 }
119 }
120
121 } // namespace
122
123 // The BucketList is not in the anonymous namespace because only Clang
124 // seems to support its use in other classes from there.
125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126 class BucketList : public UObject {
127 public:
BucketList(UVector * bucketList,UVector * publicBucketList)128 BucketList(UVector *bucketList, UVector *publicBucketList)
129 : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130 int32_t displayIndex = 0;
131 for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132 getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133 }
134 }
135
136 // The virtual destructor must not be inline.
137 // See ticket #8454 for details.
138 virtual ~BucketList();
139
getBucketCount() const140 int32_t getBucketCount() const {
141 return immutableVisibleList_->size();
142 }
143
getBucketIndex(const UnicodeString & name,const Collator & collatorPrimaryOnly,UErrorCode & errorCode)144 int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145 UErrorCode &errorCode) {
146 // binary search
147 int32_t start = 0;
148 int32_t limit = bucketList_->size();
149 while ((start + 1) < limit) {
150 int32_t i = (start + limit) / 2;
151 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152 UCollationResult nameVsBucket =
153 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154 if (nameVsBucket < 0) {
155 limit = i;
156 } else {
157 start = i;
158 }
159 }
160 const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161 if (bucket->displayBucket_ != NULL) {
162 bucket = bucket->displayBucket_;
163 }
164 return bucket->displayIndex_;
165 }
166
167 /** All of the buckets, visible and invisible. */
168 UVector *bucketList_;
169 /** Just the visible buckets. */
170 UVector *immutableVisibleList_;
171 };
172
~BucketList()173 BucketList::~BucketList() {
174 delete bucketList_;
175 if (immutableVisibleList_ != bucketList_) {
176 delete immutableVisibleList_;
177 }
178 }
179
~ImmutableIndex()180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181 delete buckets_;
182 delete collatorPrimaryOnly_;
183 }
184
185 int32_t
getBucketCount() const186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187 return buckets_->getBucketCount();
188 }
189
190 int32_t
getBucketIndex(const UnicodeString & name,UErrorCode & errorCode) const191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
192 const UnicodeString &name, UErrorCode &errorCode) const {
193 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194 }
195
196 const AlphabeticIndex::Bucket *
getBucket(int32_t index) const197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198 if (0 <= index && index < buckets_->getBucketCount()) {
199 return icu::getBucket(*buckets_->immutableVisibleList_, index);
200 } else {
201 return NULL;
202 }
203 }
204
AlphabeticIndex(const Locale & locale,UErrorCode & status)205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206 : inputList_(NULL),
207 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208 maxLabelCount_(99),
209 initialLabels_(NULL), firstCharsInScripts_(NULL),
210 collator_(NULL), collatorPrimaryOnly_(NULL),
211 buckets_(NULL) {
212 init(&locale, status);
213 }
214
215
AlphabeticIndex(RuleBasedCollator * collator,UErrorCode & status)216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217 : inputList_(NULL),
218 labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219 maxLabelCount_(99),
220 initialLabels_(NULL), firstCharsInScripts_(NULL),
221 collator_(collator), collatorPrimaryOnly_(NULL),
222 buckets_(NULL) {
223 init(NULL, status);
224 }
225
226
227
~AlphabeticIndex()228 AlphabeticIndex::~AlphabeticIndex() {
229 delete collator_;
230 delete collatorPrimaryOnly_;
231 delete firstCharsInScripts_;
232 delete buckets_;
233 delete inputList_;
234 delete initialLabels_;
235 }
236
237
addLabels(const UnicodeSet & additions,UErrorCode & status)238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239 if (U_FAILURE(status)) {
240 return *this;
241 }
242 initialLabels_->addAll(additions);
243 clearBuckets();
244 return *this;
245 }
246
247
addLabels(const Locale & locale,UErrorCode & status)248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249 addIndexExemplars(locale, status);
250 clearBuckets();
251 return *this;
252 }
253
254
buildImmutableIndex(UErrorCode & errorCode)255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256 if (U_FAILURE(errorCode)) { return NULL; }
257 // In C++, the ImmutableIndex must own its copy of the BucketList,
258 // even if it contains no records, for proper memory management.
259 // We could clone the buckets_ if they are not NULL,
260 // but that would be worth it only if this method is called multiple times,
261 // or called after using the old-style bucket iterator API.
262 LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263 LocalPointer<RuleBasedCollator> coll(
264 static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265 if (immutableBucketList.isNull() || coll.isNull()) {
266 errorCode = U_MEMORY_ALLOCATION_ERROR;
267 return NULL;
268 }
269 ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270 if (immIndex == NULL) {
271 errorCode = U_MEMORY_ALLOCATION_ERROR;
272 return NULL;
273 }
274 // The ImmutableIndex adopted its parameter objects.
275 immutableBucketList.orphan();
276 coll.orphan();
277 return immIndex;
278 }
279
getBucketCount(UErrorCode & status)280 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
281 initBuckets(status);
282 if (U_FAILURE(status)) {
283 return 0;
284 }
285 return buckets_->getBucketCount();
286 }
287
288
getRecordCount(UErrorCode & status)289 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
290 if (U_FAILURE(status) || inputList_ == NULL) {
291 return 0;
292 }
293 return inputList_->size();
294 }
295
initLabels(UVector & indexCharacters,UErrorCode & errorCode) const296 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297 const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298 if (U_FAILURE(errorCode)) { return; }
299
300 const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301 const UnicodeString &overflowBoundary =
302 *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303
304 // We make a sorted array of elements.
305 // Some of the input may be redundant.
306 // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307 // We filter out those cases.
308 UnicodeSetIterator iter(*initialLabels_);
309 while (iter.next()) {
310 const UnicodeString *item = &iter.getString();
311 LocalPointer<UnicodeString> ownedItem;
312 UBool checkDistinct;
313 int32_t itemLength = item->length();
314 if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315 checkDistinct = FALSE;
316 } else if(item->charAt(itemLength - 1) == 0x2a && // '*'
317 item->charAt(itemLength - 2) != 0x2a) {
318 // Use a label if it is marked with one trailing star,
319 // even if the label string sorts the same when all contractions are suppressed.
320 ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321 item = ownedItem.getAlias();
322 if (item == NULL) {
323 errorCode = U_MEMORY_ALLOCATION_ERROR;
324 return;
325 }
326 checkDistinct = FALSE;
327 } else {
328 checkDistinct = TRUE;
329 }
330 if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331 // Ignore a primary-ignorable or non-alphabetic index character.
332 } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
333 // Ignore an index character that will land in the overflow bucket.
334 } else if (checkDistinct &&
335 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336 // Ignore a multi-code point index character that does not sort distinctly
337 // from the sequence of its separate characters.
338 } else {
339 int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340 if (insertionPoint < 0) {
341 indexCharacters.insertElementAt(
342 ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343 } else {
344 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346 indexCharacters.setElementAt(
347 ownedString(*item, ownedItem, errorCode), insertionPoint);
348 }
349 }
350 }
351 }
352 if (U_FAILURE(errorCode)) { return; }
353
354 // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
355
356 int32_t size = indexCharacters.size() - 1;
357 if (size > maxLabelCount_) {
358 int32_t count = 0;
359 int32_t old = -1;
360 for (int32_t i = 0; i < indexCharacters.size();) {
361 ++count;
362 int32_t bump = count * maxLabelCount_ / size;
363 if (bump == old) {
364 indexCharacters.removeElementAt(i);
365 } else {
366 old = bump;
367 ++i;
368 }
369 }
370 }
371 }
372
373 namespace {
374
fixLabel(const UnicodeString & current,UnicodeString & temp)375 const UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) {
376 if (!current.startsWith(BASE, BASE_LENGTH)) {
377 return current;
378 }
379 UChar rest = current.charAt(BASE_LENGTH);
380 if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381 int32_t count = rest-0x2800;
382 temp.setTo((UChar)(0x30 + count % 10));
383 if (count >= 10) {
384 count /= 10;
385 temp.insert(0, (UChar)(0x30 + count % 10));
386 if (count >= 10) {
387 count /= 10;
388 temp.insert(0, (UChar)(0x30 + count));
389 }
390 }
391 return temp.append((UChar)0x5283);
392 }
393 return temp.setTo(current, BASE_LENGTH);
394 }
395
hasMultiplePrimaryWeights(const RuleBasedCollator & coll,uint32_t variableTop,const UnicodeString & s,UVector64 & ces,UErrorCode & errorCode)396 UBool hasMultiplePrimaryWeights(
397 const RuleBasedCollator &coll, uint32_t variableTop,
398 const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399 ces.removeAllElements();
400 coll.internalGetCEs(s, ces, errorCode);
401 if (U_FAILURE(errorCode)) { return FALSE; }
402 UBool seenPrimary = FALSE;
403 for (int32_t i = 0; i < ces.size(); ++i) {
404 int64_t ce = ces.elementAti(i);
405 uint32_t p = (uint32_t)(ce >> 32);
406 if (p > variableTop) {
407 // not primary ignorable
408 if (seenPrimary) {
409 return TRUE;
410 }
411 seenPrimary = TRUE;
412 }
413 }
414 return FALSE;
415 }
416
417 } // namespace
418
createBucketList(UErrorCode & errorCode) const419 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420 // Initialize indexCharacters.
421 UVector indexCharacters(errorCode);
422 indexCharacters.setDeleter(uprv_deleteUObject);
423 initLabels(indexCharacters, errorCode);
424 if (U_FAILURE(errorCode)) { return NULL; }
425
426 // Variables for hasMultiplePrimaryWeights().
427 UVector64 ces(errorCode);
428 uint32_t variableTop;
429 if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
430 variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
431 } else {
432 variableTop = 0;
433 }
434 UBool hasInvisibleBuckets = FALSE;
435
436 // Helper arrays for Chinese Pinyin collation.
437 Bucket *asciiBuckets[26] = {
438 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440 };
441 Bucket *pinyinBuckets[26] = {
442 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444 };
445 UBool hasPinyin = FALSE;
446
447 LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
448 if (U_FAILURE(errorCode)) {
449 return NULL;
450 }
451 bucketList->setDeleter(uprv_deleteUObject);
452
453 // underflow bucket
454 Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
455 if (bucket == NULL) {
456 errorCode = U_MEMORY_ALLOCATION_ERROR;
457 return NULL;
458 }
459 bucketList->addElement(bucket, errorCode);
460 if (U_FAILURE(errorCode)) { return NULL; }
461
462 UnicodeString temp;
463
464 // fix up the list, adding underflow, additions, overflow
465 // Insert inflow labels as needed.
466 int32_t scriptIndex = -1;
467 const UnicodeString *scriptUpperBoundary = &emptyString_;
468 for (int32_t i = 0; i < indexCharacters.size(); ++i) {
469 UnicodeString ¤t = *getString(indexCharacters, i);
470 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
471 // We crossed the script boundary into a new script.
472 const UnicodeString &inflowBoundary = *scriptUpperBoundary;
473 UBool skippedScript = FALSE;
474 for (;;) {
475 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
476 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
477 break;
478 }
479 skippedScript = TRUE;
480 }
481 if (skippedScript && bucketList->size() > 1) {
482 // We are skipping one or more scripts,
483 // and we are not just getting out of the underflow label.
484 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
485 if (bucket == NULL) {
486 errorCode = U_MEMORY_ALLOCATION_ERROR;
487 return NULL;
488 }
489 bucketList->addElement(bucket, errorCode);
490 }
491 }
492 // Add a bucket with the current label.
493 bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
494 if (bucket == NULL) {
495 errorCode = U_MEMORY_ALLOCATION_ERROR;
496 return NULL;
497 }
498 bucketList->addElement(bucket, errorCode);
499 // Remember ASCII and Pinyin buckets for Pinyin redirects.
500 UChar c;
501 if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
502 asciiBuckets[c - 0x41] = bucket;
503 } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
504 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
505 pinyinBuckets[c - 0x41] = bucket;
506 hasPinyin = TRUE;
507 }
508 // Check for multiple primary weights.
509 if (!current.startsWith(BASE, BASE_LENGTH) &&
510 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
511 ces, errorCode) &&
512 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
513 // "AE-ligature" or "Sch" etc.
514 for (int32_t i = bucketList->size() - 2;; --i) {
515 Bucket *singleBucket = getBucket(*bucketList, i);
516 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
517 // There is no single-character bucket since the last
518 // underflow or inflow label.
519 break;
520 }
521 if (singleBucket->displayBucket_ == NULL &&
522 !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
523 singleBucket->lowerBoundary_,
524 ces, errorCode)) {
525 // Add an invisible bucket that redirects strings greater than the expansion
526 // to the previous single-character bucket.
527 // For example, after ... Q R S Sch we add Sch\uFFFF->S
528 // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
529 bucket = new Bucket(emptyString_,
530 UnicodeString(current).append((UChar)0xFFFF),
531 U_ALPHAINDEX_NORMAL);
532 if (bucket == NULL) {
533 errorCode = U_MEMORY_ALLOCATION_ERROR;
534 return NULL;
535 }
536 bucket->displayBucket_ = singleBucket;
537 bucketList->addElement(bucket, errorCode);
538 hasInvisibleBuckets = TRUE;
539 break;
540 }
541 }
542 }
543 }
544 if (U_FAILURE(errorCode)) { return NULL; }
545 if (bucketList->size() == 1) {
546 // No real labels, show only the underflow label.
547 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
548 if (bl == NULL) {
549 errorCode = U_MEMORY_ALLOCATION_ERROR;
550 return NULL;
551 }
552 bucketList.orphan();
553 return bl;
554 }
555 // overflow bucket
556 bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
557 if (bucket == NULL) {
558 errorCode = U_MEMORY_ALLOCATION_ERROR;
559 return NULL;
560 }
561 bucketList->addElement(bucket, errorCode); // final
562
563 if (hasPinyin) {
564 // Redirect Pinyin buckets.
565 Bucket *asciiBucket = NULL;
566 for (int32_t i = 0; i < 26; ++i) {
567 if (asciiBuckets[i] != NULL) {
568 asciiBucket = asciiBuckets[i];
569 }
570 if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
571 pinyinBuckets[i]->displayBucket_ = asciiBucket;
572 hasInvisibleBuckets = TRUE;
573 }
574 }
575 }
576
577 if (U_FAILURE(errorCode)) { return NULL; }
578 if (!hasInvisibleBuckets) {
579 BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
580 if (bl == NULL) {
581 errorCode = U_MEMORY_ALLOCATION_ERROR;
582 return NULL;
583 }
584 bucketList.orphan();
585 return bl;
586 }
587 // Merge inflow buckets that are visually adjacent.
588 // Iterate backwards: Merge inflow into overflow rather than the other way around.
589 int32_t i = bucketList->size() - 1;
590 Bucket *nextBucket = getBucket(*bucketList, i);
591 while (--i > 0) {
592 bucket = getBucket(*bucketList, i);
593 if (bucket->displayBucket_ != NULL) {
594 continue; // skip invisible buckets
595 }
596 if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
597 if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
598 bucket->displayBucket_ = nextBucket;
599 continue;
600 }
601 }
602 nextBucket = bucket;
603 }
604
605 LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
606 if (U_FAILURE(errorCode)) {
607 return NULL;
608 }
609 // Do not call publicBucketList->setDeleter():
610 // This vector shares its objects with the bucketList.
611 for (int32_t i = 0; i < bucketList->size(); ++i) {
612 bucket = getBucket(*bucketList, i);
613 if (bucket->displayBucket_ == NULL) {
614 publicBucketList->addElement(bucket, errorCode);
615 }
616 }
617 if (U_FAILURE(errorCode)) { return NULL; }
618 BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
619 if (bl == NULL) {
620 errorCode = U_MEMORY_ALLOCATION_ERROR;
621 return NULL;
622 }
623 bucketList.orphan();
624 publicBucketList.orphan();
625 return bl;
626 }
627
628 /**
629 * Creates an index, and buckets and sorts the list of records into the index.
630 */
initBuckets(UErrorCode & errorCode)631 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
632 if (U_FAILURE(errorCode) || buckets_ != NULL) {
633 return;
634 }
635 buckets_ = createBucketList(errorCode);
636 if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
637 return;
638 }
639
640 // Sort the records by name.
641 // Stable sort preserves input order of collation duplicates.
642 inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
643
644 // Now, we traverse all of the input, which is now sorted.
645 // If the item doesn't go in the current bucket, we find the next bucket that contains it.
646 // This makes the process order n*log(n), since we just sort the list and then do a linear process.
647 // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
648 // we need to improve it for that case.
649
650 Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
651 int32_t bucketIndex = 1;
652 Bucket *nextBucket;
653 const UnicodeString *upperBoundary;
654 if (bucketIndex < buckets_->bucketList_->size()) {
655 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
656 upperBoundary = &nextBucket->lowerBoundary_;
657 } else {
658 nextBucket = NULL;
659 upperBoundary = NULL;
660 }
661 for (int32_t i = 0; i < inputList_->size(); ++i) {
662 Record *r = getRecord(*inputList_, i);
663 // if the current bucket isn't the right one, find the one that is
664 // We have a special flag for the last bucket so that we don't look any further
665 while (upperBoundary != NULL &&
666 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
667 currentBucket = nextBucket;
668 // now reset the boundary that we compare against
669 if (bucketIndex < buckets_->bucketList_->size()) {
670 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
671 upperBoundary = &nextBucket->lowerBoundary_;
672 } else {
673 upperBoundary = NULL;
674 }
675 }
676 // now put the record into the bucket.
677 Bucket *bucket = currentBucket;
678 if (bucket->displayBucket_ != NULL) {
679 bucket = bucket->displayBucket_;
680 }
681 if (bucket->records_ == NULL) {
682 bucket->records_ = new UVector(errorCode);
683 if (bucket->records_ == NULL) {
684 errorCode = U_MEMORY_ALLOCATION_ERROR;
685 return;
686 }
687 }
688 bucket->records_->addElement(r, errorCode);
689 }
690 }
691
clearBuckets()692 void AlphabeticIndex::clearBuckets() {
693 if (buckets_ != NULL) {
694 delete buckets_;
695 buckets_ = NULL;
696 internalResetBucketIterator();
697 }
698 }
699
internalResetBucketIterator()700 void AlphabeticIndex::internalResetBucketIterator() {
701 labelsIterIndex_ = -1;
702 currentBucket_ = NULL;
703 }
704
705
addIndexExemplars(const Locale & locale,UErrorCode & status)706 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
707 LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
708 if (U_FAILURE(status)) {
709 return;
710 }
711
712 UnicodeSet exemplars;
713 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
714 if (U_SUCCESS(status)) {
715 initialLabels_->addAll(exemplars);
716 return;
717 }
718 status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
719
720 // The locale data did not include explicit Index characters.
721 // Synthesize a set of them from the locale's standard exemplar characters.
722 ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
723 if (U_FAILURE(status)) {
724 return;
725 }
726
727 // question: should we add auxiliary exemplars?
728 if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
729 exemplars.add(0x61, 0x7A);
730 }
731 if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
732 // cut down to small list
733 exemplars.remove(0xAC00, 0xD7A3).
734 add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
735 add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
736 add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
737 add(0xD30C).add(0xD558);
738 }
739 if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
740 // cut down to small list
741 // make use of the fact that Ethiopic is allocated in 8's, where
742 // the base is 0 mod 8.
743 UnicodeSet ethiopic(
744 UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
745 UnicodeSetIterator it(ethiopic);
746 while (it.next() && !it.isString()) {
747 if ((it.getCodepoint() & 0x7) != 0) {
748 exemplars.remove(it.getCodepoint());
749 }
750 }
751 }
752
753 // Upper-case any that aren't already so.
754 // (We only do this for synthesized index characters.)
755 UnicodeSetIterator it(exemplars);
756 UnicodeString upperC;
757 while (it.next()) {
758 const UnicodeString &exemplarC = it.getString();
759 upperC = exemplarC;
760 upperC.toUpper(locale);
761 initialLabels_->add(upperC);
762 }
763 }
764
addChineseIndexCharacters(UErrorCode & errorCode)765 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
766 UnicodeSet contractions;
767 collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
768 if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
769 initialLabels_->addAll(contractions);
770 UnicodeSetIterator iter(contractions);
771 while (iter.next()) {
772 const UnicodeString &s = iter.getString();
773 U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
774 UChar c = s.charAt(s.length() - 1);
775 if (0x41 <= c && c <= 0x5A) { // A-Z
776 // There are Pinyin labels, add ASCII A-Z labels as well.
777 initialLabels_->add(0x41, 0x5A); // A-Z
778 break;
779 }
780 }
781 return TRUE;
782 }
783
784
785 /*
786 * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
787 */
788 static const UChar CGJ = 0x034F;
separated(const UnicodeString & item)789 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
790 UnicodeString result;
791 if (item.length() == 0) {
792 return result;
793 }
794 int32_t i = 0;
795 for (;;) {
796 UChar32 cp = item.char32At(i);
797 result.append(cp);
798 i = item.moveIndex32(i, 1);
799 if (i >= item.length()) {
800 break;
801 }
802 result.append(CGJ);
803 }
804 return result;
805 }
806
807
operator ==(const AlphabeticIndex &) const808 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
809 return FALSE;
810 }
811
812
operator !=(const AlphabeticIndex &) const813 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
814 return FALSE;
815 }
816
817
getCollator() const818 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
819 return *collator_;
820 }
821
822
getInflowLabel() const823 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
824 return inflowLabel_;
825 }
826
getOverflowLabel() const827 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
828 return overflowLabel_;
829 }
830
831
getUnderflowLabel() const832 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
833 return underflowLabel_;
834 }
835
836
setInflowLabel(const UnicodeString & label,UErrorCode &)837 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
838 inflowLabel_ = label;
839 clearBuckets();
840 return *this;
841 }
842
843
setOverflowLabel(const UnicodeString & label,UErrorCode &)844 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
845 overflowLabel_ = label;
846 clearBuckets();
847 return *this;
848 }
849
850
setUnderflowLabel(const UnicodeString & label,UErrorCode &)851 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
852 underflowLabel_ = label;
853 clearBuckets();
854 return *this;
855 }
856
857
getMaxLabelCount() const858 int32_t AlphabeticIndex::getMaxLabelCount() const {
859 return maxLabelCount_;
860 }
861
862
setMaxLabelCount(int32_t maxLabelCount,UErrorCode & status)863 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
864 if (U_FAILURE(status)) {
865 return *this;
866 }
867 if (maxLabelCount <= 0) {
868 status = U_ILLEGAL_ARGUMENT_ERROR;
869 return *this;
870 }
871 maxLabelCount_ = maxLabelCount;
872 clearBuckets();
873 return *this;
874 }
875
876
877 //
878 // init() - Common code for constructors.
879 //
880
init(const Locale * locale,UErrorCode & status)881 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
882 if (U_FAILURE(status)) { return; }
883 if (locale == NULL && collator_ == NULL) {
884 status = U_ILLEGAL_ARGUMENT_ERROR;
885 return;
886 }
887
888 initialLabels_ = new UnicodeSet();
889 if (initialLabels_ == NULL) {
890 status = U_MEMORY_ALLOCATION_ERROR;
891 return;
892 }
893
894 inflowLabel_.setTo((UChar)0x2026); // Ellipsis
895 overflowLabel_ = inflowLabel_;
896 underflowLabel_ = inflowLabel_;
897
898 if (collator_ == NULL) {
899 Collator *coll = Collator::createInstance(*locale, status);
900 if (U_FAILURE(status)) {
901 delete coll;
902 return;
903 }
904 if (coll == NULL) {
905 status = U_MEMORY_ALLOCATION_ERROR;
906 return;
907 }
908 collator_ = dynamic_cast<RuleBasedCollator *>(coll);
909 if (collator_ == NULL) {
910 delete coll;
911 status = U_UNSUPPORTED_ERROR;
912 return;
913 }
914 }
915 collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
916 if (collatorPrimaryOnly_ == NULL) {
917 status = U_MEMORY_ALLOCATION_ERROR;
918 return;
919 }
920 collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
921 firstCharsInScripts_ = firstStringsInScript(status);
922 if (U_FAILURE(status)) { return; }
923 firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
924 // Guard against a degenerate collator where
925 // some script boundary strings are primary ignorable.
926 for (;;) {
927 if (U_FAILURE(status)) { return; }
928 if (firstCharsInScripts_->isEmpty()) {
929 // AlphabeticIndex requires some non-ignorable script boundary strings.
930 status = U_ILLEGAL_ARGUMENT_ERROR;
931 return;
932 }
933 if (collatorPrimaryOnly_->compare(
934 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
935 emptyString_, status) == UCOL_EQUAL) {
936 firstCharsInScripts_->removeElementAt(0);
937 } else {
938 break;
939 }
940 }
941
942 // Chinese index characters, which are specific to each of the several Chinese tailorings,
943 // take precedence over the single locale data exemplar set per language.
944 if (!addChineseIndexCharacters(status) && locale != NULL) {
945 addIndexExemplars(*locale, status);
946 }
947 }
948
949
950 //
951 // Comparison function for UVector<UnicodeString *> sorting with a collator.
952 //
953 static int32_t U_CALLCONV
collatorComparator(const void * context,const void * left,const void * right)954 collatorComparator(const void *context, const void *left, const void *right) {
955 const UElement *leftElement = static_cast<const UElement *>(left);
956 const UElement *rightElement = static_cast<const UElement *>(right);
957 const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer);
958 const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
959
960 if (leftString == rightString) {
961 // Catches case where both are NULL
962 return 0;
963 }
964 if (leftString == NULL) {
965 return 1;
966 };
967 if (rightString == NULL) {
968 return -1;
969 }
970 const Collator *col = static_cast<const Collator *>(context);
971 UErrorCode errorCode = U_ZERO_ERROR;
972 return col->compare(*leftString, *rightString, errorCode);
973 }
974
975 //
976 // Comparison function for UVector<Record *> sorting with a collator.
977 //
978 static int32_t U_CALLCONV
recordCompareFn(const void * context,const void * left,const void * right)979 recordCompareFn(const void *context, const void *left, const void *right) {
980 const UElement *leftElement = static_cast<const UElement *>(left);
981 const UElement *rightElement = static_cast<const UElement *>(right);
982 const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
983 const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
984 const Collator *col = static_cast<const Collator *>(context);
985 UErrorCode errorCode = U_ZERO_ERROR;
986 return col->compare(leftRec->name_, rightRec->name_, errorCode);
987 }
988
firstStringsInScript(UErrorCode & status)989 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
990 if (U_FAILURE(status)) {
991 return NULL;
992 }
993 LocalPointer<UVector> dest(new UVector(status), status);
994 if (U_FAILURE(status)) {
995 return NULL;
996 }
997 dest->setDeleter(uprv_deleteUObject);
998 // Fetch the script-first-primary contractions which are defined in the root collator.
999 // They all start with U+FDD1.
1000 UnicodeSet set;
1001 collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1002 if (U_FAILURE(status)) {
1003 return NULL;
1004 }
1005 if (set.isEmpty()) {
1006 status = U_UNSUPPORTED_ERROR;
1007 return NULL;
1008 }
1009 UnicodeSetIterator iter(set);
1010 while (iter.next()) {
1011 const UnicodeString &boundary = iter.getString();
1012 uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1013 if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1014 // Ignore boundaries for the special reordering groups.
1015 // Take only those for "real scripts" (where the sample character is a Letter,
1016 // and the one for unassigned implicit weights (Cn).
1017 continue;
1018 }
1019 UnicodeString *s = new UnicodeString(boundary);
1020 if (s == NULL) {
1021 status = U_MEMORY_ALLOCATION_ERROR;
1022 return NULL;
1023 }
1024 dest->addElement(s, status);
1025 }
1026 return dest.orphan();
1027 }
1028
1029
1030 namespace {
1031
1032 /**
1033 * Returns true if one index character string is "better" than the other.
1034 * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1035 * better, and otherwise binary-less-than is better.
1036 */
isOneLabelBetterThanOther(const Normalizer2 & nfkdNormalizer,const UnicodeString & one,const UnicodeString & other)1037 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1038 const UnicodeString &one, const UnicodeString &other) {
1039 // This is called with primary-equal strings, but never with one.equals(other).
1040 UErrorCode status = U_ZERO_ERROR;
1041 UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1042 UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1043 if (U_FAILURE(status)) { return FALSE; }
1044 int32_t result = n1.countChar32() - n2.countChar32();
1045 if (result != 0) {
1046 return result < 0;
1047 }
1048 result = n1.compareCodePointOrder(n2);
1049 if (result != 0) {
1050 return result < 0;
1051 }
1052 return one.compareCodePointOrder(other) < 0;
1053 }
1054
1055 } // namespace
1056
1057 //
1058 // Constructor & Destructor for AlphabeticIndex::Record
1059 //
1060 // Records are internal only, instances are not directly surfaced in the public API.
1061 // This class is mostly struct-like, with all public fields.
1062
Record(const UnicodeString & name,const void * data)1063 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1064 : name_(name), data_(data) {}
1065
~Record()1066 AlphabeticIndex::Record::~Record() {
1067 }
1068
1069
addRecord(const UnicodeString & name,const void * data,UErrorCode & status)1070 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1071 if (U_FAILURE(status)) {
1072 return *this;
1073 }
1074 if (inputList_ == NULL) {
1075 inputList_ = new UVector(status);
1076 if (inputList_ == NULL) {
1077 status = U_MEMORY_ALLOCATION_ERROR;
1078 return *this;
1079 }
1080 inputList_->setDeleter(alphaIndex_deleteRecord);
1081 }
1082 Record *r = new Record(name, data);
1083 if (r == NULL) {
1084 status = U_MEMORY_ALLOCATION_ERROR;
1085 return *this;
1086 }
1087 inputList_->addElement(r, status);
1088 clearBuckets();
1089 //std::string ss;
1090 //std::string ss2;
1091 //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1092 // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1093 return *this;
1094 }
1095
1096
clearRecords(UErrorCode & status)1097 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1098 if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1099 inputList_->removeAllElements();
1100 clearBuckets();
1101 }
1102 return *this;
1103 }
1104
getBucketIndex(const UnicodeString & name,UErrorCode & status)1105 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1106 initBuckets(status);
1107 if (U_FAILURE(status)) {
1108 return 0;
1109 }
1110 return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1111 }
1112
1113
getBucketIndex() const1114 int32_t AlphabeticIndex::getBucketIndex() const {
1115 return labelsIterIndex_;
1116 }
1117
1118
nextBucket(UErrorCode & status)1119 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1120 if (U_FAILURE(status)) {
1121 return FALSE;
1122 }
1123 if (buckets_ == NULL && currentBucket_ != NULL) {
1124 status = U_ENUM_OUT_OF_SYNC_ERROR;
1125 return FALSE;
1126 }
1127 initBuckets(status);
1128 if (U_FAILURE(status)) {
1129 return FALSE;
1130 }
1131 ++labelsIterIndex_;
1132 if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1133 labelsIterIndex_ = buckets_->getBucketCount();
1134 return FALSE;
1135 }
1136 currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1137 resetRecordIterator();
1138 return TRUE;
1139 }
1140
getBucketLabel() const1141 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1142 if (currentBucket_ != NULL) {
1143 return currentBucket_->label_;
1144 } else {
1145 return emptyString_;
1146 }
1147 }
1148
1149
getBucketLabelType() const1150 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1151 if (currentBucket_ != NULL) {
1152 return currentBucket_->labelType_;
1153 } else {
1154 return U_ALPHAINDEX_NORMAL;
1155 }
1156 }
1157
1158
getBucketRecordCount() const1159 int32_t AlphabeticIndex::getBucketRecordCount() const {
1160 if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1161 return currentBucket_->records_->size();
1162 } else {
1163 return 0;
1164 }
1165 }
1166
1167
resetBucketIterator(UErrorCode & status)1168 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1169 if (U_FAILURE(status)) {
1170 return *this;
1171 }
1172 internalResetBucketIterator();
1173 return *this;
1174 }
1175
1176
nextRecord(UErrorCode & status)1177 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1178 if (U_FAILURE(status)) {
1179 return FALSE;
1180 }
1181 if (currentBucket_ == NULL) {
1182 // We are trying to iterate over the items in a bucket, but there is no
1183 // current bucket from the enumeration of buckets.
1184 status = U_INVALID_STATE_ERROR;
1185 return FALSE;
1186 }
1187 if (buckets_ == NULL) {
1188 status = U_ENUM_OUT_OF_SYNC_ERROR;
1189 return FALSE;
1190 }
1191 if (currentBucket_->records_ == NULL) {
1192 return FALSE;
1193 }
1194 ++itemsIterIndex_;
1195 if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1196 itemsIterIndex_ = currentBucket_->records_->size();
1197 return FALSE;
1198 }
1199 return TRUE;
1200 }
1201
1202
getRecordName() const1203 const UnicodeString &AlphabeticIndex::getRecordName() const {
1204 const UnicodeString *retStr = &emptyString_;
1205 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1206 itemsIterIndex_ >= 0 &&
1207 itemsIterIndex_ < currentBucket_->records_->size()) {
1208 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1209 retStr = &item->name_;
1210 }
1211 return *retStr;
1212 }
1213
getRecordData() const1214 const void *AlphabeticIndex::getRecordData() const {
1215 const void *retPtr = NULL;
1216 if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1217 itemsIterIndex_ >= 0 &&
1218 itemsIterIndex_ < currentBucket_->records_->size()) {
1219 Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1220 retPtr = item->data_;
1221 }
1222 return retPtr;
1223 }
1224
1225
resetRecordIterator()1226 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1227 itemsIterIndex_ = -1;
1228 return *this;
1229 }
1230
1231
1232
Bucket(const UnicodeString & label,const UnicodeString & lowerBoundary,UAlphabeticIndexLabelType type)1233 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1234 const UnicodeString &lowerBoundary,
1235 UAlphabeticIndexLabelType type)
1236 : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1237 displayBucket_(NULL), displayIndex_(-1),
1238 records_(NULL) {
1239 }
1240
1241
~Bucket()1242 AlphabeticIndex::Bucket::~Bucket() {
1243 delete records_;
1244 }
1245
1246 U_NAMESPACE_END
1247
1248 #endif // !UCONFIG_NO_COLLATION
1249