• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 2009-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include "unicode/utypes.h"
9 
10 #if !UCONFIG_NO_COLLATION
11 
12 #include "unicode/alphaindex.h"
13 #include "unicode/coll.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/normalizer2.h"
16 #include "unicode/tblcoll.h"
17 #include "unicode/uchar.h"
18 #include "unicode/ulocdata.h"
19 #include "unicode/uniset.h"
20 #include "unicode/uobject.h"
21 #include "unicode/usetiter.h"
22 #include "unicode/utf16.h"
23 
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "uassert.h"
27 #include "uvector.h"
28 #include "uvectr64.h"
29 
30 //#include <string>
31 //#include <iostream>
32 
33 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
34 
35 U_NAMESPACE_BEGIN
36 
37 namespace {
38 
39 /**
40  * Prefix string for Chinese index buckets.
41  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42  */
43 const UChar BASE[1] = { 0xFDD0 };
44 const int32_t BASE_LENGTH = 1;
45 
46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47                                 const UnicodeString &one, const UnicodeString &other);
48 
49 }  // namespace
50 
51 static int32_t U_CALLCONV
52 collatorComparator(const void *context, const void *left, const void *right);
53 
54 static int32_t U_CALLCONV
55 recordCompareFn(const void *context, const void *left, const void *right);
56 
57 //  UVector<Record *> support function, delete a Record.
58 static void U_CALLCONV
alphaIndex_deleteRecord(void * obj)59 alphaIndex_deleteRecord(void *obj) {
60     delete static_cast<AlphabeticIndex::Record *>(obj);
61 }
62 
63 namespace {
64 
ownedString(const UnicodeString & s,LocalPointer<UnicodeString> & owned,UErrorCode & errorCode)65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66                            UErrorCode &errorCode) {
67     if (U_FAILURE(errorCode)) { return NULL; }
68     if (owned.isValid()) {
69         return owned.orphan();
70     }
71     UnicodeString *p = new UnicodeString(s);
72     if (p == NULL) {
73         errorCode = U_MEMORY_ALLOCATION_ERROR;
74     }
75     return p;
76 }
77 
getString(const UVector & list,int32_t i)78 inline UnicodeString *getString(const UVector &list, int32_t i) {
79     return static_cast<UnicodeString *>(list[i]);
80 }
81 
getBucket(const UVector & list,int32_t i)82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84 }
85 
getRecord(const UVector & list,int32_t i)86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87     return static_cast<AlphabeticIndex::Record *>(list[i]);
88 }
89 
90 /**
91  * Like Java Collections.binarySearch(List, String, Comparator).
92  *
93  * @return the index>=0 where the item was found,
94  *         or the index<0 for inserting the string at ~index in sorted order
95  */
binarySearch(const UVector & list,const UnicodeString & s,const Collator & coll)96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97     if (list.size() == 0) { return ~0; }
98     int32_t start = 0;
99     int32_t limit = list.size();
100     for (;;) {
101         int32_t i = (start + limit) / 2;
102         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103         UErrorCode errorCode = U_ZERO_ERROR;
104         UCollationResult cmp = coll.compare(s, *si, errorCode);
105         if (cmp == UCOL_EQUAL) {
106             return i;
107         } else if (cmp < 0) {
108             if (i == start) {
109                 return ~start;  // insert s before *si
110             }
111             limit = i;
112         } else {
113             if (i == start) {
114                 return ~(start + 1);  // insert s after *si
115             }
116             start = i;
117         }
118     }
119 }
120 
121 }  // namespace
122 
123 // The BucketList is not in the anonymous namespace because only Clang
124 // seems to support its use in other classes from there.
125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126 class BucketList : public UObject {
127 public:
BucketList(UVector * bucketList,UVector * publicBucketList)128     BucketList(UVector *bucketList, UVector *publicBucketList)
129             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130         int32_t displayIndex = 0;
131         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133         }
134     }
135 
136     // The virtual destructor must not be inline.
137     // See ticket #8454 for details.
138     virtual ~BucketList();
139 
getBucketCount() const140     int32_t getBucketCount() const {
141         return immutableVisibleList_->size();
142     }
143 
getBucketIndex(const UnicodeString & name,const Collator & collatorPrimaryOnly,UErrorCode & errorCode)144     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145                            UErrorCode &errorCode) {
146         // binary search
147         int32_t start = 0;
148         int32_t limit = bucketList_->size();
149         while ((start + 1) < limit) {
150             int32_t i = (start + limit) / 2;
151             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152             UCollationResult nameVsBucket =
153                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154             if (nameVsBucket < 0) {
155                 limit = i;
156             } else {
157                 start = i;
158             }
159         }
160         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161         if (bucket->displayBucket_ != NULL) {
162             bucket = bucket->displayBucket_;
163         }
164         return bucket->displayIndex_;
165     }
166 
167     /** All of the buckets, visible and invisible. */
168     UVector *bucketList_;
169     /** Just the visible buckets. */
170     UVector *immutableVisibleList_;
171 };
172 
~BucketList()173 BucketList::~BucketList() {
174     delete bucketList_;
175     if (immutableVisibleList_ != bucketList_) {
176         delete immutableVisibleList_;
177     }
178 }
179 
~ImmutableIndex()180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181     delete buckets_;
182     delete collatorPrimaryOnly_;
183 }
184 
185 int32_t
getBucketCount() const186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187     return buckets_->getBucketCount();
188 }
189 
190 int32_t
getBucketIndex(const UnicodeString & name,UErrorCode & errorCode) const191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
192         const UnicodeString &name, UErrorCode &errorCode) const {
193     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194 }
195 
196 const AlphabeticIndex::Bucket *
getBucket(int32_t index) const197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198     if (0 <= index && index < buckets_->getBucketCount()) {
199         return icu::getBucket(*buckets_->immutableVisibleList_, index);
200     } else {
201         return NULL;
202     }
203 }
204 
AlphabeticIndex(const Locale & locale,UErrorCode & status)205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206         : inputList_(NULL),
207           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208           maxLabelCount_(99),
209           initialLabels_(NULL), firstCharsInScripts_(NULL),
210           collator_(NULL), collatorPrimaryOnly_(NULL),
211           buckets_(NULL) {
212     init(&locale, status);
213 }
214 
215 
AlphabeticIndex(RuleBasedCollator * collator,UErrorCode & status)216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217         : inputList_(NULL),
218           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219           maxLabelCount_(99),
220           initialLabels_(NULL), firstCharsInScripts_(NULL),
221           collator_(collator), collatorPrimaryOnly_(NULL),
222           buckets_(NULL) {
223     init(NULL, status);
224 }
225 
226 
227 
~AlphabeticIndex()228 AlphabeticIndex::~AlphabeticIndex() {
229     delete collator_;
230     delete collatorPrimaryOnly_;
231     delete firstCharsInScripts_;
232     delete buckets_;
233     delete inputList_;
234     delete initialLabels_;
235 }
236 
237 
addLabels(const UnicodeSet & additions,UErrorCode & status)238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239     if (U_FAILURE(status)) {
240         return *this;
241     }
242     initialLabels_->addAll(additions);
243     clearBuckets();
244     return *this;
245 }
246 
247 
addLabels(const Locale & locale,UErrorCode & status)248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249     addIndexExemplars(locale, status);
250     clearBuckets();
251     return *this;
252 }
253 
254 
buildImmutableIndex(UErrorCode & errorCode)255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256     if (U_FAILURE(errorCode)) { return NULL; }
257     // In C++, the ImmutableIndex must own its copy of the BucketList,
258     // even if it contains no records, for proper memory management.
259     // We could clone the buckets_ if they are not NULL,
260     // but that would be worth it only if this method is called multiple times,
261     // or called after using the old-style bucket iterator API.
262     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263     LocalPointer<RuleBasedCollator> coll(
264         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265     if (immutableBucketList.isNull() || coll.isNull()) {
266         errorCode = U_MEMORY_ALLOCATION_ERROR;
267         return NULL;
268     }
269     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270     if (immIndex == NULL) {
271         errorCode = U_MEMORY_ALLOCATION_ERROR;
272         return NULL;
273     }
274     // The ImmutableIndex adopted its parameter objects.
275     immutableBucketList.orphan();
276     coll.orphan();
277     return immIndex;
278 }
279 
getBucketCount(UErrorCode & status)280 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
281     initBuckets(status);
282     if (U_FAILURE(status)) {
283         return 0;
284     }
285     return buckets_->getBucketCount();
286 }
287 
288 
getRecordCount(UErrorCode & status)289 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
290     if (U_FAILURE(status) || inputList_ == NULL) {
291         return 0;
292     }
293     return inputList_->size();
294 }
295 
initLabels(UVector & indexCharacters,UErrorCode & errorCode) const296 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298     if (U_FAILURE(errorCode)) { return; }
299 
300     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301     const UnicodeString &overflowBoundary =
302         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303 
304     // We make a sorted array of elements.
305     // Some of the input may be redundant.
306     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307     // We filter out those cases.
308     UnicodeSetIterator iter(*initialLabels_);
309     while (iter.next()) {
310         const UnicodeString *item = &iter.getString();
311         LocalPointer<UnicodeString> ownedItem;
312         UBool checkDistinct;
313         int32_t itemLength = item->length();
314         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315             checkDistinct = FALSE;
316         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
317                 item->charAt(itemLength - 2) != 0x2a) {
318             // Use a label if it is marked with one trailing star,
319             // even if the label string sorts the same when all contractions are suppressed.
320             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321             item = ownedItem.getAlias();
322             if (item == NULL) {
323                 errorCode = U_MEMORY_ALLOCATION_ERROR;
324                 return;
325             }
326             checkDistinct = FALSE;
327         } else {
328             checkDistinct = TRUE;
329         }
330         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331             // Ignore a primary-ignorable or non-alphabetic index character.
332         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
333             // Ignore an index character that will land in the overflow bucket.
334         } else if (checkDistinct &&
335                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336             // Ignore a multi-code point index character that does not sort distinctly
337             // from the sequence of its separate characters.
338         } else {
339             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340             if (insertionPoint < 0) {
341                 indexCharacters.insertElementAt(
342                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343             } else {
344                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346                     indexCharacters.setElementAt(
347                         ownedString(*item, ownedItem, errorCode), insertionPoint);
348                 }
349             }
350         }
351     }
352     if (U_FAILURE(errorCode)) { return; }
353 
354     // if the result is still too large, cut down to maxCount elements, by removing every nth element
355 
356     int32_t size = indexCharacters.size() - 1;
357     if (size > maxLabelCount_) {
358         int32_t count = 0;
359         int32_t old = -1;
360         for (int32_t i = 0; i < indexCharacters.size();) {
361             ++count;
362             int32_t bump = count * maxLabelCount_ / size;
363             if (bump == old) {
364                 indexCharacters.removeElementAt(i);
365             } else {
366                 old = bump;
367                 ++i;
368             }
369         }
370     }
371 }
372 
373 namespace {
374 
fixLabel(const UnicodeString & current,UnicodeString & temp)375 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
376     if (!current.startsWith(BASE, BASE_LENGTH)) {
377         return current;
378     }
379     UChar rest = current.charAt(BASE_LENGTH);
380     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381         int32_t count = rest-0x2800;
382         temp.setTo((UChar)(0x30 + count % 10));
383         if (count >= 10) {
384             count /= 10;
385             temp.insert(0, (UChar)(0x30 + count % 10));
386             if (count >= 10) {
387                 count /= 10;
388                 temp.insert(0, (UChar)(0x30 + count));
389             }
390         }
391         return temp.append((UChar)0x5283);
392     }
393     return temp.setTo(current, BASE_LENGTH);
394 }
395 
hasMultiplePrimaryWeights(const RuleBasedCollator & coll,uint32_t variableTop,const UnicodeString & s,UVector64 & ces,UErrorCode & errorCode)396 UBool hasMultiplePrimaryWeights(
397         const RuleBasedCollator &coll, uint32_t variableTop,
398         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399     ces.removeAllElements();
400     coll.internalGetCEs(s, ces, errorCode);
401     if (U_FAILURE(errorCode)) { return FALSE; }
402     UBool seenPrimary = FALSE;
403     for (int32_t i = 0; i < ces.size(); ++i) {
404         int64_t ce = ces.elementAti(i);
405         uint32_t p = (uint32_t)(ce >> 32);
406         if (p > variableTop) {
407             // not primary ignorable
408             if (seenPrimary) {
409                 return TRUE;
410             }
411             seenPrimary = TRUE;
412         }
413     }
414     return FALSE;
415 }
416 
417 }  // namespace
418 
createBucketList(UErrorCode & errorCode) const419 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420     // Initialize indexCharacters.
421     UVector indexCharacters(errorCode);
422     indexCharacters.setDeleter(uprv_deleteUObject);
423     initLabels(indexCharacters, errorCode);
424     if (U_FAILURE(errorCode)) { return NULL; }
425 
426     // Variables for hasMultiplePrimaryWeights().
427     UVector64 ces(errorCode);
428     uint32_t variableTop;
429     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
430         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
431     } else {
432         variableTop = 0;
433     }
434     UBool hasInvisibleBuckets = FALSE;
435 
436     // Helper arrays for Chinese Pinyin collation.
437     Bucket *asciiBuckets[26] = {
438         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440     };
441     Bucket *pinyinBuckets[26] = {
442         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444     };
445     UBool hasPinyin = FALSE;
446 
447     LocalPointer<UVector> bucketList(new UVector(errorCode));
448     if (bucketList.isNull()) {
449         errorCode = U_MEMORY_ALLOCATION_ERROR;
450         return NULL;
451     }
452     bucketList->setDeleter(uprv_deleteUObject);
453 
454     // underflow bucket
455     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
456     if (bucket == NULL) {
457         errorCode = U_MEMORY_ALLOCATION_ERROR;
458         return NULL;
459     }
460     bucketList->addElement(bucket, errorCode);
461     if (U_FAILURE(errorCode)) { return NULL; }
462 
463     UnicodeString temp;
464 
465     // fix up the list, adding underflow, additions, overflow
466     // Insert inflow labels as needed.
467     int32_t scriptIndex = -1;
468     const UnicodeString *scriptUpperBoundary = &emptyString_;
469     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
470         UnicodeString &current = *getString(indexCharacters, i);
471         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
472             // We crossed the script boundary into a new script.
473             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
474             UBool skippedScript = FALSE;
475             for (;;) {
476                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
477                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
478                     break;
479                 }
480                 skippedScript = TRUE;
481             }
482             if (skippedScript && bucketList->size() > 1) {
483                 // We are skipping one or more scripts,
484                 // and we are not just getting out of the underflow label.
485                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
486                 if (bucket == NULL) {
487                     errorCode = U_MEMORY_ALLOCATION_ERROR;
488                     return NULL;
489                 }
490                 bucketList->addElement(bucket, errorCode);
491             }
492         }
493         // Add a bucket with the current label.
494         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
495         if (bucket == NULL) {
496             errorCode = U_MEMORY_ALLOCATION_ERROR;
497             return NULL;
498         }
499         bucketList->addElement(bucket, errorCode);
500         // Remember ASCII and Pinyin buckets for Pinyin redirects.
501         UChar c;
502         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
503             asciiBuckets[c - 0x41] = bucket;
504         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
505                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
506             pinyinBuckets[c - 0x41] = bucket;
507             hasPinyin = TRUE;
508         }
509         // Check for multiple primary weights.
510         if (!current.startsWith(BASE, BASE_LENGTH) &&
511                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
512                                           ces, errorCode) &&
513                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
514             // "AE-ligature" or "Sch" etc.
515             for (int32_t i = bucketList->size() - 2;; --i) {
516                 Bucket *singleBucket = getBucket(*bucketList, i);
517                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
518                     // There is no single-character bucket since the last
519                     // underflow or inflow label.
520                     break;
521                 }
522                 if (singleBucket->displayBucket_ == NULL &&
523                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
524                                                    singleBucket->lowerBoundary_,
525                                                    ces, errorCode)) {
526                     // Add an invisible bucket that redirects strings greater than the expansion
527                     // to the previous single-character bucket.
528                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
529                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
530                     bucket = new Bucket(emptyString_,
531                         UnicodeString(current).append((UChar)0xFFFF),
532                         U_ALPHAINDEX_NORMAL);
533                     if (bucket == NULL) {
534                         errorCode = U_MEMORY_ALLOCATION_ERROR;
535                         return NULL;
536                     }
537                     bucket->displayBucket_ = singleBucket;
538                     bucketList->addElement(bucket, errorCode);
539                     hasInvisibleBuckets = TRUE;
540                     break;
541                 }
542             }
543         }
544     }
545     if (U_FAILURE(errorCode)) { return NULL; }
546     if (bucketList->size() == 1) {
547         // No real labels, show only the underflow label.
548         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
549         if (bl == NULL) {
550             errorCode = U_MEMORY_ALLOCATION_ERROR;
551             return NULL;
552         }
553         bucketList.orphan();
554         return bl;
555     }
556     // overflow bucket
557     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
558     if (bucket == NULL) {
559         errorCode = U_MEMORY_ALLOCATION_ERROR;
560         return NULL;
561     }
562     bucketList->addElement(bucket, errorCode); // final
563 
564     if (hasPinyin) {
565         // Redirect Pinyin buckets.
566         Bucket *asciiBucket = NULL;
567         for (int32_t i = 0; i < 26; ++i) {
568             if (asciiBuckets[i] != NULL) {
569                 asciiBucket = asciiBuckets[i];
570             }
571             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
572                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
573                 hasInvisibleBuckets = TRUE;
574             }
575         }
576     }
577 
578     if (U_FAILURE(errorCode)) { return NULL; }
579     if (!hasInvisibleBuckets) {
580         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
581         if (bl == NULL) {
582             errorCode = U_MEMORY_ALLOCATION_ERROR;
583             return NULL;
584         }
585         bucketList.orphan();
586         return bl;
587     }
588     // Merge inflow buckets that are visually adjacent.
589     // Iterate backwards: Merge inflow into overflow rather than the other way around.
590     int32_t i = bucketList->size() - 1;
591     Bucket *nextBucket = getBucket(*bucketList, i);
592     while (--i > 0) {
593         bucket = getBucket(*bucketList, i);
594         if (bucket->displayBucket_ != NULL) {
595             continue;  // skip invisible buckets
596         }
597         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
598             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
599                 bucket->displayBucket_ = nextBucket;
600                 continue;
601             }
602         }
603         nextBucket = bucket;
604     }
605 
606     LocalPointer<UVector> publicBucketList(new UVector(errorCode));
607     if (bucketList.isNull()) {
608         errorCode = U_MEMORY_ALLOCATION_ERROR;
609         return NULL;
610     }
611     // Do not call publicBucketList->setDeleter():
612     // This vector shares its objects with the bucketList.
613     for (int32_t i = 0; i < bucketList->size(); ++i) {
614         bucket = getBucket(*bucketList, i);
615         if (bucket->displayBucket_ == NULL) {
616             publicBucketList->addElement(bucket, errorCode);
617         }
618     }
619     if (U_FAILURE(errorCode)) { return NULL; }
620     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
621     if (bl == NULL) {
622         errorCode = U_MEMORY_ALLOCATION_ERROR;
623         return NULL;
624     }
625     bucketList.orphan();
626     publicBucketList.orphan();
627     return bl;
628 }
629 
630 /**
631  * Creates an index, and buckets and sorts the list of records into the index.
632  */
initBuckets(UErrorCode & errorCode)633 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
634     if (U_FAILURE(errorCode) || buckets_ != NULL) {
635         return;
636     }
637     buckets_ = createBucketList(errorCode);
638     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
639         return;
640     }
641 
642     // Sort the records by name.
643     // Stable sort preserves input order of collation duplicates.
644     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
645 
646     // Now, we traverse all of the input, which is now sorted.
647     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
648     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
649     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
650     // we need to improve it for that case.
651 
652     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
653     int32_t bucketIndex = 1;
654     Bucket *nextBucket;
655     const UnicodeString *upperBoundary;
656     if (bucketIndex < buckets_->bucketList_->size()) {
657         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
658         upperBoundary = &nextBucket->lowerBoundary_;
659     } else {
660         nextBucket = NULL;
661         upperBoundary = NULL;
662     }
663     for (int32_t i = 0; i < inputList_->size(); ++i) {
664         Record *r = getRecord(*inputList_, i);
665         // if the current bucket isn't the right one, find the one that is
666         // We have a special flag for the last bucket so that we don't look any further
667         while (upperBoundary != NULL &&
668                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
669             currentBucket = nextBucket;
670             // now reset the boundary that we compare against
671             if (bucketIndex < buckets_->bucketList_->size()) {
672                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
673                 upperBoundary = &nextBucket->lowerBoundary_;
674             } else {
675                 upperBoundary = NULL;
676             }
677         }
678         // now put the record into the bucket.
679         Bucket *bucket = currentBucket;
680         if (bucket->displayBucket_ != NULL) {
681             bucket = bucket->displayBucket_;
682         }
683         if (bucket->records_ == NULL) {
684             bucket->records_ = new UVector(errorCode);
685             if (bucket->records_ == NULL) {
686                 errorCode = U_MEMORY_ALLOCATION_ERROR;
687                 return;
688             }
689         }
690         bucket->records_->addElement(r, errorCode);
691     }
692 }
693 
clearBuckets()694 void AlphabeticIndex::clearBuckets() {
695     if (buckets_ != NULL) {
696         delete buckets_;
697         buckets_ = NULL;
698         internalResetBucketIterator();
699     }
700 }
701 
internalResetBucketIterator()702 void AlphabeticIndex::internalResetBucketIterator() {
703     labelsIterIndex_ = -1;
704     currentBucket_ = NULL;
705 }
706 
707 
addIndexExemplars(const Locale & locale,UErrorCode & status)708 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
709     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
710     if (U_FAILURE(status)) {
711         return;
712     }
713 
714     UnicodeSet exemplars;
715     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
716     if (U_SUCCESS(status)) {
717         initialLabels_->addAll(exemplars);
718         return;
719     }
720     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
721 
722     // The locale data did not include explicit Index characters.
723     // Synthesize a set of them from the locale's standard exemplar characters.
724     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
725     if (U_FAILURE(status)) {
726         return;
727     }
728 
729     // question: should we add auxiliary exemplars?
730     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
731         exemplars.add(0x61, 0x7A);
732     }
733     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
734         // cut down to small list
735         exemplars.remove(0xAC00, 0xD7A3).
736             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
737             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
738             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
739             add(0xD30C).add(0xD558);
740     }
741     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
742         // cut down to small list
743         // make use of the fact that Ethiopic is allocated in 8's, where
744         // the base is 0 mod 8.
745         UnicodeSet ethiopic(
746             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
747         UnicodeSetIterator it(ethiopic);
748         while (it.next() && !it.isString()) {
749             if ((it.getCodepoint() & 0x7) != 0) {
750                 exemplars.remove(it.getCodepoint());
751             }
752         }
753     }
754 
755     // Upper-case any that aren't already so.
756     //   (We only do this for synthesized index characters.)
757     UnicodeSetIterator it(exemplars);
758     UnicodeString upperC;
759     while (it.next()) {
760         const UnicodeString &exemplarC = it.getString();
761         upperC = exemplarC;
762         upperC.toUpper(locale);
763         initialLabels_->add(upperC);
764     }
765 }
766 
addChineseIndexCharacters(UErrorCode & errorCode)767 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
768     UnicodeSet contractions;
769     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
770     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
771     initialLabels_->addAll(contractions);
772     UnicodeSetIterator iter(contractions);
773     while (iter.next()) {
774         const UnicodeString &s = iter.getString();
775         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
776         UChar c = s.charAt(s.length() - 1);
777         if (0x41 <= c && c <= 0x5A) {  // A-Z
778             // There are Pinyin labels, add ASCII A-Z labels as well.
779             initialLabels_->add(0x41, 0x5A);  // A-Z
780             break;
781         }
782     }
783     return TRUE;
784 }
785 
786 
787 /*
788  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
789  */
790 static const UChar CGJ = 0x034F;
separated(const UnicodeString & item)791 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
792     UnicodeString result;
793     if (item.length() == 0) {
794         return result;
795     }
796     int32_t i = 0;
797     for (;;) {
798         UChar32  cp = item.char32At(i);
799         result.append(cp);
800         i = item.moveIndex32(i, 1);
801         if (i >= item.length()) {
802             break;
803         }
804         result.append(CGJ);
805     }
806     return result;
807 }
808 
809 
operator ==(const AlphabeticIndex &) const810 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
811     return FALSE;
812 }
813 
814 
operator !=(const AlphabeticIndex &) const815 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
816     return FALSE;
817 }
818 
819 
getCollator() const820 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
821     return *collator_;
822 }
823 
824 
getInflowLabel() const825 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
826     return inflowLabel_;
827 }
828 
getOverflowLabel() const829 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
830     return overflowLabel_;
831 }
832 
833 
getUnderflowLabel() const834 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
835     return underflowLabel_;
836 }
837 
838 
setInflowLabel(const UnicodeString & label,UErrorCode &)839 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
840     inflowLabel_ = label;
841     clearBuckets();
842     return *this;
843 }
844 
845 
setOverflowLabel(const UnicodeString & label,UErrorCode &)846 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
847     overflowLabel_ = label;
848     clearBuckets();
849     return *this;
850 }
851 
852 
setUnderflowLabel(const UnicodeString & label,UErrorCode &)853 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
854     underflowLabel_ = label;
855     clearBuckets();
856     return *this;
857 }
858 
859 
getMaxLabelCount() const860 int32_t AlphabeticIndex::getMaxLabelCount() const {
861     return maxLabelCount_;
862 }
863 
864 
setMaxLabelCount(int32_t maxLabelCount,UErrorCode & status)865 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
866     if (U_FAILURE(status)) {
867         return *this;
868     }
869     if (maxLabelCount <= 0) {
870         status = U_ILLEGAL_ARGUMENT_ERROR;
871         return *this;
872     }
873     maxLabelCount_ = maxLabelCount;
874     clearBuckets();
875     return *this;
876 }
877 
878 
879 //
880 //  init() - Common code for constructors.
881 //
882 
init(const Locale * locale,UErrorCode & status)883 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
884     if (U_FAILURE(status)) { return; }
885     if (locale == NULL && collator_ == NULL) {
886         status = U_ILLEGAL_ARGUMENT_ERROR;
887         return;
888     }
889 
890     initialLabels_         = new UnicodeSet();
891     if (initialLabels_ == NULL) {
892         status = U_MEMORY_ALLOCATION_ERROR;
893         return;
894     }
895 
896     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
897     overflowLabel_ = inflowLabel_;
898     underflowLabel_ = inflowLabel_;
899 
900     if (collator_ == NULL) {
901         Collator *coll = Collator::createInstance(*locale, status);
902         if (U_FAILURE(status)) {
903             delete coll;
904             return;
905         }
906         if (coll == NULL) {
907             status = U_MEMORY_ALLOCATION_ERROR;
908             return;
909         }
910         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
911         if (collator_ == NULL) {
912             delete coll;
913             status = U_UNSUPPORTED_ERROR;
914             return;
915         }
916     }
917     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
918     if (collatorPrimaryOnly_ == NULL) {
919         status = U_MEMORY_ALLOCATION_ERROR;
920         return;
921     }
922     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
923     firstCharsInScripts_ = firstStringsInScript(status);
924     if (U_FAILURE(status)) { return; }
925     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
926     // Guard against a degenerate collator where
927     // some script boundary strings are primary ignorable.
928     for (;;) {
929         if (U_FAILURE(status)) { return; }
930         if (firstCharsInScripts_->isEmpty()) {
931             // AlphabeticIndex requires some non-ignorable script boundary strings.
932             status = U_ILLEGAL_ARGUMENT_ERROR;
933             return;
934         }
935         if (collatorPrimaryOnly_->compare(
936                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
937                 emptyString_, status) == UCOL_EQUAL) {
938             firstCharsInScripts_->removeElementAt(0);
939         } else {
940             break;
941         }
942     }
943 
944     // Chinese index characters, which are specific to each of the several Chinese tailorings,
945     // take precedence over the single locale data exemplar set per language.
946     if (!addChineseIndexCharacters(status) && locale != NULL) {
947         addIndexExemplars(*locale, status);
948     }
949 }
950 
951 
952 //
953 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
954 //
955 static int32_t U_CALLCONV
collatorComparator(const void * context,const void * left,const void * right)956 collatorComparator(const void *context, const void *left, const void *right) {
957     const UElement *leftElement = static_cast<const UElement *>(left);
958     const UElement *rightElement = static_cast<const UElement *>(right);
959     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
960     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
961 
962     if (leftString == rightString) {
963         // Catches case where both are NULL
964         return 0;
965     }
966     if (leftString == NULL) {
967         return 1;
968     };
969     if (rightString == NULL) {
970         return -1;
971     }
972     const Collator *col = static_cast<const Collator *>(context);
973     UErrorCode errorCode = U_ZERO_ERROR;
974     return col->compare(*leftString, *rightString, errorCode);
975 }
976 
977 //
978 //  Comparison function for UVector<Record *> sorting with a collator.
979 //
980 static int32_t U_CALLCONV
recordCompareFn(const void * context,const void * left,const void * right)981 recordCompareFn(const void *context, const void *left, const void *right) {
982     const UElement *leftElement = static_cast<const UElement *>(left);
983     const UElement *rightElement = static_cast<const UElement *>(right);
984     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
985     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
986     const Collator *col = static_cast<const Collator *>(context);
987     UErrorCode errorCode = U_ZERO_ERROR;
988     return col->compare(leftRec->name_, rightRec->name_, errorCode);
989 }
990 
firstStringsInScript(UErrorCode & status)991 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
992     if (U_FAILURE(status)) {
993         return NULL;
994     }
995     LocalPointer<UVector> dest(new UVector(status));
996     if (dest.isNull()) {
997         status = U_MEMORY_ALLOCATION_ERROR;
998         return NULL;
999     }
1000     dest->setDeleter(uprv_deleteUObject);
1001     // Fetch the script-first-primary contractions which are defined in the root collator.
1002     // They all start with U+FDD1.
1003     UnicodeSet set;
1004     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1005     if (U_FAILURE(status)) {
1006         return NULL;
1007     }
1008     if (set.isEmpty()) {
1009         status = U_UNSUPPORTED_ERROR;
1010         return NULL;
1011     }
1012     UnicodeSetIterator iter(set);
1013     while (iter.next()) {
1014         const UnicodeString &boundary = iter.getString();
1015         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1016         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1017             // Ignore boundaries for the special reordering groups.
1018             // Take only those for "real scripts" (where the sample character is a Letter,
1019             // and the one for unassigned implicit weights (Cn).
1020             continue;
1021         }
1022         UnicodeString *s = new UnicodeString(boundary);
1023         if (s == NULL) {
1024             status = U_MEMORY_ALLOCATION_ERROR;
1025             return NULL;
1026         }
1027         dest->addElement(s, status);
1028     }
1029     return dest.orphan();
1030 }
1031 
1032 
1033 namespace {
1034 
1035 /**
1036  * Returns true if one index character string is "better" than the other.
1037  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1038  * better, and otherwise binary-less-than is better.
1039  */
isOneLabelBetterThanOther(const Normalizer2 & nfkdNormalizer,const UnicodeString & one,const UnicodeString & other)1040 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1041                                 const UnicodeString &one, const UnicodeString &other) {
1042     // This is called with primary-equal strings, but never with one.equals(other).
1043     UErrorCode status = U_ZERO_ERROR;
1044     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1045     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1046     if (U_FAILURE(status)) { return FALSE; }
1047     int32_t result = n1.countChar32() - n2.countChar32();
1048     if (result != 0) {
1049         return result < 0;
1050     }
1051     result = n1.compareCodePointOrder(n2);
1052     if (result != 0) {
1053         return result < 0;
1054     }
1055     return one.compareCodePointOrder(other) < 0;
1056 }
1057 
1058 }  // namespace
1059 
1060 //
1061 //  Constructor & Destructor for AlphabeticIndex::Record
1062 //
1063 //     Records are internal only, instances are not directly surfaced in the public API.
1064 //     This class is mostly struct-like, with all public fields.
1065 
Record(const UnicodeString & name,const void * data)1066 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1067         : name_(name), data_(data) {}
1068 
~Record()1069 AlphabeticIndex::Record::~Record() {
1070 }
1071 
1072 
addRecord(const UnicodeString & name,const void * data,UErrorCode & status)1073 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1074     if (U_FAILURE(status)) {
1075         return *this;
1076     }
1077     if (inputList_ == NULL) {
1078         inputList_ = new UVector(status);
1079         if (inputList_ == NULL) {
1080             status = U_MEMORY_ALLOCATION_ERROR;
1081             return *this;
1082         }
1083         inputList_->setDeleter(alphaIndex_deleteRecord);
1084     }
1085     Record *r = new Record(name, data);
1086     if (r == NULL) {
1087         status = U_MEMORY_ALLOCATION_ERROR;
1088         return *this;
1089     }
1090     inputList_->addElement(r, status);
1091     clearBuckets();
1092     //std::string ss;
1093     //std::string ss2;
1094     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1095     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1096     return *this;
1097 }
1098 
1099 
clearRecords(UErrorCode & status)1100 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1101     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1102         inputList_->removeAllElements();
1103         clearBuckets();
1104     }
1105     return *this;
1106 }
1107 
getBucketIndex(const UnicodeString & name,UErrorCode & status)1108 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1109     initBuckets(status);
1110     if (U_FAILURE(status)) {
1111         return 0;
1112     }
1113     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1114 }
1115 
1116 
getBucketIndex() const1117 int32_t AlphabeticIndex::getBucketIndex() const {
1118     return labelsIterIndex_;
1119 }
1120 
1121 
nextBucket(UErrorCode & status)1122 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1123     if (U_FAILURE(status)) {
1124         return FALSE;
1125     }
1126     if (buckets_ == NULL && currentBucket_ != NULL) {
1127         status = U_ENUM_OUT_OF_SYNC_ERROR;
1128         return FALSE;
1129     }
1130     initBuckets(status);
1131     if (U_FAILURE(status)) {
1132         return FALSE;
1133     }
1134     ++labelsIterIndex_;
1135     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1136         labelsIterIndex_ = buckets_->getBucketCount();
1137         return FALSE;
1138     }
1139     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1140     resetRecordIterator();
1141     return TRUE;
1142 }
1143 
getBucketLabel() const1144 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1145     if (currentBucket_ != NULL) {
1146         return currentBucket_->label_;
1147     } else {
1148         return emptyString_;
1149     }
1150 }
1151 
1152 
getBucketLabelType() const1153 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1154     if (currentBucket_ != NULL) {
1155         return currentBucket_->labelType_;
1156     } else {
1157         return U_ALPHAINDEX_NORMAL;
1158     }
1159 }
1160 
1161 
getBucketRecordCount() const1162 int32_t AlphabeticIndex::getBucketRecordCount() const {
1163     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1164         return currentBucket_->records_->size();
1165     } else {
1166         return 0;
1167     }
1168 }
1169 
1170 
resetBucketIterator(UErrorCode & status)1171 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1172     if (U_FAILURE(status)) {
1173         return *this;
1174     }
1175     internalResetBucketIterator();
1176     return *this;
1177 }
1178 
1179 
nextRecord(UErrorCode & status)1180 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1181     if (U_FAILURE(status)) {
1182         return FALSE;
1183     }
1184     if (currentBucket_ == NULL) {
1185         // We are trying to iterate over the items in a bucket, but there is no
1186         // current bucket from the enumeration of buckets.
1187         status = U_INVALID_STATE_ERROR;
1188         return FALSE;
1189     }
1190     if (buckets_ == NULL) {
1191         status = U_ENUM_OUT_OF_SYNC_ERROR;
1192         return FALSE;
1193     }
1194     if (currentBucket_->records_ == NULL) {
1195         return FALSE;
1196     }
1197     ++itemsIterIndex_;
1198     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1199         itemsIterIndex_  = currentBucket_->records_->size();
1200         return FALSE;
1201     }
1202     return TRUE;
1203 }
1204 
1205 
getRecordName() const1206 const UnicodeString &AlphabeticIndex::getRecordName() const {
1207     const UnicodeString *retStr = &emptyString_;
1208     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1209         itemsIterIndex_ >= 0 &&
1210         itemsIterIndex_ < currentBucket_->records_->size()) {
1211             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1212             retStr = &item->name_;
1213     }
1214     return *retStr;
1215 }
1216 
getRecordData() const1217 const void *AlphabeticIndex::getRecordData() const {
1218     const void *retPtr = NULL;
1219     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1220         itemsIterIndex_ >= 0 &&
1221         itemsIterIndex_ < currentBucket_->records_->size()) {
1222             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1223             retPtr = item->data_;
1224     }
1225     return retPtr;
1226 }
1227 
1228 
resetRecordIterator()1229 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1230     itemsIterIndex_ = -1;
1231     return *this;
1232 }
1233 
1234 
1235 
Bucket(const UnicodeString & label,const UnicodeString & lowerBoundary,UAlphabeticIndexLabelType type)1236 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1237                                 const UnicodeString &lowerBoundary,
1238                                 UAlphabeticIndexLabelType type)
1239         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1240           displayBucket_(NULL), displayIndex_(-1),
1241           records_(NULL) {
1242 }
1243 
1244 
~Bucket()1245 AlphabeticIndex::Bucket::~Bucket() {
1246     delete records_;
1247 }
1248 
1249 U_NAMESPACE_END
1250 
1251 #endif  // !UCONFIG_NO_COLLATION
1252