• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2007, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 *  Date      Name        Description
18 *
19 *  6/23/97   helena      Adding comments to make code more readable.
20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99   aliu        Ported Thai collation support from Java.
22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
24 *                        private constructor and no calls are made to it
25 */
26 
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_COLLATION
30 
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "cmemory.h"
35 
36 
37 /* Constants --------------------------------------------------------------- */
38 
39 U_NAMESPACE_BEGIN
40 
// Expands ICU's UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for
// CollationElementIterator. Presumably this supplies the class-ID (RTTI)
// definitions for the class -- confirm against unicode/uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42 
43 /* CollationElementIterator public constructor/destructor ------------------ */
44 
45 CollationElementIterator::CollationElementIterator(
46                                          const CollationElementIterator& other)
47                                          : UObject(other), isDataOwned_(TRUE)
48 {
49     UErrorCode status = U_ZERO_ERROR;
50     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51                                 &status);
52 
53     *this = other;
54 }
55 
~CollationElementIterator()56 CollationElementIterator::~CollationElementIterator()
57 {
58     if (isDataOwned_) {
59         ucol_closeElements(m_data_);
60     }
61 }
62 
63 /* CollationElementIterator public methods --------------------------------- */
64 
getOffset() const65 int32_t CollationElementIterator::getOffset() const
66 {
67     return ucol_getOffset(m_data_);
68 }
69 
70 /**
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 *         occured or if the end of string has been reached
74 */
next(UErrorCode & status)75 int32_t CollationElementIterator::next(UErrorCode& status)
76 {
77     return ucol_next(m_data_, &status);
78 }
79 
operator !=(const CollationElementIterator & other) const80 UBool CollationElementIterator::operator!=(
81                                   const CollationElementIterator& other) const
82 {
83     return !(*this == other);
84 }
85 
operator ==(const CollationElementIterator & that) const86 UBool CollationElementIterator::operator==(
87                                     const CollationElementIterator& that) const
88 {
89     if (this == &that || m_data_ == that.m_data_) {
90         return TRUE;
91     }
92 
93     // option comparison
94     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
95     {
96         return FALSE;
97     }
98 
99     // the constructor and setText always sets a length
100     // and we only compare the string not the contents of the normalization
101     // buffer
102     int thislength = m_data_->iteratordata_.endp -
103                      m_data_->iteratordata_.string;
104     int thatlength = that.m_data_->iteratordata_.endp -
105                      that.m_data_->iteratordata_.string;
106 
107     if (thislength != thatlength) {
108         return FALSE;
109     }
110 
111     if (uprv_memcmp(m_data_->iteratordata_.string,
112                     that.m_data_->iteratordata_.string,
113                     thislength * U_SIZEOF_UCHAR) != 0) {
114         return FALSE;
115     }
116     if (getOffset() != that.getOffset()) {
117         return FALSE;
118     }
119 
120     // checking normalization buffer
121     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
122         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
123             return FALSE;
124         }
125         // both are in the normalization buffer
126         if (m_data_->iteratordata_.pos
127             - m_data_->iteratordata_.writableBuffer
128             != that.m_data_->iteratordata_.pos
129             - that.m_data_->iteratordata_.writableBuffer) {
130             // not in the same position in the normalization buffer
131             return FALSE;
132         }
133     }
134     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
135         return FALSE;
136     }
137     // checking ce position
138     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
139             == (that.m_data_->iteratordata_.CEpos
140                                         - that.m_data_->iteratordata_.CEs);
141 }
142 
143 /**
144 * Get the ordering priority of the previous collation element in the string.
145 * @param status the error code status.
146 * @return the previous element's ordering. Returns NULLORDER if an error has
147 *         occured or if the start of string has been reached.
148 */
previous(UErrorCode & status)149 int32_t CollationElementIterator::previous(UErrorCode& status)
150 {
151     return ucol_previous(m_data_, &status);
152 }
153 
154 /**
155 * Resets the cursor to the beginning of the string.
156 */
reset()157 void CollationElementIterator::reset()
158 {
159     ucol_reset(m_data_);
160 }
161 
setOffset(int32_t newOffset,UErrorCode & status)162 void CollationElementIterator::setOffset(int32_t newOffset,
163                                          UErrorCode& status)
164 {
165     ucol_setOffset(m_data_, newOffset, &status);
166 }
167 
168 /**
169 * Sets the source to the new source string.
170 */
setText(const UnicodeString & source,UErrorCode & status)171 void CollationElementIterator::setText(const UnicodeString& source,
172                                        UErrorCode& status)
173 {
174     if (U_FAILURE(status)) {
175         return;
176     }
177 
178     int32_t length = source.length();
179     UChar *string = NULL;
180     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
181         uprv_free(m_data_->iteratordata_.string);
182     }
183     m_data_->isWritable = TRUE;
184     if (length > 0) {
185         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
186         /* test for NULL */
187         if (string == NULL) {
188             status = U_MEMORY_ALLOCATION_ERROR;
189             return;
190         }
191         u_memcpy(string, source.getBuffer(), length);
192     }
193     else {
194         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
195         /* test for NULL */
196         if (string == NULL) {
197             status = U_MEMORY_ALLOCATION_ERROR;
198             return;
199         }
200         *string = 0;
201     }
202     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
203         &m_data_->iteratordata_);
204 
205     m_data_->reset_   = TRUE;
206 }
207 
208 // Sets the source to the new character iterator.
setText(CharacterIterator & source,UErrorCode & status)209 void CollationElementIterator::setText(CharacterIterator& source,
210                                        UErrorCode& status)
211 {
212     if (U_FAILURE(status))
213         return;
214 
215     int32_t length = source.getLength();
216     UChar *buffer = NULL;
217 
218     if (length == 0) {
219         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
220         /* test for NULL */
221         if (buffer == NULL) {
222             status = U_MEMORY_ALLOCATION_ERROR;
223             return;
224         }
225         *buffer = 0;
226     }
227     else {
228         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
229         /* test for NULL */
230         if (buffer == NULL) {
231             status = U_MEMORY_ALLOCATION_ERROR;
232             return;
233         }
234         /*
235         Using this constructor will prevent buffer from being removed when
236         string gets removed
237         */
238         UnicodeString string;
239         source.getText(string);
240         u_memcpy(buffer, string.getBuffer(), length);
241     }
242 
243     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
244         uprv_free(m_data_->iteratordata_.string);
245     }
246     m_data_->isWritable = TRUE;
247     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
248         &m_data_->iteratordata_);
249     m_data_->reset_   = TRUE;
250 }
251 
strengthOrder(int32_t order) const252 int32_t CollationElementIterator::strengthOrder(int32_t order) const
253 {
254     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
255     // Mask off the unwanted differences.
256     if (s == UCOL_PRIMARY) {
257         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
258     }
259     else if (s == UCOL_SECONDARY) {
260         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
261     }
262 
263     return order;
264 }
265 
266 /* CollationElementIterator private constructors/destructors --------------- */
267 
268 /**
269 * This is the "real" constructor for this class; it constructs an iterator
270 * over the source text using the specified collator
271 */
CollationElementIterator(const UnicodeString & sourceText,const RuleBasedCollator * order,UErrorCode & status)272 CollationElementIterator::CollationElementIterator(
273                                                const UnicodeString& sourceText,
274                                                const RuleBasedCollator* order,
275                                                UErrorCode& status)
276                                                : isDataOwned_(TRUE)
277 {
278     if (U_FAILURE(status)) {
279         return;
280     }
281 
282     int32_t length = sourceText.length();
283     UChar *string = NULL;
284 
285     if (length > 0) {
286         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
287         /* test for NULL */
288         if (string == NULL) {
289             status = U_MEMORY_ALLOCATION_ERROR;
290             return;
291         }
292         /*
293         Using this constructor will prevent buffer from being removed when
294         string gets removed
295         */
296         u_memcpy(string, sourceText.getBuffer(), length);
297     }
298     else {
299         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
300         /* test for NULL */
301         if (string == NULL) {
302             status = U_MEMORY_ALLOCATION_ERROR;
303             return;
304         }
305         *string = 0;
306     }
307     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
308 
309     /* Test for buffer overflows */
310     if (U_FAILURE(status)) {
311         return;
312     }
313     m_data_->isWritable = TRUE;
314 }
315 
316 /**
317 * This is the "real" constructor for this class; it constructs an iterator over
318 * the source text using the specified collator
319 */
CollationElementIterator(const CharacterIterator & sourceText,const RuleBasedCollator * order,UErrorCode & status)320 CollationElementIterator::CollationElementIterator(
321                                            const CharacterIterator& sourceText,
322                                            const RuleBasedCollator* order,
323                                            UErrorCode& status)
324                                            : isDataOwned_(TRUE)
325 {
326     if (U_FAILURE(status))
327         return;
328 
329     // **** should I just drop this test? ****
330     /*
331     if ( sourceText.endIndex() != 0 )
332     {
333         // A CollationElementIterator is really a two-layered beast.
334         // Internally it uses a Normalizer to munge the source text into a form
335         // where all "composed" Unicode characters (such as \u00FC) are split into a
336         // normal character and a combining accent character.
337         // Afterward, CollationElementIterator does its own processing to handle
338         // expanding and contracting collation sequences, ignorables, and so on.
339 
340         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
341                                 ? Normalizer::NO_OP : order->getDecomposition();
342 
343         text = new Normalizer(sourceText, decomp);
344         if (text == NULL)
345         status = U_MEMORY_ALLOCATION_ERROR;
346     }
347     */
348     int32_t length = sourceText.getLength();
349     UChar *buffer;
350     if (length > 0) {
351         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
352         /* test for NULL */
353         if (buffer == NULL) {
354             status = U_MEMORY_ALLOCATION_ERROR;
355             return;
356         }
357         /*
358         Using this constructor will prevent buffer from being removed when
359         string gets removed
360         */
361         UnicodeString string(buffer, length, length);
362         ((CharacterIterator &)sourceText).getText(string);
363         const UChar *temp = string.getBuffer();
364         u_memcpy(buffer, temp, length);
365     }
366     else {
367         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
368         /* test for NULL */
369         if (buffer == NULL) {
370             status = U_MEMORY_ALLOCATION_ERROR;
371             return;
372         }
373         *buffer = 0;
374     }
375     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
376 
377     /* Test for buffer overflows */
378     if (U_FAILURE(status)) {
379         return;
380     }
381     m_data_->isWritable = TRUE;
382 }
383 
384 /* CollationElementIterator protected methods ----------------------------- */
385 
operator =(const CollationElementIterator & other)386 const CollationElementIterator& CollationElementIterator::operator=(
387                                          const CollationElementIterator& other)
388 {
389     if (this != &other)
390     {
391         UCollationElements *ucolelem      = this->m_data_;
392         UCollationElements *otherucolelem = other.m_data_;
393         collIterate        *coliter       = &(ucolelem->iteratordata_);
394         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
395         int                length         = 0;
396 
397         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
398         // the normalization buffer
399         length = othercoliter->endp - othercoliter->string;
400 
401         ucolelem->reset_         = otherucolelem->reset_;
402         ucolelem->isWritable     = TRUE;
403 
404         /* create a duplicate of string */
405         if (length > 0) {
406             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
407             if(coliter->string != NULL) {
408                 uprv_memcpy(coliter->string, othercoliter->string,
409                     length * U_SIZEOF_UCHAR);
410             } else { // Error: couldn't allocate memory. No copying should be done
411                 length = 0;
412             }
413         }
414         else {
415             coliter->string = NULL;
416         }
417 
418         /* start and end of string */
419         coliter->endp = coliter->string + length;
420 
421         /* handle writable buffer here */
422 
423         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
424             uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
425             if (wlength < coliter->writableBufSize) {
426                 uprv_memcpy(coliter->stackWritableBuffer,
427                     othercoliter->stackWritableBuffer,
428                     wlength * U_SIZEOF_UCHAR);
429             }
430             else {
431                 if (coliter->writableBuffer != coliter->stackWritableBuffer) {
432                     uprv_free(coliter->writableBuffer);
433                 }
434                 coliter->writableBuffer = (UChar *)uprv_malloc(
435                     wlength * U_SIZEOF_UCHAR);
436                 if(coliter->writableBuffer != NULL) {
437                     uprv_memcpy(coliter->writableBuffer,
438                         othercoliter->writableBuffer,
439                         wlength * U_SIZEOF_UCHAR);
440                     coliter->writableBufSize = wlength;
441                 } else { // Error: couldn't allocate memory for writableBuffer
442                     coliter->writableBufSize = 0;
443                 }
444             }
445         }
446 
447         /* current position */
448         if (othercoliter->pos >= othercoliter->string &&
449             othercoliter->pos <= othercoliter->endp)
450         {
451             coliter->pos = coliter->string +
452                 (othercoliter->pos - othercoliter->string);
453         }
454         else if (coliter->writableBuffer != NULL) {
455             coliter->pos = coliter->writableBuffer +
456                 (othercoliter->pos - othercoliter->writableBuffer);
457         }
458         else {
459             // Error: couldn't allocate memory for writableBuffer
460             coliter->pos = NULL;
461         }
462 
463         /* CE buffer */
464         int32_t CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
465         if (CEsize > 0) {
466             uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
467         }
468         coliter->toReturn = coliter->CEs +
469             (othercoliter->toReturn - othercoliter->CEs);
470         coliter->CEpos    = coliter->CEs + CEsize;
471 
472         if (othercoliter->fcdPosition != NULL) {
473             coliter->fcdPosition = coliter->string +
474                 (othercoliter->fcdPosition
475                 - othercoliter->string);
476         }
477         else {
478             coliter->fcdPosition = NULL;
479         }
480         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
481         coliter->origFlags   = othercoliter->origFlags;
482         coliter->coll = othercoliter->coll;
483         this->isDataOwned_ = TRUE;
484     }
485 
486     return *this;
487 }
488 
489 U_NAMESPACE_END
490 
491 #endif /* #if !UCONFIG_NO_COLLATION */
492 
493 /* eof */
494