• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 *  Date      Name        Description
18 *
19 *  6/23/97   helena      Adding comments to make code more readable.
20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99   aliu        Ported Thai collation support from Java.
22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
24 *                        private constructor and no calls are made to it
25 */
26 
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_COLLATION
30 
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "cmemory.h"
35 
36 
37 /* Constants --------------------------------------------------------------- */
38 
39 U_NAMESPACE_BEGIN
40 
// ICU RTTI boilerplate for CollationElementIterator; the macro itself is
// defined outside this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42 
43 /* CollationElementIterator public constructor/destructor ------------------ */
44 
45 CollationElementIterator::CollationElementIterator(
46                                          const CollationElementIterator& other)
47                                          : UObject(other), isDataOwned_(TRUE)
48 {
49     UErrorCode status = U_ZERO_ERROR;
50     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51                                 &status);
52 
53     *this = other;
54 }
55 
~CollationElementIterator()56 CollationElementIterator::~CollationElementIterator()
57 {
58     if (isDataOwned_) {
59         ucol_closeElements(m_data_);
60     }
61 }
62 
63 /* CollationElementIterator public methods --------------------------------- */
64 
getOffset() const65 int32_t CollationElementIterator::getOffset() const
66 {
67     return ucol_getOffset(m_data_);
68 }
69 
70 /**
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 *         occured or if the end of string has been reached
74 */
next(UErrorCode & status)75 int32_t CollationElementIterator::next(UErrorCode& status)
76 {
77     return ucol_next(m_data_, &status);
78 }
79 
operator !=(const CollationElementIterator & other) const80 UBool CollationElementIterator::operator!=(
81                                   const CollationElementIterator& other) const
82 {
83     return !(*this == other);
84 }
85 
operator ==(const CollationElementIterator & that) const86 UBool CollationElementIterator::operator==(
87                                     const CollationElementIterator& that) const
88 {
89     if (this == &that || m_data_ == that.m_data_) {
90         return TRUE;
91     }
92 
93     // option comparison
94     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
95     {
96         return FALSE;
97     }
98 
99     // the constructor and setText always sets a length
100     // and we only compare the string not the contents of the normalization
101     // buffer
102     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
103     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
104 
105     if (thislength != thatlength) {
106         return FALSE;
107     }
108 
109     if (uprv_memcmp(m_data_->iteratordata_.string,
110                     that.m_data_->iteratordata_.string,
111                     thislength * U_SIZEOF_UCHAR) != 0) {
112         return FALSE;
113     }
114     if (getOffset() != that.getOffset()) {
115         return FALSE;
116     }
117 
118     // checking normalization buffer
119     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
120         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
121             return FALSE;
122         }
123         // both are in the normalization buffer
124         if (m_data_->iteratordata_.pos
125             - m_data_->iteratordata_.writableBuffer.getBuffer()
126             != that.m_data_->iteratordata_.pos
127             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
128             // not in the same position in the normalization buffer
129             return FALSE;
130         }
131     }
132     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
133         return FALSE;
134     }
135     // checking ce position
136     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
137             == (that.m_data_->iteratordata_.CEpos
138                                         - that.m_data_->iteratordata_.CEs);
139 }
140 
141 /**
142 * Get the ordering priority of the previous collation element in the string.
143 * @param status the error code status.
144 * @return the previous element's ordering. Returns NULLORDER if an error has
145 *         occured or if the start of string has been reached.
146 */
previous(UErrorCode & status)147 int32_t CollationElementIterator::previous(UErrorCode& status)
148 {
149     return ucol_previous(m_data_, &status);
150 }
151 
152 /**
153 * Resets the cursor to the beginning of the string.
154 */
reset()155 void CollationElementIterator::reset()
156 {
157     ucol_reset(m_data_);
158 }
159 
setOffset(int32_t newOffset,UErrorCode & status)160 void CollationElementIterator::setOffset(int32_t newOffset,
161                                          UErrorCode& status)
162 {
163     ucol_setOffset(m_data_, newOffset, &status);
164 }
165 
166 /**
167 * Sets the source to the new source string.
168 */
setText(const UnicodeString & source,UErrorCode & status)169 void CollationElementIterator::setText(const UnicodeString& source,
170                                        UErrorCode& status)
171 {
172     if (U_FAILURE(status)) {
173         return;
174     }
175 
176     int32_t length = source.length();
177     UChar *string = NULL;
178     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
179         uprv_free((UChar *)m_data_->iteratordata_.string);
180     }
181     m_data_->isWritable = TRUE;
182     if (length > 0) {
183         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
184         /* test for NULL */
185         if (string == NULL) {
186             status = U_MEMORY_ALLOCATION_ERROR;
187             return;
188         }
189         u_memcpy(string, source.getBuffer(), length);
190     }
191     else {
192         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
193         /* test for NULL */
194         if (string == NULL) {
195             status = U_MEMORY_ALLOCATION_ERROR;
196             return;
197         }
198         *string = 0;
199     }
200     /* Free offsetBuffer before initializing it. */
201     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
202     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
203         &m_data_->iteratordata_, &status);
204 
205     m_data_->reset_   = TRUE;
206 }
207 
208 // Sets the source to the new character iterator.
setText(CharacterIterator & source,UErrorCode & status)209 void CollationElementIterator::setText(CharacterIterator& source,
210                                        UErrorCode& status)
211 {
212     if (U_FAILURE(status))
213         return;
214 
215     int32_t length = source.getLength();
216     UChar *buffer = NULL;
217 
218     if (length == 0) {
219         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
220         /* test for NULL */
221         if (buffer == NULL) {
222             status = U_MEMORY_ALLOCATION_ERROR;
223             return;
224         }
225         *buffer = 0;
226     }
227     else {
228         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
229         /* test for NULL */
230         if (buffer == NULL) {
231             status = U_MEMORY_ALLOCATION_ERROR;
232             return;
233         }
234         /*
235         Using this constructor will prevent buffer from being removed when
236         string gets removed
237         */
238         UnicodeString string;
239         source.getText(string);
240         u_memcpy(buffer, string.getBuffer(), length);
241     }
242 
243     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
244         uprv_free((UChar *)m_data_->iteratordata_.string);
245     }
246     m_data_->isWritable = TRUE;
247     /* Free offsetBuffer before initializing it. */
248     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
249     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
250         &m_data_->iteratordata_, &status);
251     m_data_->reset_   = TRUE;
252 }
253 
strengthOrder(int32_t order) const254 int32_t CollationElementIterator::strengthOrder(int32_t order) const
255 {
256     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
257     // Mask off the unwanted differences.
258     if (s == UCOL_PRIMARY) {
259         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
260     }
261     else if (s == UCOL_SECONDARY) {
262         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
263     }
264 
265     return order;
266 }
267 
268 /* CollationElementIterator private constructors/destructors --------------- */
269 
270 /**
271 * This is the "real" constructor for this class; it constructs an iterator
272 * over the source text using the specified collator
273 */
CollationElementIterator(const UnicodeString & sourceText,const RuleBasedCollator * order,UErrorCode & status)274 CollationElementIterator::CollationElementIterator(
275                                                const UnicodeString& sourceText,
276                                                const RuleBasedCollator* order,
277                                                UErrorCode& status)
278                                                : isDataOwned_(TRUE)
279 {
280     if (U_FAILURE(status)) {
281         return;
282     }
283 
284     int32_t length = sourceText.length();
285     UChar *string = NULL;
286 
287     if (length > 0) {
288         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
289         /* test for NULL */
290         if (string == NULL) {
291             status = U_MEMORY_ALLOCATION_ERROR;
292             return;
293         }
294         /*
295         Using this constructor will prevent buffer from being removed when
296         string gets removed
297         */
298         u_memcpy(string, sourceText.getBuffer(), length);
299     }
300     else {
301         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
302         /* test for NULL */
303         if (string == NULL) {
304             status = U_MEMORY_ALLOCATION_ERROR;
305             return;
306         }
307         *string = 0;
308     }
309     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
310 
311     /* Test for buffer overflows */
312     if (U_FAILURE(status)) {
313         return;
314     }
315     m_data_->isWritable = TRUE;
316 }
317 
318 /**
319 * This is the "real" constructor for this class; it constructs an iterator over
320 * the source text using the specified collator
321 */
CollationElementIterator(const CharacterIterator & sourceText,const RuleBasedCollator * order,UErrorCode & status)322 CollationElementIterator::CollationElementIterator(
323                                            const CharacterIterator& sourceText,
324                                            const RuleBasedCollator* order,
325                                            UErrorCode& status)
326                                            : isDataOwned_(TRUE)
327 {
328     if (U_FAILURE(status))
329         return;
330 
331     // **** should I just drop this test? ****
332     /*
333     if ( sourceText.endIndex() != 0 )
334     {
335         // A CollationElementIterator is really a two-layered beast.
336         // Internally it uses a Normalizer to munge the source text into a form
337         // where all "composed" Unicode characters (such as \u00FC) are split into a
338         // normal character and a combining accent character.
339         // Afterward, CollationElementIterator does its own processing to handle
340         // expanding and contracting collation sequences, ignorables, and so on.
341 
342         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
343                                 ? Normalizer::NO_OP : order->getDecomposition();
344 
345         text = new Normalizer(sourceText, decomp);
346         if (text == NULL)
347         status = U_MEMORY_ALLOCATION_ERROR;
348     }
349     */
350     int32_t length = sourceText.getLength();
351     UChar *buffer;
352     if (length > 0) {
353         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
354         /* test for NULL */
355         if (buffer == NULL) {
356             status = U_MEMORY_ALLOCATION_ERROR;
357             return;
358         }
359         /*
360         Using this constructor will prevent buffer from being removed when
361         string gets removed
362         */
363         UnicodeString string(buffer, length, length);
364         ((CharacterIterator &)sourceText).getText(string);
365         const UChar *temp = string.getBuffer();
366         u_memcpy(buffer, temp, length);
367     }
368     else {
369         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
370         /* test for NULL */
371         if (buffer == NULL) {
372             status = U_MEMORY_ALLOCATION_ERROR;
373             return;
374         }
375         *buffer = 0;
376     }
377     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
378 
379     /* Test for buffer overflows */
380     if (U_FAILURE(status)) {
381         return;
382     }
383     m_data_->isWritable = TRUE;
384 }
385 
386 /* CollationElementIterator protected methods ----------------------------- */
387 
operator =(const CollationElementIterator & other)388 const CollationElementIterator& CollationElementIterator::operator=(
389                                          const CollationElementIterator& other)
390 {
391     if (this != &other)
392     {
393         UCollationElements *ucolelem      = this->m_data_;
394         UCollationElements *otherucolelem = other.m_data_;
395         collIterate        *coliter       = &(ucolelem->iteratordata_);
396         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
397         int                length         = 0;
398 
399         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
400         // the normalization buffer
401         length = (int)(othercoliter->endp - othercoliter->string);
402 
403         ucolelem->reset_         = otherucolelem->reset_;
404         ucolelem->isWritable     = TRUE;
405 
406         /* create a duplicate of string */
407         if (length > 0) {
408             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
409             if(coliter->string != NULL) {
410                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
411                     length * U_SIZEOF_UCHAR);
412             } else { // Error: couldn't allocate memory. No copying should be done
413                 length = 0;
414             }
415         }
416         else {
417             coliter->string = NULL;
418         }
419 
420         /* start and end of string */
421         coliter->endp = coliter->string + length;
422 
423         /* handle writable buffer here */
424 
425         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
426             coliter->writableBuffer = othercoliter->writableBuffer;
427             coliter->writableBuffer.getTerminatedBuffer();
428         }
429 
430         /* current position */
431         if (othercoliter->pos >= othercoliter->string &&
432             othercoliter->pos <= othercoliter->endp)
433         {
434             coliter->pos = coliter->string +
435                 (othercoliter->pos - othercoliter->string);
436         }
437         else {
438             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
439                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
440         }
441 
442         /* CE buffer */
443         int32_t CEsize;
444         if (coliter->extendCEs) {
445             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
446             CEsize = sizeof(othercoliter->extendCEs);
447             if (CEsize > 0) {
448                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
449                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
450             }
451             coliter->toReturn = coliter->extendCEs +
452                 (othercoliter->toReturn - othercoliter->extendCEs);
453             coliter->CEpos    = coliter->extendCEs + CEsize;
454         } else {
455             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
456             if (CEsize > 0) {
457                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
458             }
459             coliter->toReturn = coliter->CEs +
460                 (othercoliter->toReturn - othercoliter->CEs);
461             coliter->CEpos    = coliter->CEs + CEsize;
462         }
463 
464         if (othercoliter->fcdPosition != NULL) {
465             coliter->fcdPosition = coliter->string +
466                 (othercoliter->fcdPosition
467                 - othercoliter->string);
468         }
469         else {
470             coliter->fcdPosition = NULL;
471         }
472         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
473         coliter->origFlags   = othercoliter->origFlags;
474         coliter->coll = othercoliter->coll;
475         this->isDataOwned_ = TRUE;
476     }
477 
478     return *this;
479 }
480 
481 U_NAMESPACE_END
482 
483 #endif /* #if !UCONFIG_NO_COLLATION */
484 
485 /* eof */
486