• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7 
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 *  Date      Name        Description
18 *
19 *  6/23/97   helena      Adding comments to make code more readable.
20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99   aliu        Ported Thai collation support from Java.
22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
24 *                        private constructor and no calls are made to it
25 */
26 
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_COLLATION
30 
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "uassert.h"
35 #include "cmemory.h"
36 
37 
38 /* Constants --------------------------------------------------------------- */
39 
40 U_NAMESPACE_BEGIN
41 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
43 
44 /* CollationElementIterator public constructor/destructor ------------------ */
45 
46 CollationElementIterator::CollationElementIterator(
47                                          const CollationElementIterator& other)
48                                          : UObject(other), isDataOwned_(TRUE)
49 {
50     UErrorCode status = U_ZERO_ERROR;
51     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
52                                 &status);
53 
54     *this = other;
55 }
56 
~CollationElementIterator()57 CollationElementIterator::~CollationElementIterator()
58 {
59     if (isDataOwned_) {
60         ucol_closeElements(m_data_);
61     }
62 }
63 
64 /* CollationElementIterator public methods --------------------------------- */
65 
getOffset() const66 int32_t CollationElementIterator::getOffset() const
67 {
68     return ucol_getOffset(m_data_);
69 }
70 
71 /**
72 * Get the ordering priority of the next character in the string.
73 * @return the next character's ordering. Returns NULLORDER if an error has
74 *         occured or if the end of string has been reached
75 */
next(UErrorCode & status)76 int32_t CollationElementIterator::next(UErrorCode& status)
77 {
78     return ucol_next(m_data_, &status);
79 }
80 
operator !=(const CollationElementIterator & other) const81 UBool CollationElementIterator::operator!=(
82                                   const CollationElementIterator& other) const
83 {
84     return !(*this == other);
85 }
86 
operator ==(const CollationElementIterator & that) const87 UBool CollationElementIterator::operator==(
88                                     const CollationElementIterator& that) const
89 {
90     if (this == &that || m_data_ == that.m_data_) {
91         return TRUE;
92     }
93 
94     // option comparison
95     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
96     {
97         return FALSE;
98     }
99 
100     // the constructor and setText always sets a length
101     // and we only compare the string not the contents of the normalization
102     // buffer
103     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
104     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
105 
106     if (thislength != thatlength) {
107         return FALSE;
108     }
109 
110     if (uprv_memcmp(m_data_->iteratordata_.string,
111                     that.m_data_->iteratordata_.string,
112                     thislength * U_SIZEOF_UCHAR) != 0) {
113         return FALSE;
114     }
115     if (getOffset() != that.getOffset()) {
116         return FALSE;
117     }
118 
119     // checking normalization buffer
120     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
121         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
122             return FALSE;
123         }
124         // both are in the normalization buffer
125         if (m_data_->iteratordata_.pos
126             - m_data_->iteratordata_.writableBuffer.getBuffer()
127             != that.m_data_->iteratordata_.pos
128             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
129             // not in the same position in the normalization buffer
130             return FALSE;
131         }
132     }
133     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
134         return FALSE;
135     }
136     // checking ce position
137     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
138             == (that.m_data_->iteratordata_.CEpos
139                                         - that.m_data_->iteratordata_.CEs);
140 }
141 
142 /**
143 * Get the ordering priority of the previous collation element in the string.
144 * @param status the error code status.
145 * @return the previous element's ordering. Returns NULLORDER if an error has
146 *         occured or if the start of string has been reached.
147 */
previous(UErrorCode & status)148 int32_t CollationElementIterator::previous(UErrorCode& status)
149 {
150     return ucol_previous(m_data_, &status);
151 }
152 
153 /**
154 * Resets the cursor to the beginning of the string.
155 */
reset()156 void CollationElementIterator::reset()
157 {
158     ucol_reset(m_data_);
159 }
160 
setOffset(int32_t newOffset,UErrorCode & status)161 void CollationElementIterator::setOffset(int32_t newOffset,
162                                          UErrorCode& status)
163 {
164     ucol_setOffset(m_data_, newOffset, &status);
165 }
166 
167 /**
168 * Sets the source to the new source string.
169 */
setText(const UnicodeString & source,UErrorCode & status)170 void CollationElementIterator::setText(const UnicodeString& source,
171                                        UErrorCode& status)
172 {
173     if (U_FAILURE(status)) {
174         return;
175     }
176 
177     int32_t length = source.length();
178     UChar *string = NULL;
179     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
180         uprv_free((UChar *)m_data_->iteratordata_.string);
181     }
182     m_data_->isWritable = TRUE;
183     if (length > 0) {
184         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
185         /* test for NULL */
186         if (string == NULL) {
187             status = U_MEMORY_ALLOCATION_ERROR;
188             return;
189         }
190         u_memcpy(string, source.getBuffer(), length);
191     }
192     else {
193         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
194         /* test for NULL */
195         if (string == NULL) {
196             status = U_MEMORY_ALLOCATION_ERROR;
197             return;
198         }
199         *string = 0;
200     }
201     /* Free offsetBuffer before initializing it. */
202     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
203     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
204         &m_data_->iteratordata_, &status);
205 
206     m_data_->reset_   = TRUE;
207 }
208 
209 // Sets the source to the new character iterator.
setText(CharacterIterator & source,UErrorCode & status)210 void CollationElementIterator::setText(CharacterIterator& source,
211                                        UErrorCode& status)
212 {
213     if (U_FAILURE(status))
214         return;
215 
216     int32_t length = source.getLength();
217     UChar *buffer = NULL;
218 
219     if (length == 0) {
220         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
221         /* test for NULL */
222         if (buffer == NULL) {
223             status = U_MEMORY_ALLOCATION_ERROR;
224             return;
225         }
226         *buffer = 0;
227     }
228     else {
229         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
230         /* test for NULL */
231         if (buffer == NULL) {
232             status = U_MEMORY_ALLOCATION_ERROR;
233             return;
234         }
235         /*
236         Using this constructor will prevent buffer from being removed when
237         string gets removed
238         */
239         UnicodeString string;
240         source.getText(string);
241         u_memcpy(buffer, string.getBuffer(), length);
242     }
243 
244     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
245         uprv_free((UChar *)m_data_->iteratordata_.string);
246     }
247     m_data_->isWritable = TRUE;
248     /* Free offsetBuffer before initializing it. */
249     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
250     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
251         &m_data_->iteratordata_, &status);
252     m_data_->reset_   = TRUE;
253 }
254 
strengthOrder(int32_t order) const255 int32_t CollationElementIterator::strengthOrder(int32_t order) const
256 {
257     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
258     // Mask off the unwanted differences.
259     if (s == UCOL_PRIMARY) {
260         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
261     }
262     else if (s == UCOL_SECONDARY) {
263         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
264     }
265 
266     return order;
267 }
268 
269 /* CollationElementIterator private constructors/destructors --------------- */
270 
271 /**
272 * This is the "real" constructor for this class; it constructs an iterator
273 * over the source text using the specified collator
274 */
CollationElementIterator(const UnicodeString & sourceText,const RuleBasedCollator * order,UErrorCode & status)275 CollationElementIterator::CollationElementIterator(
276                                                const UnicodeString& sourceText,
277                                                const RuleBasedCollator* order,
278                                                UErrorCode& status)
279                                                : isDataOwned_(TRUE)
280 {
281     if (U_FAILURE(status)) {
282         return;
283     }
284 
285     int32_t length = sourceText.length();
286     UChar *string = NULL;
287 
288     if (length > 0) {
289         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
290         /* test for NULL */
291         if (string == NULL) {
292             status = U_MEMORY_ALLOCATION_ERROR;
293             return;
294         }
295         /*
296         Using this constructor will prevent buffer from being removed when
297         string gets removed
298         */
299         u_memcpy(string, sourceText.getBuffer(), length);
300     }
301     else {
302         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
303         /* test for NULL */
304         if (string == NULL) {
305             status = U_MEMORY_ALLOCATION_ERROR;
306             return;
307         }
308         *string = 0;
309     }
310     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
311 
312     /* Test for buffer overflows */
313     if (U_FAILURE(status)) {
314         return;
315     }
316     m_data_->isWritable = TRUE;
317 }
318 
319 /**
320 * This is the "real" constructor for this class; it constructs an iterator over
321 * the source text using the specified collator
322 */
CollationElementIterator(const CharacterIterator & sourceText,const RuleBasedCollator * order,UErrorCode & status)323 CollationElementIterator::CollationElementIterator(
324                                            const CharacterIterator& sourceText,
325                                            const RuleBasedCollator* order,
326                                            UErrorCode& status)
327                                            : isDataOwned_(TRUE)
328 {
329     if (U_FAILURE(status))
330         return;
331 
332     // **** should I just drop this test? ****
333     /*
334     if ( sourceText.endIndex() != 0 )
335     {
336         // A CollationElementIterator is really a two-layered beast.
337         // Internally it uses a Normalizer to munge the source text into a form
338         // where all "composed" Unicode characters (such as \u00FC) are split into a
339         // normal character and a combining accent character.
340         // Afterward, CollationElementIterator does its own processing to handle
341         // expanding and contracting collation sequences, ignorables, and so on.
342 
343         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344                                 ? Normalizer::NO_OP : order->getDecomposition();
345 
346         text = new Normalizer(sourceText, decomp);
347         if (text == NULL)
348         status = U_MEMORY_ALLOCATION_ERROR;
349     }
350     */
351     int32_t length = sourceText.getLength();
352     UChar *buffer;
353     if (length > 0) {
354         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
355         /* test for NULL */
356         if (buffer == NULL) {
357             status = U_MEMORY_ALLOCATION_ERROR;
358             return;
359         }
360         /*
361         Using this constructor will prevent buffer from being removed when
362         string gets removed
363         */
364         UnicodeString string(buffer, length, length);
365         ((CharacterIterator &)sourceText).getText(string);
366         const UChar *temp = string.getBuffer();
367         u_memcpy(buffer, temp, length);
368     }
369     else {
370         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
371         /* test for NULL */
372         if (buffer == NULL) {
373             status = U_MEMORY_ALLOCATION_ERROR;
374             return;
375         }
376         *buffer = 0;
377     }
378     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
379 
380     /* Test for buffer overflows */
381     if (U_FAILURE(status)) {
382         return;
383     }
384     m_data_->isWritable = TRUE;
385 }
386 
387 /* CollationElementIterator protected methods ----------------------------- */
388 
operator =(const CollationElementIterator & other)389 const CollationElementIterator& CollationElementIterator::operator=(
390                                          const CollationElementIterator& other)
391 {
392     if (this != &other)
393     {
394         UCollationElements *ucolelem      = this->m_data_;
395         UCollationElements *otherucolelem = other.m_data_;
396         collIterate        *coliter       = &(ucolelem->iteratordata_);
397         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
398         int                length         = 0;
399 
400         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
401         // the normalization buffer
402         length = (int)(othercoliter->endp - othercoliter->string);
403 
404         ucolelem->reset_         = otherucolelem->reset_;
405         ucolelem->isWritable     = TRUE;
406 
407         /* create a duplicate of string */
408         if (length > 0) {
409             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
410             if(coliter->string != NULL) {
411                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
412                     length * U_SIZEOF_UCHAR);
413             } else { // Error: couldn't allocate memory. No copying should be done
414                 length = 0;
415             }
416         }
417         else {
418             coliter->string = NULL;
419         }
420 
421         /* start and end of string */
422         coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
423 
424         /* handle writable buffer here */
425 
426         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
427             coliter->writableBuffer = othercoliter->writableBuffer;
428             coliter->writableBuffer.getTerminatedBuffer();
429         }
430 
431         /* current position */
432         if (othercoliter->pos >= othercoliter->string &&
433             othercoliter->pos <= othercoliter->endp)
434         {
435             U_ASSERT(coliter->string != NULL);
436             coliter->pos = coliter->string +
437                 (othercoliter->pos - othercoliter->string);
438         }
439         else {
440             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
441                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
442         }
443 
444         /* CE buffer */
445         int32_t CEsize;
446         if (coliter->extendCEs) {
447             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
448             CEsize = sizeof(othercoliter->extendCEs);
449             if (CEsize > 0) {
450                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
451                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
452             }
453             coliter->toReturn = coliter->extendCEs +
454                 (othercoliter->toReturn - othercoliter->extendCEs);
455             coliter->CEpos    = coliter->extendCEs + CEsize;
456         } else {
457             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
458             if (CEsize > 0) {
459                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
460             }
461             coliter->toReturn = coliter->CEs +
462                 (othercoliter->toReturn - othercoliter->CEs);
463             coliter->CEpos    = coliter->CEs + CEsize;
464         }
465 
466         if (othercoliter->fcdPosition != NULL) {
467             U_ASSERT(coliter->string != NULL);
468             coliter->fcdPosition = coliter->string +
469                 (othercoliter->fcdPosition
470                 - othercoliter->string);
471         }
472         else {
473             coliter->fcdPosition = NULL;
474         }
475         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
476         coliter->origFlags   = othercoliter->origFlags;
477         coliter->coll = othercoliter->coll;
478         this->isDataOwned_ = TRUE;
479     }
480 
481     return *this;
482 }
483 
484 U_NAMESPACE_END
485 
486 #endif /* #if !UCONFIG_NO_COLLATION */
487 
488 /* eof */
489