1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2007, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 *
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
25 */
26
27 #include "unicode/utypes.h"
28
29 #if !UCONFIG_NO_COLLATION
30
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "cmemory.h"
35
36
37 /* Constants --------------------------------------------------------------- */
38
39 U_NAMESPACE_BEGIN
40
// Emits the run-time type information boilerplate for this class.
// NOTE(review): the macro is defined outside this file; presumably it
// provides the class-ID accessors declared in the header -- confirm there.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42
43 /* CollationElementIterator public constructor/destructor ------------------ */
44
45 CollationElementIterator::CollationElementIterator(
46 const CollationElementIterator& other)
47 : UObject(other), isDataOwned_(TRUE)
48 {
49 UErrorCode status = U_ZERO_ERROR;
50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51 &status);
52
53 *this = other;
54 }
55
~CollationElementIterator()56 CollationElementIterator::~CollationElementIterator()
57 {
58 if (isDataOwned_) {
59 ucol_closeElements(m_data_);
60 }
61 }
62
63 /* CollationElementIterator public methods --------------------------------- */
64
getOffset() const65 int32_t CollationElementIterator::getOffset() const
66 {
67 return ucol_getOffset(m_data_);
68 }
69
70 /**
71 * Get the ordering priority of the next character in the string.
72 * @return the next character's ordering. Returns NULLORDER if an error has
73 * occured or if the end of string has been reached
74 */
next(UErrorCode & status)75 int32_t CollationElementIterator::next(UErrorCode& status)
76 {
77 return ucol_next(m_data_, &status);
78 }
79
operator !=(const CollationElementIterator & other) const80 UBool CollationElementIterator::operator!=(
81 const CollationElementIterator& other) const
82 {
83 return !(*this == other);
84 }
85
operator ==(const CollationElementIterator & that) const86 UBool CollationElementIterator::operator==(
87 const CollationElementIterator& that) const
88 {
89 if (this == &that || m_data_ == that.m_data_) {
90 return TRUE;
91 }
92
93 // option comparison
94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
95 {
96 return FALSE;
97 }
98
99 // the constructor and setText always sets a length
100 // and we only compare the string not the contents of the normalization
101 // buffer
102 int thislength = m_data_->iteratordata_.endp -
103 m_data_->iteratordata_.string;
104 int thatlength = that.m_data_->iteratordata_.endp -
105 that.m_data_->iteratordata_.string;
106
107 if (thislength != thatlength) {
108 return FALSE;
109 }
110
111 if (uprv_memcmp(m_data_->iteratordata_.string,
112 that.m_data_->iteratordata_.string,
113 thislength * U_SIZEOF_UCHAR) != 0) {
114 return FALSE;
115 }
116 if (getOffset() != that.getOffset()) {
117 return FALSE;
118 }
119
120 // checking normalization buffer
121 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
122 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
123 return FALSE;
124 }
125 // both are in the normalization buffer
126 if (m_data_->iteratordata_.pos
127 - m_data_->iteratordata_.writableBuffer
128 != that.m_data_->iteratordata_.pos
129 - that.m_data_->iteratordata_.writableBuffer) {
130 // not in the same position in the normalization buffer
131 return FALSE;
132 }
133 }
134 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
135 return FALSE;
136 }
137 // checking ce position
138 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
139 == (that.m_data_->iteratordata_.CEpos
140 - that.m_data_->iteratordata_.CEs);
141 }
142
143 /**
144 * Get the ordering priority of the previous collation element in the string.
145 * @param status the error code status.
146 * @return the previous element's ordering. Returns NULLORDER if an error has
147 * occured or if the start of string has been reached.
148 */
previous(UErrorCode & status)149 int32_t CollationElementIterator::previous(UErrorCode& status)
150 {
151 return ucol_previous(m_data_, &status);
152 }
153
154 /**
155 * Resets the cursor to the beginning of the string.
156 */
reset()157 void CollationElementIterator::reset()
158 {
159 ucol_reset(m_data_);
160 }
161
setOffset(int32_t newOffset,UErrorCode & status)162 void CollationElementIterator::setOffset(int32_t newOffset,
163 UErrorCode& status)
164 {
165 ucol_setOffset(m_data_, newOffset, &status);
166 }
167
168 /**
169 * Sets the source to the new source string.
170 */
setText(const UnicodeString & source,UErrorCode & status)171 void CollationElementIterator::setText(const UnicodeString& source,
172 UErrorCode& status)
173 {
174 if (U_FAILURE(status)) {
175 return;
176 }
177
178 int32_t length = source.length();
179 UChar *string = NULL;
180 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
181 uprv_free(m_data_->iteratordata_.string);
182 }
183 m_data_->isWritable = TRUE;
184 if (length > 0) {
185 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
186 /* test for NULL */
187 if (string == NULL) {
188 status = U_MEMORY_ALLOCATION_ERROR;
189 return;
190 }
191 u_memcpy(string, source.getBuffer(), length);
192 }
193 else {
194 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
195 /* test for NULL */
196 if (string == NULL) {
197 status = U_MEMORY_ALLOCATION_ERROR;
198 return;
199 }
200 *string = 0;
201 }
202 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
203 &m_data_->iteratordata_);
204
205 m_data_->reset_ = TRUE;
206 }
207
208 // Sets the source to the new character iterator.
setText(CharacterIterator & source,UErrorCode & status)209 void CollationElementIterator::setText(CharacterIterator& source,
210 UErrorCode& status)
211 {
212 if (U_FAILURE(status))
213 return;
214
215 int32_t length = source.getLength();
216 UChar *buffer = NULL;
217
218 if (length == 0) {
219 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
220 /* test for NULL */
221 if (buffer == NULL) {
222 status = U_MEMORY_ALLOCATION_ERROR;
223 return;
224 }
225 *buffer = 0;
226 }
227 else {
228 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
229 /* test for NULL */
230 if (buffer == NULL) {
231 status = U_MEMORY_ALLOCATION_ERROR;
232 return;
233 }
234 /*
235 Using this constructor will prevent buffer from being removed when
236 string gets removed
237 */
238 UnicodeString string;
239 source.getText(string);
240 u_memcpy(buffer, string.getBuffer(), length);
241 }
242
243 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
244 uprv_free(m_data_->iteratordata_.string);
245 }
246 m_data_->isWritable = TRUE;
247 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
248 &m_data_->iteratordata_);
249 m_data_->reset_ = TRUE;
250 }
251
strengthOrder(int32_t order) const252 int32_t CollationElementIterator::strengthOrder(int32_t order) const
253 {
254 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
255 // Mask off the unwanted differences.
256 if (s == UCOL_PRIMARY) {
257 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
258 }
259 else if (s == UCOL_SECONDARY) {
260 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
261 }
262
263 return order;
264 }
265
266 /* CollationElementIterator private constructors/destructors --------------- */
267
268 /**
269 * This is the "real" constructor for this class; it constructs an iterator
270 * over the source text using the specified collator
271 */
CollationElementIterator(const UnicodeString & sourceText,const RuleBasedCollator * order,UErrorCode & status)272 CollationElementIterator::CollationElementIterator(
273 const UnicodeString& sourceText,
274 const RuleBasedCollator* order,
275 UErrorCode& status)
276 : isDataOwned_(TRUE)
277 {
278 if (U_FAILURE(status)) {
279 return;
280 }
281
282 int32_t length = sourceText.length();
283 UChar *string = NULL;
284
285 if (length > 0) {
286 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
287 /* test for NULL */
288 if (string == NULL) {
289 status = U_MEMORY_ALLOCATION_ERROR;
290 return;
291 }
292 /*
293 Using this constructor will prevent buffer from being removed when
294 string gets removed
295 */
296 u_memcpy(string, sourceText.getBuffer(), length);
297 }
298 else {
299 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
300 /* test for NULL */
301 if (string == NULL) {
302 status = U_MEMORY_ALLOCATION_ERROR;
303 return;
304 }
305 *string = 0;
306 }
307 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
308
309 /* Test for buffer overflows */
310 if (U_FAILURE(status)) {
311 return;
312 }
313 m_data_->isWritable = TRUE;
314 }
315
316 /**
317 * This is the "real" constructor for this class; it constructs an iterator over
318 * the source text using the specified collator
319 */
CollationElementIterator(const CharacterIterator & sourceText,const RuleBasedCollator * order,UErrorCode & status)320 CollationElementIterator::CollationElementIterator(
321 const CharacterIterator& sourceText,
322 const RuleBasedCollator* order,
323 UErrorCode& status)
324 : isDataOwned_(TRUE)
325 {
326 if (U_FAILURE(status))
327 return;
328
329 // **** should I just drop this test? ****
330 /*
331 if ( sourceText.endIndex() != 0 )
332 {
333 // A CollationElementIterator is really a two-layered beast.
334 // Internally it uses a Normalizer to munge the source text into a form
335 // where all "composed" Unicode characters (such as \u00FC) are split into a
336 // normal character and a combining accent character.
337 // Afterward, CollationElementIterator does its own processing to handle
338 // expanding and contracting collation sequences, ignorables, and so on.
339
340 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
341 ? Normalizer::NO_OP : order->getDecomposition();
342
343 text = new Normalizer(sourceText, decomp);
344 if (text == NULL)
345 status = U_MEMORY_ALLOCATION_ERROR;
346 }
347 */
348 int32_t length = sourceText.getLength();
349 UChar *buffer;
350 if (length > 0) {
351 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
352 /* test for NULL */
353 if (buffer == NULL) {
354 status = U_MEMORY_ALLOCATION_ERROR;
355 return;
356 }
357 /*
358 Using this constructor will prevent buffer from being removed when
359 string gets removed
360 */
361 UnicodeString string(buffer, length, length);
362 ((CharacterIterator &)sourceText).getText(string);
363 const UChar *temp = string.getBuffer();
364 u_memcpy(buffer, temp, length);
365 }
366 else {
367 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
368 /* test for NULL */
369 if (buffer == NULL) {
370 status = U_MEMORY_ALLOCATION_ERROR;
371 return;
372 }
373 *buffer = 0;
374 }
375 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
376
377 /* Test for buffer overflows */
378 if (U_FAILURE(status)) {
379 return;
380 }
381 m_data_->isWritable = TRUE;
382 }
383
384 /* CollationElementIterator protected methods ----------------------------- */
385
/**
 * Assignment operator. Makes this iterator a duplicate of other: the source
 * text is copied into a buffer of its own, and the normalization buffer, the
 * current position, the CE buffer, the FCD position, the flags and the
 * collator are taken over so that iteration continues from the same state.
 *
 * Allocation failures cannot be reported from here. If the text cannot be
 * duplicated the copy ends up with an empty text, and if the normalization
 * buffer cannot be duplicated its size is recorded as 0.
 *
 * @param other the iterator to duplicate
 * @return a reference to this iterator
 */
const CollationElementIterator& CollationElementIterator::operator=(
                                         const CollationElementIterator& other)
{
    if (this != &other)
    {
        UCollationElements *ucolelem      = this->m_data_;
        UCollationElements *otherucolelem = other.m_data_;
        collIterate        *coliter       = &(ucolelem->iteratordata_);
        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
        int                length         = 0;

        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
        // the normalization buffer
        length = othercoliter->endp - othercoliter->string;

        /* create a duplicate of string */
        UChar *duplicate = NULL;
        if (length > 0) {
            duplicate = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
            if (duplicate != NULL) {
                uprv_memcpy(duplicate, othercoliter->string,
                            length * U_SIZEOF_UCHAR);
            } else { // Error: couldn't allocate memory. No copying should be done
                length = 0;
            }
        }

        // Release the text owned so far instead of leaking it. This is done
        // only after the duplicate has been made, and never when both
        // wrappers share the same underlying data.
        if (ucolelem != otherucolelem &&
            ucolelem->isWritable && coliter->string != NULL) {
            uprv_free(coliter->string);
        }
        coliter->string = duplicate;

        ucolelem->reset_     = otherucolelem->reset_;
        ucolelem->isWritable = TRUE;

        /* start and end of string */
        coliter->endp = coliter->string + length;

        /* handle writable buffer here */

        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
            uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
            if (wlength < coliter->writableBufSize) {
                // Copy between the buffers actually in use: either side may
                // be working from a heap buffer rather than from its stack
                // buffer, and the position below is relative to
                // writableBuffer.
                uprv_memcpy(coliter->writableBuffer,
                            othercoliter->writableBuffer,
                            wlength * U_SIZEOF_UCHAR);
            }
            else {
                if (coliter->writableBuffer != coliter->stackWritableBuffer) {
                    uprv_free(coliter->writableBuffer);
                }
                coliter->writableBuffer = (UChar *)uprv_malloc(
                                                    wlength * U_SIZEOF_UCHAR);
                if(coliter->writableBuffer != NULL) {
                    uprv_memcpy(coliter->writableBuffer,
                                othercoliter->writableBuffer,
                                wlength * U_SIZEOF_UCHAR);
                    coliter->writableBufSize = wlength;
                } else { // Error: couldn't allocate memory for writableBuffer
                    coliter->writableBufSize = 0;
                }
            }
        }

        /* current position */
        if (othercoliter->pos >= othercoliter->string &&
            othercoliter->pos <= othercoliter->endp)
        {
            // the position lies inside the source text
            coliter->pos = coliter->string +
                           (othercoliter->pos - othercoliter->string);
        }
        else if (coliter->writableBuffer != NULL) {
            // the position lies inside the normalization buffer
            coliter->pos = coliter->writableBuffer +
                           (othercoliter->pos - othercoliter->writableBuffer);
        }
        else {
            // Error: couldn't allocate memory for writableBuffer
            coliter->pos = NULL;
        }

        /* CE buffer */
        // CEsize counts CEs, not bytes, so it has to be scaled by the element
        // size for the copy.
        int32_t CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
        if (CEsize > 0) {
            uprv_memcpy(coliter->CEs, othercoliter->CEs,
                        CEsize * sizeof(coliter->CEs[0]));
        }
        coliter->toReturn = coliter->CEs +
                            (othercoliter->toReturn - othercoliter->CEs);
        coliter->CEpos    = coliter->CEs + CEsize;

        if (othercoliter->fcdPosition != NULL) {
            coliter->fcdPosition = coliter->string +
                                   (othercoliter->fcdPosition
                                    - othercoliter->string);
        }
        else {
            coliter->fcdPosition = NULL;
        }
        coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
        coliter->origFlags   = othercoliter->origFlags;
        coliter->coll        = othercoliter->coll;
        this->isDataOwned_   = TRUE;
    }

    return *this;
}
488
489 U_NAMESPACE_END
490
491 #endif /* #if !UCONFIG_NO_COLLATION */
492
493 /* eof */
494