1 /*
2 *******************************************************************************
3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 /*
9 * File coleitr.cpp
10 *
11 *
12 *
13 * Created by: Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 *
19 * 6/23/97 helena Adding comments to make code more readable.
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
21 * 12/10/99 aliu Ported Thai collation support from Java.
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
24 * private constructor and no calls are made to it
25 */
26
27 #include "unicode/utypes.h"
28
29 #if !UCONFIG_NO_COLLATION
30
31 #include "unicode/coleitr.h"
32 #include "unicode/ustring.h"
33 #include "ucol_imp.h"
34 #include "uassert.h"
35 #include "cmemory.h"
36
37
38 /* Constants --------------------------------------------------------------- */
39
40 U_NAMESPACE_BEGIN
41
// Expands the UObject RTTI implementation macro for CollationElementIterator.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
43
44 /* CollationElementIterator public constructor/destructor ------------------ */
45
46 CollationElementIterator::CollationElementIterator(
47 const CollationElementIterator& other)
48 : UObject(other), isDataOwned_(TRUE)
49 {
50 UErrorCode status = U_ZERO_ERROR;
51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
52 &status);
53
54 *this = other;
55 }
56
~CollationElementIterator()57 CollationElementIterator::~CollationElementIterator()
58 {
59 if (isDataOwned_) {
60 ucol_closeElements(m_data_);
61 }
62 }
63
64 /* CollationElementIterator public methods --------------------------------- */
65
getOffset() const66 int32_t CollationElementIterator::getOffset() const
67 {
68 return ucol_getOffset(m_data_);
69 }
70
71 /**
72 * Get the ordering priority of the next character in the string.
73 * @return the next character's ordering. Returns NULLORDER if an error has
74 * occured or if the end of string has been reached
75 */
next(UErrorCode & status)76 int32_t CollationElementIterator::next(UErrorCode& status)
77 {
78 return ucol_next(m_data_, &status);
79 }
80
operator !=(const CollationElementIterator & other) const81 UBool CollationElementIterator::operator!=(
82 const CollationElementIterator& other) const
83 {
84 return !(*this == other);
85 }
86
operator ==(const CollationElementIterator & that) const87 UBool CollationElementIterator::operator==(
88 const CollationElementIterator& that) const
89 {
90 if (this == &that || m_data_ == that.m_data_) {
91 return TRUE;
92 }
93
94 // option comparison
95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
96 {
97 return FALSE;
98 }
99
100 // the constructor and setText always sets a length
101 // and we only compare the string not the contents of the normalization
102 // buffer
103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
105
106 if (thislength != thatlength) {
107 return FALSE;
108 }
109
110 if (uprv_memcmp(m_data_->iteratordata_.string,
111 that.m_data_->iteratordata_.string,
112 thislength * U_SIZEOF_UCHAR) != 0) {
113 return FALSE;
114 }
115 if (getOffset() != that.getOffset()) {
116 return FALSE;
117 }
118
119 // checking normalization buffer
120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
122 return FALSE;
123 }
124 // both are in the normalization buffer
125 if (m_data_->iteratordata_.pos
126 - m_data_->iteratordata_.writableBuffer.getBuffer()
127 != that.m_data_->iteratordata_.pos
128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
129 // not in the same position in the normalization buffer
130 return FALSE;
131 }
132 }
133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
134 return FALSE;
135 }
136 // checking ce position
137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
138 == (that.m_data_->iteratordata_.CEpos
139 - that.m_data_->iteratordata_.CEs);
140 }
141
142 /**
143 * Get the ordering priority of the previous collation element in the string.
144 * @param status the error code status.
145 * @return the previous element's ordering. Returns NULLORDER if an error has
146 * occured or if the start of string has been reached.
147 */
previous(UErrorCode & status)148 int32_t CollationElementIterator::previous(UErrorCode& status)
149 {
150 return ucol_previous(m_data_, &status);
151 }
152
153 /**
154 * Resets the cursor to the beginning of the string.
155 */
reset()156 void CollationElementIterator::reset()
157 {
158 ucol_reset(m_data_);
159 }
160
setOffset(int32_t newOffset,UErrorCode & status)161 void CollationElementIterator::setOffset(int32_t newOffset,
162 UErrorCode& status)
163 {
164 ucol_setOffset(m_data_, newOffset, &status);
165 }
166
167 /**
168 * Sets the source to the new source string.
169 */
setText(const UnicodeString & source,UErrorCode & status)170 void CollationElementIterator::setText(const UnicodeString& source,
171 UErrorCode& status)
172 {
173 if (U_FAILURE(status)) {
174 return;
175 }
176
177 int32_t length = source.length();
178 UChar *string = NULL;
179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
180 uprv_free((UChar *)m_data_->iteratordata_.string);
181 }
182 m_data_->isWritable = TRUE;
183 if (length > 0) {
184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
185 /* test for NULL */
186 if (string == NULL) {
187 status = U_MEMORY_ALLOCATION_ERROR;
188 return;
189 }
190 u_memcpy(string, source.getBuffer(), length);
191 }
192 else {
193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
194 /* test for NULL */
195 if (string == NULL) {
196 status = U_MEMORY_ALLOCATION_ERROR;
197 return;
198 }
199 *string = 0;
200 }
201 /* Free offsetBuffer before initializing it. */
202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
204 &m_data_->iteratordata_, &status);
205
206 m_data_->reset_ = TRUE;
207 }
208
209 // Sets the source to the new character iterator.
setText(CharacterIterator & source,UErrorCode & status)210 void CollationElementIterator::setText(CharacterIterator& source,
211 UErrorCode& status)
212 {
213 if (U_FAILURE(status))
214 return;
215
216 int32_t length = source.getLength();
217 UChar *buffer = NULL;
218
219 if (length == 0) {
220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
221 /* test for NULL */
222 if (buffer == NULL) {
223 status = U_MEMORY_ALLOCATION_ERROR;
224 return;
225 }
226 *buffer = 0;
227 }
228 else {
229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
230 /* test for NULL */
231 if (buffer == NULL) {
232 status = U_MEMORY_ALLOCATION_ERROR;
233 return;
234 }
235 /*
236 Using this constructor will prevent buffer from being removed when
237 string gets removed
238 */
239 UnicodeString string;
240 source.getText(string);
241 u_memcpy(buffer, string.getBuffer(), length);
242 }
243
244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
245 uprv_free((UChar *)m_data_->iteratordata_.string);
246 }
247 m_data_->isWritable = TRUE;
248 /* Free offsetBuffer before initializing it. */
249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
251 &m_data_->iteratordata_, &status);
252 m_data_->reset_ = TRUE;
253 }
254
strengthOrder(int32_t order) const255 int32_t CollationElementIterator::strengthOrder(int32_t order) const
256 {
257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
258 // Mask off the unwanted differences.
259 if (s == UCOL_PRIMARY) {
260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
261 }
262 else if (s == UCOL_SECONDARY) {
263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
264 }
265
266 return order;
267 }
268
269 /* CollationElementIterator private constructors/destructors --------------- */
270
271 /**
272 * This is the "real" constructor for this class; it constructs an iterator
273 * over the source text using the specified collator
274 */
CollationElementIterator(const UnicodeString & sourceText,const RuleBasedCollator * order,UErrorCode & status)275 CollationElementIterator::CollationElementIterator(
276 const UnicodeString& sourceText,
277 const RuleBasedCollator* order,
278 UErrorCode& status)
279 : isDataOwned_(TRUE)
280 {
281 if (U_FAILURE(status)) {
282 return;
283 }
284
285 int32_t length = sourceText.length();
286 UChar *string = NULL;
287
288 if (length > 0) {
289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
290 /* test for NULL */
291 if (string == NULL) {
292 status = U_MEMORY_ALLOCATION_ERROR;
293 return;
294 }
295 /*
296 Using this constructor will prevent buffer from being removed when
297 string gets removed
298 */
299 u_memcpy(string, sourceText.getBuffer(), length);
300 }
301 else {
302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
303 /* test for NULL */
304 if (string == NULL) {
305 status = U_MEMORY_ALLOCATION_ERROR;
306 return;
307 }
308 *string = 0;
309 }
310 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
311
312 /* Test for buffer overflows */
313 if (U_FAILURE(status)) {
314 return;
315 }
316 m_data_->isWritable = TRUE;
317 }
318
319 /**
320 * This is the "real" constructor for this class; it constructs an iterator over
321 * the source text using the specified collator
322 */
CollationElementIterator(const CharacterIterator & sourceText,const RuleBasedCollator * order,UErrorCode & status)323 CollationElementIterator::CollationElementIterator(
324 const CharacterIterator& sourceText,
325 const RuleBasedCollator* order,
326 UErrorCode& status)
327 : isDataOwned_(TRUE)
328 {
329 if (U_FAILURE(status))
330 return;
331
332 // **** should I just drop this test? ****
333 /*
334 if ( sourceText.endIndex() != 0 )
335 {
336 // A CollationElementIterator is really a two-layered beast.
337 // Internally it uses a Normalizer to munge the source text into a form
338 // where all "composed" Unicode characters (such as \u00FC) are split into a
339 // normal character and a combining accent character.
340 // Afterward, CollationElementIterator does its own processing to handle
341 // expanding and contracting collation sequences, ignorables, and so on.
342
343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344 ? Normalizer::NO_OP : order->getDecomposition();
345
346 text = new Normalizer(sourceText, decomp);
347 if (text == NULL)
348 status = U_MEMORY_ALLOCATION_ERROR;
349 }
350 */
351 int32_t length = sourceText.getLength();
352 UChar *buffer;
353 if (length > 0) {
354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
355 /* test for NULL */
356 if (buffer == NULL) {
357 status = U_MEMORY_ALLOCATION_ERROR;
358 return;
359 }
360 /*
361 Using this constructor will prevent buffer from being removed when
362 string gets removed
363 */
364 UnicodeString string(buffer, length, length);
365 ((CharacterIterator &)sourceText).getText(string);
366 const UChar *temp = string.getBuffer();
367 u_memcpy(buffer, temp, length);
368 }
369 else {
370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
371 /* test for NULL */
372 if (buffer == NULL) {
373 status = U_MEMORY_ALLOCATION_ERROR;
374 return;
375 }
376 *buffer = 0;
377 }
378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
379
380 /* Test for buffer overflows */
381 if (U_FAILURE(status)) {
382 return;
383 }
384 m_data_->isWritable = TRUE;
385 }
386
387 /* CollationElementIterator protected methods ----------------------------- */
388
/**
 * Assignment operator.
 * Copies the iteration state of `other` into this iterator's underlying
 * collIterate: a private duplicate of the source text, the normalization
 * buffer (when other is iterating inside it), the current position, the CE
 * buffer, the FCD position, the flags and the collator.
 * Assigning an iterator to itself, or to one sharing the same underlying
 * data, is a no-op.
 * @param other the iterator whose state is copied.
 * @return a reference to this iterator.
 */
const CollationElementIterator& CollationElementIterator::operator=(
                                         const CollationElementIterator& other)
{
    // Same object, or two wrappers around the same data (operator== treats
    // both as equal): copying the state onto itself would free or overwrite
    // the very text it is reading from.
    if (this == &other || m_data_ == other.m_data_) {
        return *this;
    }

    UCollationElements *ucolelem      = this->m_data_;
    UCollationElements *otherucolelem = other.m_data_;
    collIterate        *coliter       = &(ucolelem->iteratordata_);
    collIterate        *othercoliter  = &(otherucolelem->iteratordata_);

    // checking only UCOL_ITER_HASLEN is not enough here as we may be in
    // the normalization buffer
    int length = (int)(othercoliter->endp - othercoliter->string);

    // Release the text this iterator owned so far (same guard as setText);
    // without this, every assignment leaked the previous copy.
    if (ucolelem->isWritable && coliter->string != NULL) {
        uprv_free((UChar *)coliter->string);
        coliter->string = NULL;
    }

    ucolelem->reset_     = otherucolelem->reset_;
    ucolelem->isWritable = TRUE;

    /* create a duplicate of string */
    if (length > 0) {
        coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
        if (coliter->string != NULL) {
            uprv_memcpy((UChar *)coliter->string, othercoliter->string,
                length * U_SIZEOF_UCHAR);
        } else { // Error: couldn't allocate memory. No copying should be done
            length = 0;
        }
    }
    else {
        coliter->string = NULL;
    }

    /* start and end of string */
    coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;

    /* handle writable buffer here */

    if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
        coliter->writableBuffer = othercoliter->writableBuffer;
        coliter->writableBuffer.getTerminatedBuffer();
    }

    /* current position */
    if (othercoliter->pos >= othercoliter->string &&
        othercoliter->pos <= othercoliter->endp)
    {
        // Position lies within the source text: same offset in our copy.
        U_ASSERT(coliter->string != NULL);
        coliter->pos = coliter->string +
            (othercoliter->pos - othercoliter->string);
    }
    else {
        // Otherwise the position is taken relative to the normalization buffer.
        coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
            (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
    }

    /* CE buffer */
    int32_t CEsize;
    if (coliter->extendCEs) {
        // NOTE(review): this branch is left exactly as it was because a
        // correct fix needs the capacity of other's extended buffer, which is
        // not visible from this file. As written it looks wrong in several
        // ways: the condition tests the destination rather than the source;
        // sizeof() yields the size of a pointer, not of the buffer; the
        // allocation is stored into the *source* iterator (losing whatever
        // its extendCEs pointed to) and then copied from while still
        // uninitialized; and CEsize is used as an element offset although it
        // is a byte count. Needs a proper fix.
        uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
        CEsize = sizeof(othercoliter->extendCEs);
        if (CEsize > 0) {
            othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
            uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
        }
        coliter->toReturn = coliter->extendCEs +
            (othercoliter->toReturn - othercoliter->extendCEs);
        coliter->CEpos    = coliter->extendCEs + CEsize;
    } else {
        // CEsize counts CE elements, so the copy length has to be scaled to
        // bytes; passing the bare count copied only part of the stored CEs.
        CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
        if (CEsize > 0) {
            uprv_memcpy(coliter->CEs, othercoliter->CEs,
                        CEsize * sizeof(uint32_t));
        }
        coliter->toReturn = coliter->CEs +
            (othercoliter->toReturn - othercoliter->CEs);
        coliter->CEpos    = coliter->CEs + CEsize;
    }

    if (othercoliter->fcdPosition != NULL) {
        U_ASSERT(coliter->string != NULL);
        coliter->fcdPosition = coliter->string +
            (othercoliter->fcdPosition
            - othercoliter->string);
    }
    else {
        coliter->fcdPosition = NULL;
    }
    coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
    coliter->origFlags   = othercoliter->origFlags;
    coliter->coll        = othercoliter->coll;
    this->isDataOwned_   = TRUE;

    return *this;
}
483
484 U_NAMESPACE_END
485
486 #endif /* #if !UCONFIG_NO_COLLATION */
487
488 /* eof */
489