1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_COLLATION
10
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "itercoll.h"
16 #include "unicode/schriter.h"
17 #include "unicode/chariter.h"
18 #include "unicode/uchar.h"
19 #include "cmemory.h"
20
/* Element count of a true array (not a pointer). The argument is
 * parenthesized so that a compound expression is measured as a whole. */
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
22
23 static UErrorCode status = U_ZERO_ERROR;
24
CollationIteratorTest()25 CollationIteratorTest::CollationIteratorTest()
26 : test1("What subset of all possible test cases?", ""),
27 test2("has the highest probability of detecting", "")
28 {
29 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
30 if(U_FAILURE(status)) {
31 delete en_us;
32 en_us = 0;
33 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
34 return;
35 }
36
37 }
38
~CollationIteratorTest()39 CollationIteratorTest::~CollationIteratorTest()
40 {
41 delete en_us;
42 }
43
44 /**
45 * Test for CollationElementIterator previous and next for the whole set of
46 * unicode characters.
47 */
TestUnicodeChar()48 void CollationIteratorTest::TestUnicodeChar()
49 {
50 CollationElementIterator *iter;
51 UChar codepoint;
52 UnicodeString source;
53
54 for (codepoint = 1; codepoint < 0xFFFE;)
55 {
56 source.remove();
57
58 while (codepoint % 0xFF != 0)
59 {
60 if (u_isdefined(codepoint))
61 source += codepoint;
62 codepoint ++;
63 }
64
65 if (u_isdefined(codepoint))
66 source += codepoint;
67
68 if (codepoint != 0xFFFF)
69 codepoint ++;
70
71 iter = en_us->createCollationElementIterator(source);
72 /* A basic test to see if it's working at all */
73 backAndForth(*iter);
74 delete iter;
75 }
76 }
77
78 /**
79 * Test for CollationElementIterator.previous()
80 *
81 * @bug 4108758 - Make sure it works with contracting characters
82 *
83 */
TestPrevious()84 void CollationIteratorTest::TestPrevious(/* char* par */)
85 {
86 UErrorCode status = U_ZERO_ERROR;
87 CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
88
89 // A basic test to see if it's working at all
90 backAndForth(*iter);
91 delete iter;
92
93 // Test with a contracting character sequence
94 UnicodeString source;
95 RuleBasedCollator *c1 = NULL;
96 c1 = new RuleBasedCollator(
97 (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
98
99 if (c1 == NULL || U_FAILURE(status))
100 {
101 errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
102 delete c1;
103 return;
104 }
105
106 source = "abchdcba";
107 iter = c1->createCollationElementIterator(source);
108 backAndForth(*iter);
109 delete iter;
110 delete c1;
111
112 // Test with an expanding character sequence
113 RuleBasedCollator *c2 = NULL;
114 c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
115
116 if (c2 == NULL || U_FAILURE(status))
117 {
118 errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
119 delete c2;
120 return;
121 }
122
123 source = "abcd";
124 iter = c2->createCollationElementIterator(source);
125 backAndForth(*iter);
126 delete iter;
127 delete c2;
128
129 // Now try both
130 RuleBasedCollator *c3 = NULL;
131 c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
132
133 if (c3 == NULL || U_FAILURE(status))
134 {
135 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
136 delete c3;
137 return;
138 }
139
140 source = "abcdbchdc";
141 iter = c3->createCollationElementIterator(source);
142 backAndForth(*iter);
143 delete iter;
144 delete c3;
145
146 status=U_ZERO_ERROR;
147 source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
148
149 Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
150 if(U_FAILURE(status)){
151 errln("Couldn't create a collator");
152 }
153 iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
154 backAndForth(*iter);
155 delete iter;
156 delete c4;
157
158 source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159 Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
160
161 iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
162 if(U_FAILURE(status)){
163 errln("Couldn't create Japanese collator\n");
164 }
165 backAndForth(*iter);
166 delete iter;
167 delete c5;
168 }
169
170 /**
171 * Test for getOffset() and setOffset()
172 */
TestOffset()173 void CollationIteratorTest::TestOffset(/* char* par */)
174 {
175 CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
176 UErrorCode status = U_ZERO_ERROR;
177 // testing boundaries
178 iter->setOffset(0, status);
179 if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) {
180 errln("Error: After setting offset to 0, we should be at the end "
181 "of the backwards iteration");
182 }
183 iter->setOffset(test1.length(), status);
184 if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) {
185 errln("Error: After setting offset to end of the string, we should "
186 "be at the end of the backwards iteration");
187 }
188
189 // Run all the way through the iterator, then get the offset
190 int32_t orderLength = 0;
191 Order *orders = getOrders(*iter, orderLength);
192
193 int32_t offset = iter->getOffset();
194
195 if (offset != test1.length())
196 {
197 UnicodeString msg1("offset at end != length: ");
198 UnicodeString msg2(" vs ");
199
200 errln(msg1 + offset + msg2 + test1.length());
201 }
202
203 // Now set the offset back to the beginning and see if it works
204 CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
205
206 iter->setOffset(0, status);
207
208 if (U_FAILURE(status))
209 {
210 errln("setOffset failed.");
211 }
212 else
213 {
214 assertEqual(*iter, *pristine);
215 }
216
217 // TODO: try iterating halfway through a messy string.
218
219 delete pristine;
220 delete[] orders;
221 delete iter;
222 }
223
224 /**
225 * Test for setText()
226 */
TestSetText()227 void CollationIteratorTest::TestSetText(/* char* par */)
228 {
229 CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
230 CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
231 UErrorCode status = U_ZERO_ERROR;
232
233 // Run through the second iterator just to exercise it
234 int32_t c = iter2->next(status);
235 int32_t i = 0;
236
237 while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
238 {
239 if (U_FAILURE(status))
240 {
241 errln("iter2->next() returned an error.");
242 delete iter2;
243 delete iter1;
244 }
245
246 c = iter2->next(status);
247 }
248
249 // Now set it to point to the same string as the first iterator
250 iter2->setText(test1, status);
251
252 if (U_FAILURE(status))
253 {
254 errln("call to iter2->setText(test1) failed.");
255 }
256 else
257 {
258 assertEqual(*iter1, *iter2);
259 }
260 iter1->reset();
261 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
262 CharacterIterator* chariter = new StringCharacterIterator(test1);
263 iter2->setText(*chariter, status);
264 if (U_FAILURE(status))
265 {
266 errln("call to iter2->setText(chariter(test1)) failed.");
267 }
268 else
269 {
270 assertEqual(*iter1, *iter2);
271 }
272
273 // test for an empty string
274 UnicodeString empty("");
275 iter1->setText(empty, status);
276 if (U_FAILURE(status)
277 || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
278 errln("Empty string should have no CEs.");
279 }
280 ((StringCharacterIterator *)chariter)->setText(empty);
281 iter1->setText(*chariter, status);
282 if (U_FAILURE(status)
283 || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
284 errln("Empty string should have no CEs.");
285 }
286 delete chariter;
287 delete iter2;
288 delete iter1;
289 }
290
291 /** @bug 4108762
292 * Test for getMaxExpansion()
293 */
TestMaxExpansion()294 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
295 {
296 UErrorCode status = U_ZERO_ERROR;
297 UnicodeString rule("&a < ab < c/aba < d < z < ch");
298 RuleBasedCollator *coll = new RuleBasedCollator(rule, status);
299 UChar ch = 0;
300 UnicodeString str(ch);
301
302 CollationElementIterator *iter = coll->createCollationElementIterator(str);
303
304 while (ch < 0xFFFF && U_SUCCESS(status)) {
305 int count = 1;
306 uint32_t order;
307 ch ++;
308 UnicodeString str(ch);
309 iter->setText(str, status);
310 order = iter->previous(status);
311
312 /* thai management */
313 if (CollationElementIterator::isIgnorable(order))
314 order = iter->previous(status);
315
316 while (U_SUCCESS(status)
317 && iter->previous(status) != (int32_t)UCOL_NULLORDER)
318 {
319 count ++;
320 }
321
322 if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
323 errln("Failure at codepoint %d, maximum expansion count < %d\n",
324 ch, count);
325 }
326 }
327
328 delete iter;
329 delete coll;
330 }
331
332 /*
333 * @bug 4157299
334 */
TestClearBuffers()335 void CollationIteratorTest::TestClearBuffers(/* char* par */)
336 {
337 UErrorCode status = U_ZERO_ERROR;
338 RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
339
340 if (c == NULL || U_FAILURE(status))
341 {
342 errln("Couldn't create a RuleBasedCollator.");
343 delete c;
344 return;
345 }
346
347 UnicodeString source("abcd");
348 CollationElementIterator *i = c->createCollationElementIterator(source);
349 int32_t e0 = i->next(status); // save the first collation element
350
351 if (U_FAILURE(status))
352 {
353 errln("call to i->next() failed. err=%s", u_errorName(status));
354 }
355 else
356 {
357 i->setOffset(3, status); // go to the expanding character
358
359 if (U_FAILURE(status))
360 {
361 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
362 }
363 else
364 {
365 i->next(status); // but only use up half of it
366
367 if (U_FAILURE(status))
368 {
369 errln("call to i->next() failed. err=%s", u_errorName(status));
370 }
371 else
372 {
373 i->setOffset(0, status); // go back to the beginning
374
375 if (U_FAILURE(status))
376 {
377 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
378 }
379 else
380 {
381 int32_t e = i->next(status); // and get this one again
382
383 if (U_FAILURE(status))
384 {
385 errln("call to i->next() failed. err=%s", u_errorName(status));
386 }
387 else if (e != e0)
388 {
389 errln("got 0x%X, expected 0x%X", e, e0);
390 }
391 }
392 }
393 }
394 }
395
396 delete i;
397 delete c;
398 }
399
400 /**
401 * Testing the assignment operator
402 */
TestAssignment()403 void CollationIteratorTest::TestAssignment()
404 {
405 UErrorCode status = U_ZERO_ERROR;
406 RuleBasedCollator *coll =
407 (RuleBasedCollator *)Collator::createInstance(status);
408
409 if (coll == NULL || U_FAILURE(status))
410 {
411 errln("Couldn't create a default collator.");
412 return;
413 }
414
415 UnicodeString source("abcd");
416 CollationElementIterator *iter1 =
417 coll->createCollationElementIterator(source);
418
419 CollationElementIterator iter2 = *iter1;
420
421 if (*iter1 != iter2) {
422 errln("Fail collation iterator assignment does not produce the same elements");
423 }
424
425 CollationElementIterator iter3(*iter1);
426
427 if (*iter1 != iter3) {
428 errln("Fail collation iterator copy constructor does not produce the same elements");
429 }
430
431 source = CharsToUnicodeString("a\\u0300\\u0325");
432 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
433 CollationElementIterator *iter4
434 = coll->createCollationElementIterator(source);
435 CollationElementIterator iter5(*iter4);
436 if (*iter4 != iter5) {
437 errln("collation iterator assignment does not produce the same elements");
438 }
439 iter4->next(status);
440 if (U_FAILURE(status) || *iter4 == iter5) {
441 errln("collation iterator not equal");
442 }
443 iter5.next(status);
444 if (U_FAILURE(status) || *iter4 != iter5) {
445 errln("collation iterator equal");
446 }
447 iter4->next(status);
448 if (U_FAILURE(status) || *iter4 == iter5) {
449 errln("collation iterator not equal");
450 }
451 iter5.next(status);
452 if (U_FAILURE(status) || *iter4 != iter5) {
453 errln("collation iterator equal");
454 }
455 CollationElementIterator iter6(*iter4);
456 if (*iter4 != iter6) {
457 errln("collation iterator equal");
458 }
459 iter4->next(status);
460 if (U_FAILURE(status) || *iter4 == iter5) {
461 errln("collation iterator not equal");
462 }
463 iter5.next(status);
464 if (U_FAILURE(status) || *iter4 != iter5) {
465 errln("collation iterator equal");
466 }
467 iter4->next(status);
468 if (U_FAILURE(status) || *iter4 == iter5) {
469 errln("collation iterator not equal");
470 }
471 iter5.next(status);
472 if (U_FAILURE(status) || *iter4 != iter5) {
473 errln("collation iterator equal");
474 }
475 delete iter1;
476 delete iter4;
477 delete coll;
478 }
479
480 /**
481 * Testing the constructors
482 */
TestConstructors()483 void CollationIteratorTest::TestConstructors()
484 {
485 UErrorCode status = U_ZERO_ERROR;
486 RuleBasedCollator *coll =
487 (RuleBasedCollator *)Collator::createInstance(status);
488 if (coll == NULL || U_FAILURE(status))
489 {
490 errln("Couldn't create a default collator.");
491 return;
492 }
493
494 // testing protected constructor with character iterator as argument
495 StringCharacterIterator chariter(test1);
496 CollationElementIterator *iter1 =
497 coll->createCollationElementIterator(chariter);
498 if (U_FAILURE(status)) {
499 errln("Couldn't create collation element iterator with character iterator.");
500 return;
501 }
502 CollationElementIterator *iter2 =
503 coll->createCollationElementIterator(test1);
504
505 // initially the 2 collation element iterators should be the same
506 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
507 || *iter2 != *iter1) {
508 errln("CollationElementIterators constructed with the same string data should be the same at the start");
509 }
510 assertEqual(*iter1, *iter2);
511
512 delete iter1;
513 delete iter2;
514
515 // tests empty strings
516 UnicodeString empty("");
517 iter1 = coll->createCollationElementIterator(empty);
518 chariter.setText(empty);
519 iter2 = coll->createCollationElementIterator(chariter);
520 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
521 || *iter2 != *iter1) {
522 errln("CollationElementIterators constructed with the same string data should be the same at the start");
523 }
524 if (iter1->next(status) != (int32_t)UCOL_NULLORDER) {
525 errln("Empty string should have no CEs.");
526 }
527 if (iter2->next(status) != (int32_t)UCOL_NULLORDER) {
528 errln("Empty string should have no CEs.");
529 }
530 delete iter1;
531 delete iter2;
532 delete coll;
533 }
534
535 /**
536 * Testing the strength order
537 */
TestStrengthOrder()538 void CollationIteratorTest::TestStrengthOrder()
539 {
540 int order = 0x0123ABCD;
541
542 UErrorCode status = U_ZERO_ERROR;
543 RuleBasedCollator *coll =
544 (RuleBasedCollator *)Collator::createInstance(status);
545 if (coll == NULL || U_FAILURE(status))
546 {
547 errln("Couldn't create a default collator.");
548 return;
549 }
550
551 coll->setStrength(Collator::PRIMARY);
552 CollationElementIterator *iter =
553 coll->createCollationElementIterator(test1);
554
555 if (iter == NULL) {
556 errln("Couldn't create a collation element iterator from default collator");
557 return;
558 }
559
560 if (iter->strengthOrder(order) != 0x01230000) {
561 errln("Strength order for a primary strength collator should be the first 2 bytes");
562 return;
563 }
564
565 coll->setStrength(Collator::SECONDARY);
566 if (iter->strengthOrder(order) != 0x0123AB00) {
567 errln("Strength order for a secondary strength collator should be the third byte");
568 return;
569 }
570
571 coll->setStrength(Collator::TERTIARY);
572 if (iter->strengthOrder(order) != order) {
573 errln("Strength order for a tertiary strength collator should be the third byte");
574 return;
575 }
576 delete iter;
577 delete coll;
578 }
579
580 /**
581 * Return a string containing all of the collation orders
582 * returned by calls to next on the specified iterator
583 */
orderString(CollationElementIterator & iter,UnicodeString & target)584 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
585 {
586 int32_t order;
587 UErrorCode status = U_ZERO_ERROR;
588
589 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
590 {
591 target += "0x";
592 appendHex(order, 8, target);
593 target += " ";
594 }
595
596 return target;
597 }
598
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)599 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
600 {
601 int32_t c1, c2, count = 0;
602 UErrorCode status = U_ZERO_ERROR;
603
604 do
605 {
606 c1 = i1.next(status);
607 c2 = i2.next(status);
608
609 if (c1 != c2)
610 {
611 errln(" %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
612 break;
613 }
614
615 count += 1;
616 }
617 while (c1 != CollationElementIterator::NULLORDER);
618 }
619
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)620 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
621 {
622 if (exec)
623 {
624 logln("Collation Iteration Tests: ");
625 }
626
627 if(en_us) {
628 switch (index)
629 {
630 case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break;
631 case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break;
632 case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break;
633 case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break;
634 case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break;
635 case 5: name = "TestUnicodeChar"; if (exec) TestUnicodeChar(/* par */); break;
636 case 6: name = "TestAssignment"; if (exec) TestAssignment(/* par */); break;
637 case 7: name = "TestConstructors"; if (exec) TestConstructors(/* par */); break;
638 case 8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
639 default: name = ""; break;
640 }
641 } else {
642 dataerrln("Class iterator not instantiated");
643 name = "";
644 }
645 }
646
647 #endif /* #if !UCONFIG_NO_COLLATION */
648