1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_COLLATION
10
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "regcoll.h"
16 #include "sfwdchit.h"
17 #include "testutil.h"
18 #include "cmemory.h"
19
// Number of elements in a true array (not a pointer), as an int32_t.
// The argument is fully parenthesized so that expression arguments work too.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
21
CollationRegressionTest()22 CollationRegressionTest::CollationRegressionTest()
23 {
24 UErrorCode status = U_ZERO_ERROR;
25
26 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
27 if(U_FAILURE(status)) {
28 delete en_us;
29 en_us = 0;
30 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
31 return;
32 }
33 }
34
~CollationRegressionTest()35 CollationRegressionTest::~CollationRegressionTest()
36 {
37 delete en_us;
38 }
39
40
41 // @bug 4048446
42 //
43 // CollationElementIterator.reset() doesn't work
44 //
Test4048446()45 void CollationRegressionTest::Test4048446(/* char* par */)
46 {
47 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
48 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
49 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
50 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
51 UErrorCode status = U_ZERO_ERROR;
52
53 if (i1 == NULL|| i2 == NULL)
54 {
55 errln("Could not create CollationElementIterator's");
56 delete i1;
57 delete i2;
58 return;
59 }
60
61 while (i1->next(status) != CollationElementIterator::NULLORDER)
62 {
63 if (U_FAILURE(status))
64 {
65 errln("error calling next()");
66
67 delete i1;
68 delete i2;
69 return;
70 }
71 }
72
73 i1->reset();
74
75 assertEqual(*i1, *i2);
76
77 delete i1;
78 delete i2;
79 }
80
81 // @bug 4051866
82 //
83 // Collator -> rules -> Collator round-trip broken for expanding characters
84 //
Test4051866()85 void CollationRegressionTest::Test4051866(/* char* par */)
86 {
87 /*
88 RuleBasedCollator c1 = new RuleBasedCollator("< o "
89 +"& oe ,o\u3080"
90 +"& oe ,\u1530 ,O"
91 +"& OE ,O\u3080"
92 +"& OE ,\u1520"
93 +"< p ,P");
94 */
95
96 UnicodeString rules;
97 UErrorCode status = U_ZERO_ERROR;
98
99 rules += "< o ";
100 rules += "& oe ,o";
101 rules += (UChar)0x3080;
102 rules += "& oe ,";
103 rules += (UChar)0x1530;
104 rules += " ,O";
105 rules += "& OE ,O";
106 rules += (UChar)0x3080;
107 rules += "& OE ,";
108 rules += (UChar)0x1520;
109 rules += "< p ,P";
110
111 // Build a collator containing expanding characters
112 RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
113
114 // Build another using the rules from the first
115 RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
116
117 // Make sure they're the same
118 if (!(c1->getRules() == c2->getRules()))
119 {
120 errln("Rules are not equal");
121 }
122
123 delete c2;
124 delete c1;
125 }
126
127 // @bug 4053636
128 //
129 // Collator thinks "black-bird" == "black"
130 //
Test4053636()131 void CollationRegressionTest::Test4053636(/* char* par */)
132 {
133 if (en_us->equals("black_bird", "black"))
134 {
135 errln("black-bird == black");
136 }
137 }
138
139 // @bug 4054238
140 //
141 // CollationElementIterator will not work correctly if the associated
142 // Collator object's mode is changed
143 //
Test4054238()144 void CollationRegressionTest::Test4054238(/* char* par */)
145 {
146 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
147 const UnicodeString test3(chars3);
148 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
149
150 // NOTE: The Java code uses en_us to create the CollationElementIterators
151 // but I'm pretty sure that's wrong, so I've changed this to use c.
152 UErrorCode status = U_ZERO_ERROR;
153 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
154 CollationElementIterator *i1 = c->createCollationElementIterator(test3);
155 delete i1;
156 delete c;
157 }
158
159 // @bug 4054734
160 //
161 // Collator::IDENTICAL documented but not implemented
162 //
Test4054734()163 void CollationRegressionTest::Test4054734(/* char* par */)
164 {
165 /*
166 Here's the original Java:
167
168 String[] decomp = {
169 "\u0001", "<", "\u0002",
170 "\u0001", "=", "\u0001",
171 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
172 "\u00C0", "=", "A\u0300" // Decomp should make these equal
173 };
174
175 String[] nodecomp = {
176 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
177 };
178 */
179
180 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
181 {
182 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
183 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
184 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
185 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
186 };
187
188
189 UErrorCode status = U_ZERO_ERROR;
190 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
191
192 c->setStrength(Collator::IDENTICAL);
193
194 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
195 compareArray(*c, decomp, ARRAY_LENGTH(decomp));
196
197 delete c;
198 }
199
200 // @bug 4054736
201 //
202 // Full Decomposition mode not implemented
203 //
Test4054736()204 void CollationRegressionTest::Test4054736(/* char* par */)
205 {
206 UErrorCode status = U_ZERO_ERROR;
207 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
208
209 c->setStrength(Collator::SECONDARY);
210 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
211
212 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
213 {
214 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
215 };
216
217 compareArray(*c, tests, ARRAY_LENGTH(tests));
218
219 delete c;
220 }
221
222 // @bug 4058613
223 //
224 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
225 //
Test4058613()226 void CollationRegressionTest::Test4058613(/* char* par */)
227 {
228 // Creating a default collator doesn't work when Korean is the default
229 // locale
230
231 Locale oldDefault = Locale::getDefault();
232 UErrorCode status = U_ZERO_ERROR;
233
234 Locale::setDefault(Locale::getKorean(), status);
235
236 if (U_FAILURE(status))
237 {
238 errln("Could not set default locale to Locale::KOREAN");
239 return;
240 }
241
242 Collator *c = NULL;
243
244 c = Collator::createInstance("en_US", status);
245
246 if (c == NULL || U_FAILURE(status))
247 {
248 errln("Could not create a Korean collator");
249 Locale::setDefault(oldDefault, status);
250 delete c;
251 return;
252 }
253
254 // Since the fix to this bug was to turn off decomposition for Korean collators,
255 // ensure that's what we got
256 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
257 {
258 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
259 }
260
261 delete c;
262
263 Locale::setDefault(oldDefault, status);
264 }
265
266 // @bug 4059820
267 //
268 // RuleBasedCollator.getRules does not return the exact pattern as input
269 // for expanding character sequences
270 //
Test4059820()271 void CollationRegressionTest::Test4059820(/* char* par */)
272 {
273 UErrorCode status = U_ZERO_ERROR;
274
275 RuleBasedCollator *c = NULL;
276 UnicodeString rules = "< a < b , c/a < d < z";
277
278 c = new RuleBasedCollator(rules, status);
279
280 if (c == NULL || U_FAILURE(status))
281 {
282 errln("Failure building a collator.");
283 delete c;
284 return;
285 }
286
287 if ( c->getRules().indexOf("c/a") == -1)
288 {
289 errln("returned rules do not contain 'c/a'");
290 }
291
292 delete c;
293 }
294
295 // @bug 4060154
296 //
297 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
298 //
Test4060154()299 void CollationRegressionTest::Test4060154(/* char* par */)
300 {
301 UErrorCode status = U_ZERO_ERROR;
302 UnicodeString rules;
303
304 rules += "< g, G < h, H < i, I < j, J";
305 rules += " & H < ";
306 rules += (UChar)0x0131;
307 rules += ", ";
308 rules += (UChar)0x0130;
309 rules += ", i, I";
310
311 RuleBasedCollator *c = NULL;
312
313 c = new RuleBasedCollator(rules, status);
314
315 if (c == NULL || U_FAILURE(status))
316 {
317 errln("failure building collator.");
318 delete c;
319 return;
320 }
321
322 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
323
324 /*
325 String[] tertiary = {
326 "A", "<", "B",
327 "H", "<", "\u0131",
328 "H", "<", "I",
329 "\u0131", "<", "\u0130",
330 "\u0130", "<", "i",
331 "\u0130", ">", "H",
332 };
333 */
334
335 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
336 {
337 {0x41, 0}, {0x3c, 0}, {0x42, 0},
338 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
339 {0x48, 0}, {0x3c, 0}, {0x49, 0},
340 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
341 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
342 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
343 };
344
345 c->setStrength(Collator::TERTIARY);
346 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
347
348 /*
349 String[] secondary = {
350 "H", "<", "I",
351 "\u0131", "=", "\u0130",
352 };
353 */
354 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
355 {
356 {0x48, 0}, {0x3c, 0}, {0x49, 0},
357 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
358 };
359
360 c->setStrength(Collator::PRIMARY);
361 compareArray(*c, secondary, ARRAY_LENGTH(secondary));
362
363 delete c;
364 }
365
366 // @bug 4062418
367 //
368 // Secondary/Tertiary comparison incorrect in French Secondary
369 //
Test4062418()370 void CollationRegressionTest::Test4062418(/* char* par */)
371 {
372 UErrorCode status = U_ZERO_ERROR;
373
374 RuleBasedCollator *c = NULL;
375
376 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
377
378 if (c == NULL || U_FAILURE(status))
379 {
380 errln("Failed to create collator for Locale::getCanadaFrench()");
381 delete c;
382 return;
383 }
384
385 c->setStrength(Collator::SECONDARY);
386
387 /*
388 String[] tests = {
389 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
390 };
391 */
392 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
393 {
394 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
395 };
396
397 compareArray(*c, tests, ARRAY_LENGTH(tests));
398
399 delete c;
400 }
401
402 // @bug 4065540
403 //
404 // Collator::compare() method broken if either string contains spaces
405 //
Test4065540()406 void CollationRegressionTest::Test4065540(/* char* par */)
407 {
408 if (en_us->compare("abcd e", "abcd f") == 0)
409 {
410 errln("'abcd e' == 'abcd f'");
411 }
412 }
413
414 // @bug 4066189
415 //
416 // Unicode characters need to be recursively decomposed to get the
417 // correct result. For example,
418 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
419 //
Test4066189()420 void CollationRegressionTest::Test4066189(/* char* par */)
421 {
422 static const UChar chars1[] = {0x1EB1, 0};
423 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
424 const UnicodeString test1(chars1);
425 const UnicodeString test2(chars2);
426 UErrorCode status = U_ZERO_ERROR;
427
428 // NOTE: The java code used en_us to create the
429 // CollationElementIterator's. I'm pretty sure that
430 // was wrong, so I've change the code to use c1 and c2
431 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
432 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
433 CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
434
435 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
436 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
437 CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
438
439 assertEqual(*i1, *i2);
440
441 delete i2;
442 delete c2;
443 delete i1;
444 delete c1;
445 }
446
447 // @bug 4066696
448 //
449 // French secondary collation checking at the end of compare iteration fails
450 //
Test4066696()451 void CollationRegressionTest::Test4066696(/* char* par */)
452 {
453 UErrorCode status = U_ZERO_ERROR;
454 RuleBasedCollator *c = NULL;
455
456 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
457
458 if (c == NULL || U_FAILURE(status))
459 {
460 errln("Failure creating collator for Locale::getCanadaFrench()");
461 delete c;
462 return;
463 }
464
465 c->setStrength(Collator::SECONDARY);
466
467 /*
468 String[] tests = {
469 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
470 };
471
472 should be:
473
474 String[] tests = {
475 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
476 };
477
478 */
479
480 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
481 {
482 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
483 };
484
485 compareArray(*c, tests, ARRAY_LENGTH(tests));
486
487 delete c;
488 }
489
490 // @bug 4076676
491 //
492 // Bad canonicalization of same-class combining characters
493 //
Test4076676()494 void CollationRegressionTest::Test4076676(/* char* par */)
495 {
496 // These combining characters are all in the same class, so they should not
497 // be reordered, and they should compare as unequal.
498 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
499 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
500
501 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
502 c->setStrength(Collator::TERTIARY);
503
504 if (c->compare(s1,s2) == 0)
505 {
506 errln("Same-class combining chars were reordered");
507 }
508
509 delete c;
510 }
511
512 // @bug 4079231
513 //
514 // RuleBasedCollator::operator==(NULL) throws NullPointerException
515 //
Test4079231()516 void CollationRegressionTest::Test4079231(/* char* par */)
517 {
518 // I don't think there's any way to write this test
519 // in C++. The following is equivalent to the Java,
520 // but doesn't compile 'cause NULL can't be converted
521 // to Collator&
522 //
523 // if (en_us->operator==(NULL))
524 // {
525 // errln("en_us->operator==(NULL) returned TRUE");
526 // }
527
528 /*
529 try {
530 if (en_us->equals(null)) {
531 errln("en_us->equals(null) returned true");
532 }
533 }
534 catch (Exception e) {
535 errln("en_us->equals(null) threw " + e.toString());
536 }
537 */
538 }
539
540 // @bug 4078588
541 //
542 // RuleBasedCollator breaks on "< a < bb" rule
543 //
Test4078588()544 void CollationRegressionTest::Test4078588(/* char *par */)
545 {
546 UErrorCode status = U_ZERO_ERROR;
547 RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
548
549 if (rbc == NULL || U_FAILURE(status))
550 {
551 errln("Failed to create RuleBasedCollator.");
552 delete rbc;
553 return;
554 }
555
556 Collator::EComparisonResult result = rbc->compare("a","bb");
557
558 if (result != Collator::LESS)
559 {
560 errln((UnicodeString)"Compare(a,bb) returned " + (int)result
561 + (UnicodeString)"; expected -1");
562 }
563
564 delete rbc;
565 }
566
567 // @bug 4081866
568 //
569 // Combining characters in different classes not reordered properly.
570 //
Test4081866()571 void CollationRegressionTest::Test4081866(/* char* par */)
572 {
573 // These combining characters are all in different classes,
574 // so they should be reordered and the strings should compare as equal.
575 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
576 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
577
578 UErrorCode status = U_ZERO_ERROR;
579 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
580 c->setStrength(Collator::TERTIARY);
581
582 // Now that the default collators are set to NO_DECOMPOSITION
583 // (as a result of fixing bug 4114077), we must set it explicitly
584 // when we're testing reordering behavior. -- lwerner, 5/5/98
585 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
586
587 if (c->compare(s1,s2) != 0)
588 {
589 errln("Combining chars were not reordered");
590 }
591
592 delete c;
593 }
594
595 // @bug 4087241
596 //
597 // string comparison errors in Scandinavian collators
598 //
Test4087241()599 void CollationRegressionTest::Test4087241(/* char* par */)
600 {
601 UErrorCode status = U_ZERO_ERROR;
602 Locale da_DK("da", "DK");
603 RuleBasedCollator *c = NULL;
604
605 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
606
607 if (c == NULL || U_FAILURE(status))
608 {
609 errln("Failed to create collator for da_DK locale");
610 delete c;
611 return;
612 }
613
614 c->setStrength(Collator::SECONDARY);
615
616 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
617 {
618 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
619 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring
620 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
621 };
622
623 compareArray(*c, tests, ARRAY_LENGTH(tests));
624
625 delete c;
626 }
627
628 // @bug 4087243
629 //
630 // CollationKey takes ignorable strings into account when it shouldn't
631 //
Test4087243()632 void CollationRegressionTest::Test4087243(/* char* par */)
633 {
634 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
635 c->setStrength(Collator::TERTIARY);
636
637 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
638 {
639 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
640 };
641
642 compareArray(*c, tests, ARRAY_LENGTH(tests));
643
644 delete c;
645 }
646
647 // @bug 4092260
648 //
649 // Mu/micro conflict
650 // Micro symbol and greek lowercase letter Mu should sort identically
651 //
Test4092260()652 void CollationRegressionTest::Test4092260(/* char* par */)
653 {
654 UErrorCode status = U_ZERO_ERROR;
655 Locale el("el", "");
656 Collator *c = NULL;
657
658 c = Collator::createInstance(el, status);
659
660 if (c == NULL || U_FAILURE(status))
661 {
662 errln("Failed to create collator for el locale.");
663 delete c;
664 return;
665 }
666
667 // These now have tertiary differences in UCA
668 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
669
670 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
671 {
672 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
673 };
674
675 compareArray(*c, tests, ARRAY_LENGTH(tests));
676
677 delete c;
678 }
679
680 // @bug 4095316
681 //
Test4095316()682 void CollationRegressionTest::Test4095316(/* char* par */)
683 {
684 UErrorCode status = U_ZERO_ERROR;
685 Locale el_GR("el", "GR");
686 Collator *c = Collator::createInstance(el_GR, status);
687
688 if (c == NULL || U_FAILURE(status))
689 {
690 errln("Failed to create collator for el_GR locale");
691 delete c;
692 return;
693 }
694 // These now have tertiary differences in UCA
695 //c->setStrength(Collator::TERTIARY);
696 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
697
698 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
699 {
700 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
701 };
702
703 compareArray(*c, tests, ARRAY_LENGTH(tests));
704
705 delete c;
706 }
707
708 // @bug 4101940
709 //
Test4101940()710 void CollationRegressionTest::Test4101940(/* char* par */)
711 {
712 UErrorCode status = U_ZERO_ERROR;
713 RuleBasedCollator *c = NULL;
714 UnicodeString rules = "< a < b";
715 UnicodeString nothing = "";
716
717 c = new RuleBasedCollator(rules, status);
718
719 if (c == NULL || U_FAILURE(status))
720 {
721 errln("Failed to create RuleBasedCollator");
722 delete c;
723 return;
724 }
725
726 CollationElementIterator *i = c->createCollationElementIterator(nothing);
727 i->reset();
728
729 if (i->next(status) != CollationElementIterator::NULLORDER)
730 {
731 errln("next did not return NULLORDER");
732 }
733
734 delete i;
735 delete c;
736 }
737
738 // @bug 4103436
739 //
740 // Collator::compare not handling spaces properly
741 //
Test4103436()742 void CollationRegressionTest::Test4103436(/* char* par */)
743 {
744 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
745 c->setStrength(Collator::TERTIARY);
746
747 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
748 {
749 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
750 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
751 };
752
753 compareArray(*c, tests, ARRAY_LENGTH(tests));
754
755 delete c;
756 }
757
758 // @bug 4114076
759 //
760 // Collation not Unicode conformant with Hangul syllables
761 //
Test4114076()762 void CollationRegressionTest::Test4114076(/* char* par */)
763 {
764 UErrorCode status = U_ZERO_ERROR;
765 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
766 c->setStrength(Collator::TERTIARY);
767
768 //
769 // With Canonical decomposition, Hangul syllables should get decomposed
770 // into Jamo, but Jamo characters should not be decomposed into
771 // conjoining Jamo
772 //
773 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
774 {
775 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
776 };
777
778 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
779 compareArray(*c, test1, ARRAY_LENGTH(test1));
780
781 // From UTR #15:
782 // *In earlier versions of Unicode, jamo characters like ksf
783 // had compatibility mappings to kf + sf. These mappings were
784 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
785 // That is, the following test is obsolete as of 2.1.9
786
787 //obsolete- // With Full decomposition, it should go all the way down to
788 //obsolete- // conjoining Jamo characters.
789 //obsolete- //
790 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
791 //obsolete- {
792 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
793 //obsolete- };
794 //obsolete-
795 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
796 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
797
798 delete c;
799 }
800
801
802 // @bug 4124632
803 //
804 // Collator::getCollationKey was hanging on certain character sequences
805 //
Test4124632()806 void CollationRegressionTest::Test4124632(/* char* par */)
807 {
808 UErrorCode status = U_ZERO_ERROR;
809 Collator *coll = NULL;
810
811 coll = Collator::createInstance(Locale::getJapan(), status);
812
813 if (coll == NULL || U_FAILURE(status))
814 {
815 errln("Failed to create collator for Locale::JAPAN");
816 delete coll;
817 return;
818 }
819
820 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
821 CollationKey key;
822
823 coll->getCollationKey(test, key, status);
824
825 if (key.isBogus() || U_FAILURE(status))
826 {
827 errln("CollationKey creation failed.");
828 }
829
830 delete coll;
831 }
832
833 // @bug 4132736
834 //
835 // sort order of french words with multiple accents has errors
836 //
Test4132736()837 void CollationRegressionTest::Test4132736(/* char* par */)
838 {
839 UErrorCode status = U_ZERO_ERROR;
840
841 Collator *c = NULL;
842
843 c = Collator::createInstance(Locale::getCanadaFrench(), status);
844 c->setStrength(Collator::TERTIARY);
845
846 if (c == NULL || U_FAILURE(status))
847 {
848 errln("Failed to create a collator for Locale::getCanadaFrench()");
849 delete c;
850 return;
851 }
852
853 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
854 {
855 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
856 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
857 };
858
859 compareArray(*c, test1, ARRAY_LENGTH(test1));
860
861 delete c;
862 }
863
864 // @bug 4133509
865 //
866 // The sorting using java.text.CollationKey is not in the exact order
867 //
Test4133509()868 void CollationRegressionTest::Test4133509(/* char* par */)
869 {
870 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
871 {
872 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
873 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
874 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
875 };
876
877 compareArray(*en_us, test1, ARRAY_LENGTH(test1));
878 }
879
880 // @bug 4114077
881 //
882 // Collation with decomposition off doesn't work for Europe
883 //
Test4114077()884 void CollationRegressionTest::Test4114077(/* char* par */)
885 {
886 // Ensure that we get the same results with decomposition off
887 // as we do with it on....
888
889 UErrorCode status = U_ZERO_ERROR;
890 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
891 c->setStrength(Collator::TERTIARY);
892
893 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
894 {
895 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
896 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
897 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
898 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
899 // -> a, ring, acute
900 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
901 };
902
903 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
904 compareArray(*c, test1, ARRAY_LENGTH(test1));
905
906 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
907 {
908 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
909 };
910
911 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
912 compareArray(*c, test2, ARRAY_LENGTH(test2));
913
914 delete c;
915 }
916
917 // @bug 4141640
918 //
919 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
920 //
Test4141640()921 void CollationRegressionTest::Test4141640(/* char* par */)
922 {
923 //
924 // Rather than just creating a Swedish collator, we might as well
925 // try to instantiate one for every locale available on the system
926 // in order to prevent this sort of bug from cropping up in the future
927 //
928 UErrorCode status = U_ZERO_ERROR;
929 int32_t i, localeCount;
930 const Locale *locales = Locale::getAvailableLocales(localeCount);
931
932 for (i = 0; i < localeCount; i += 1)
933 {
934 Collator *c = NULL;
935
936 status = U_ZERO_ERROR;
937 c = Collator::createInstance(locales[i], status);
938
939 if (c == NULL || U_FAILURE(status))
940 {
941 UnicodeString msg, localeName;
942
943 msg += "Could not create collator for locale ";
944 msg += locales[i].getName();
945
946 errln(msg);
947 }
948
949 delete c;
950 }
951 }
952
953 // @bug 4139572
954 //
955 // getCollationKey throws exception for spanish text
956 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
957 //
Test4139572()958 void CollationRegressionTest::Test4139572(/* char* par */)
959 {
960 //
961 // Code pasted straight from the bug report
962 // (and then translated to C++ ;-)
963 //
964 // create spanish locale and collator
965 UErrorCode status = U_ZERO_ERROR;
966 Locale l("es", "es");
967 Collator *col = NULL;
968
969 col = Collator::createInstance(l, status);
970
971 if (col == NULL || U_FAILURE(status))
972 {
973 errln("Failed to create a collator for es_es locale.");
974 delete col;
975 return;
976 }
977
978 CollationKey key;
979
980 // this spanish phrase kills it!
981 col->getCollationKey("Nombre De Objeto", key, status);
982
983 if (key.isBogus() || U_FAILURE(status))
984 {
985 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
986 }
987
988 delete col;
989 }
990 /* HSYS : RuleBasedCollator::compare() performance enhancements
991 compare() does not create CollationElementIterator() anymore.*/
992
993 class My4146160Collator : public RuleBasedCollator
994 {
995 public:
996 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
997 ~My4146160Collator();
998
999 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
1000
1001 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
1002
1003 static int32_t count;
1004 };
1005
1006 int32_t My4146160Collator::count = 0;
1007
My4146160Collator(RuleBasedCollator & rbc,UErrorCode & status)1008 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1009 : RuleBasedCollator(rbc.getRules(), status)
1010 {
1011 }
1012
~My4146160Collator()1013 My4146160Collator::~My4146160Collator()
1014 {
1015 }
1016
createCollationElementIterator(const UnicodeString & text) const1017 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1018 {
1019 count += 1;
1020 return RuleBasedCollator::createCollationElementIterator(text);
1021 }
1022
createCollationElementIterator(const CharacterIterator & text) const1023 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1024 {
1025 count += 1;
1026 return RuleBasedCollator::createCollationElementIterator(text);
1027 }
1028
1029 // @bug 4146160
1030 //
1031 // RuleBasedCollator doesn't use createCollationElementIterator internally
1032 //
Test4146160()1033 void CollationRegressionTest::Test4146160(/* char* par */)
1034 {
1035 #if 0
1036 //
1037 // Use a custom collator class whose createCollationElementIterator
1038 // methods increment a count....
1039 //
1040 UErrorCode status = U_ZERO_ERROR;
1041 CollationKey key;
1042
1043 My4146160Collator::count = 0;
1044 My4146160Collator *mc = NULL;
1045
1046 mc = new My4146160Collator(*en_us, status);
1047
1048 if (mc == NULL || U_FAILURE(status))
1049 {
1050 errln("Failed to create a My4146160Collator.");
1051 delete mc;
1052 return;
1053 }
1054
1055 mc->getCollationKey("1", key, status);
1056
1057 if (key.isBogus() || U_FAILURE(status))
1058 {
1059 errln("Failure to get a CollationKey from a My4146160Collator.");
1060 delete mc;
1061 return;
1062 }
1063
1064 if (My4146160Collator::count < 1)
1065 {
1066 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1067 }
1068
1069 My4146160Collator::count = 0;
1070 mc->compare("1", "2");
1071
1072 if (My4146160Collator::count < 1)
1073 {
1074 errln("My4146160Collator::createtCollationElementIterator not called for compare");
1075 }
1076
1077 delete mc;
1078 #endif
1079 }
1080
1081 // Ticket 7189
1082 //
1083 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const UChar * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1084 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1085 UCharIterator uiter;
1086 uint32_t state[2] = { 0, 0 };
1087 int32_t keyLen;
1088 int32_t count = 8;
1089
1090 uiter_setString(&uiter, text, len);
1091 keyLen = 0;
1092 while (TRUE) {
1093 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1094 if (U_FAILURE(status)) {
1095 return -1;
1096 }
1097 if (keyPartLen == 0) {
1098 break;
1099 }
1100 keyLen += keyPartLen;
1101 }
1102 return keyLen;
1103 }
1104
TestT7189()1105 void CollationRegressionTest::TestT7189() {
1106 UErrorCode status = U_ZERO_ERROR;
1107 UCollator *coll;
1108 uint32_t i;
1109
1110 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1111 // "Achter De Hoven"
1112 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1113 // "ABC"
1114 { 0x41, 0x42, 0x43, 0x00 },
1115 // "HELLO world!"
1116 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1117 };
1118
1119 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1120 // "Achter de Hoven"
1121 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1122 // "abc"
1123 { 0x61, 0x62, 0x63, 0x00 },
1124 // "hello world!"
1125 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1126 };
1127
1128 // Open the collator
1129 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1130 if (U_FAILURE(status)) {
1131 errln("Failed to create a collator for short string EO_S1");
1132 return;
1133 }
1134
1135 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1136 uint8_t key1[100], key2[100];
1137 int32_t len1, len2;
1138
1139 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1140 if (U_FAILURE(status)) {
1141 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1142 break;
1143 }
1144 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1145 if (U_FAILURE(status)) {
1146 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1147 break;
1148 }
1149
1150 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1151 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1));
1152 } else {
1153 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : "
1154 + TestUtility::hex(key2, len2));
1155 }
1156 }
1157 ucol_close(coll);
1158 }
1159
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1160 void CollationRegressionTest::compareArray(Collator &c,
1161 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1162 int32_t testCount)
1163 {
1164 int32_t i;
1165 Collator::EComparisonResult expectedResult = Collator::EQUAL;
1166
1167 for (i = 0; i < testCount; i += 3)
1168 {
1169 UnicodeString source(tests[i]);
1170 UnicodeString comparison(tests[i + 1]);
1171 UnicodeString target(tests[i + 2]);
1172
1173 if (comparison == "<")
1174 {
1175 expectedResult = Collator::LESS;
1176 }
1177 else if (comparison == ">")
1178 {
1179 expectedResult = Collator::GREATER;
1180 }
1181 else if (comparison == "=")
1182 {
1183 expectedResult = Collator::EQUAL;
1184 }
1185 else
1186 {
1187 UnicodeString bogus1("Bogus comparison string \"");
1188 UnicodeString bogus2("\"");
1189 errln(bogus1 + comparison + bogus2);
1190 }
1191
1192 Collator::EComparisonResult compareResult = c.compare(source, target);
1193
1194 CollationKey sourceKey, targetKey;
1195 UErrorCode status = U_ZERO_ERROR;
1196
1197 c.getCollationKey(source, sourceKey, status);
1198
1199 if (U_FAILURE(status))
1200 {
1201 errln("Couldn't get collationKey for source");
1202 continue;
1203 }
1204
1205 c.getCollationKey(target, targetKey, status);
1206
1207 if (U_FAILURE(status))
1208 {
1209 errln("Couldn't get collationKey for target");
1210 continue;
1211 }
1212
1213 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1214
1215 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1216
1217 }
1218 }
1219
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1220 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1221 {
1222 int32_t c1, c2, count = 0;
1223 UErrorCode status = U_ZERO_ERROR;
1224
1225 do
1226 {
1227 c1 = i1.next(status);
1228 c2 = i2.next(status);
1229
1230 if (c1 != c2)
1231 {
1232 UnicodeString msg, msg1(" ");
1233
1234 msg += msg1 + count;
1235 msg += ": strength(0x";
1236 appendHex(c1, 8, msg);
1237 msg += ") != strength(0x";
1238 appendHex(c2, 8, msg);
1239 msg += ")";
1240
1241 errln(msg);
1242 break;
1243 }
1244
1245 count += 1;
1246 }
1247 while (c1 != CollationElementIterator::NULLORDER);
1248 }
1249
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1250 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1251 {
1252 if (exec)
1253 {
1254 logln("Collation Regression Tests: ");
1255 }
1256
1257 if(en_us) {
1258 switch (index)
1259 {
1260 case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
1261 case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
1262 case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
1263 case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
1264 case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
1265 case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
1266 case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
1267 case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
1268 case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
1269 case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
1270 case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
1271 case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
1272 case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
1273 case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
1274 case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
1275 case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
1276 case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
1277 case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
1278 case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
1279 case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
1280 case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
1281 case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
1282 case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
1283 case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
1284 case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
1285 case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
1286 case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
1287 case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
1288 case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
1289 case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
1290 case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
1291 case 31: name = "TestT7189"; if (exec) TestT7189(); break;
1292 default: name = ""; break;
1293 }
1294 } else {
1295 dataerrln("Class collator not instantiated");
1296 name = "";
1297 }
1298 }
1299
1300 #endif /* #if !UCONFIG_NO_COLLATION */
1301