1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_COLLATION
10
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "regcoll.h"
16 #include "sfwdchit.h"
17
18 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
19
CollationRegressionTest()20 CollationRegressionTest::CollationRegressionTest()
21 {
22 UErrorCode status = U_ZERO_ERROR;
23
24 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
25 if(U_FAILURE(status)) {
26 delete en_us;
27 en_us = 0;
28 errln("Collator creation failed with %s", u_errorName(status));
29 return;
30 }
31 }
32
// Releases the collator created by the constructor (en_us is 0 when that
// creation failed, in which case delete is a no-op).
CollationRegressionTest::~CollationRegressionTest()
{
    delete en_us;
}
37
38
39 // @bug 4048446
40 //
41 // CollationElementIterator.reset() doesn't work
42 //
Test4048446()43 void CollationRegressionTest::Test4048446(/* char* par */)
44 {
45 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
46 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
47 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
48 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
49 UErrorCode status = U_ZERO_ERROR;
50
51 if (i1 == NULL|| i2 == NULL)
52 {
53 errln("Could not create CollationElementIterator's");
54 delete i1;
55 delete i2;
56 return;
57 }
58
59 while (i1->next(status) != CollationElementIterator::NULLORDER)
60 {
61 if (U_FAILURE(status))
62 {
63 errln("error calling next()");
64
65 delete i1;
66 delete i2;
67 return;
68 }
69 }
70
71 i1->reset();
72
73 assertEqual(*i1, *i2);
74
75 delete i1;
76 delete i2;
77 }
78
79 // @bug 4051866
80 //
81 // Collator -> rules -> Collator round-trip broken for expanding characters
82 //
Test4051866()83 void CollationRegressionTest::Test4051866(/* char* par */)
84 {
85 /*
86 RuleBasedCollator c1 = new RuleBasedCollator("< o "
87 +"& oe ,o\u3080"
88 +"& oe ,\u1530 ,O"
89 +"& OE ,O\u3080"
90 +"& OE ,\u1520"
91 +"< p ,P");
92 */
93
94 UnicodeString rules;
95 UErrorCode status = U_ZERO_ERROR;
96
97 rules += "< o ";
98 rules += "& oe ,o";
99 rules += (UChar)0x3080;
100 rules += "& oe ,";
101 rules += (UChar)0x1530;
102 rules += " ,O";
103 rules += "& OE ,O";
104 rules += (UChar)0x3080;
105 rules += "& OE ,";
106 rules += (UChar)0x1520;
107 rules += "< p ,P";
108
109 // Build a collator containing expanding characters
110 RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
111
112 // Build another using the rules from the first
113 RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
114
115 // Make sure they're the same
116 if (!(c1->getRules() == c2->getRules()))
117 {
118 errln("Rules are not equal");
119 }
120
121 delete c2;
122 delete c1;
123 }
124
125 // @bug 4053636
126 //
127 // Collator thinks "black-bird" == "black"
128 //
Test4053636()129 void CollationRegressionTest::Test4053636(/* char* par */)
130 {
131 if (en_us->equals("black_bird", "black"))
132 {
133 errln("black-bird == black");
134 }
135 }
136
137 // @bug 4054238
138 //
139 // CollationElementIterator will not work correctly if the associated
140 // Collator object's mode is changed
141 //
Test4054238()142 void CollationRegressionTest::Test4054238(/* char* par */)
143 {
144 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
145 const UnicodeString test3(chars3);
146 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
147
148 // NOTE: The Java code uses en_us to create the CollationElementIterators
149 // but I'm pretty sure that's wrong, so I've changed this to use c.
150 UErrorCode status = U_ZERO_ERROR;
151 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
152 CollationElementIterator *i1 = c->createCollationElementIterator(test3);
153 delete i1;
154 delete c;
155 }
156
157 // @bug 4054734
158 //
159 // Collator::IDENTICAL documented but not implemented
160 //
Test4054734()161 void CollationRegressionTest::Test4054734(/* char* par */)
162 {
163 /*
164 Here's the original Java:
165
166 String[] decomp = {
167 "\u0001", "<", "\u0002",
168 "\u0001", "=", "\u0001",
169 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
170 "\u00C0", "=", "A\u0300" // Decomp should make these equal
171 };
172
173 String[] nodecomp = {
174 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
175 };
176 */
177
178 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
179 {
180 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
181 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
182 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
183 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
184 };
185
186
187 UErrorCode status = U_ZERO_ERROR;
188 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
189
190 c->setStrength(Collator::IDENTICAL);
191
192 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
193 compareArray(*c, decomp, ARRAY_LENGTH(decomp));
194
195 delete c;
196 }
197
198 // @bug 4054736
199 //
200 // Full Decomposition mode not implemented
201 //
Test4054736()202 void CollationRegressionTest::Test4054736(/* char* par */)
203 {
204 UErrorCode status = U_ZERO_ERROR;
205 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
206
207 c->setStrength(Collator::SECONDARY);
208 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
209
210 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
211 {
212 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
213 };
214
215 compareArray(*c, tests, ARRAY_LENGTH(tests));
216
217 delete c;
218 }
219
220 // @bug 4058613
221 //
222 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
223 //
Test4058613()224 void CollationRegressionTest::Test4058613(/* char* par */)
225 {
226 // Creating a default collator doesn't work when Korean is the default
227 // locale
228
229 Locale oldDefault = Locale::getDefault();
230 UErrorCode status = U_ZERO_ERROR;
231
232 Locale::setDefault(Locale::getKorean(), status);
233
234 if (U_FAILURE(status))
235 {
236 errln("Could not set default locale to Locale::KOREAN");
237 return;
238 }
239
240 Collator *c = NULL;
241
242 c = Collator::createInstance("en_US", status);
243
244 if (c == NULL || U_FAILURE(status))
245 {
246 errln("Could not create a Korean collator");
247 Locale::setDefault(oldDefault, status);
248 delete c;
249 return;
250 }
251
252 // Since the fix to this bug was to turn off decomposition for Korean collators,
253 // ensure that's what we got
254 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
255 {
256 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
257 }
258
259 delete c;
260
261 Locale::setDefault(oldDefault, status);
262 }
263
264 // @bug 4059820
265 //
266 // RuleBasedCollator.getRules does not return the exact pattern as input
267 // for expanding character sequences
268 //
Test4059820()269 void CollationRegressionTest::Test4059820(/* char* par */)
270 {
271 UErrorCode status = U_ZERO_ERROR;
272
273 RuleBasedCollator *c = NULL;
274 UnicodeString rules = "< a < b , c/a < d < z";
275
276 c = new RuleBasedCollator(rules, status);
277
278 if (c == NULL || U_FAILURE(status))
279 {
280 errln("Failure building a collator.");
281 delete c;
282 return;
283 }
284
285 if ( c->getRules().indexOf("c/a") == -1)
286 {
287 errln("returned rules do not contain 'c/a'");
288 }
289
290 delete c;
291 }
292
293 // @bug 4060154
294 //
295 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
296 //
Test4060154()297 void CollationRegressionTest::Test4060154(/* char* par */)
298 {
299 UErrorCode status = U_ZERO_ERROR;
300 UnicodeString rules;
301
302 rules += "< g, G < h, H < i, I < j, J";
303 rules += " & H < ";
304 rules += (UChar)0x0131;
305 rules += ", ";
306 rules += (UChar)0x0130;
307 rules += ", i, I";
308
309 RuleBasedCollator *c = NULL;
310
311 c = new RuleBasedCollator(rules, status);
312
313 if (c == NULL || U_FAILURE(status))
314 {
315 errln("failure building collator.");
316 delete c;
317 return;
318 }
319
320 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
321
322 /*
323 String[] tertiary = {
324 "A", "<", "B",
325 "H", "<", "\u0131",
326 "H", "<", "I",
327 "\u0131", "<", "\u0130",
328 "\u0130", "<", "i",
329 "\u0130", ">", "H",
330 };
331 */
332
333 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
334 {
335 {0x41, 0}, {0x3c, 0}, {0x42, 0},
336 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
337 {0x48, 0}, {0x3c, 0}, {0x49, 0},
338 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
339 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
340 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
341 };
342
343 c->setStrength(Collator::TERTIARY);
344 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
345
346 /*
347 String[] secondary = {
348 "H", "<", "I",
349 "\u0131", "=", "\u0130",
350 };
351 */
352 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
353 {
354 {0x48, 0}, {0x3c, 0}, {0x49, 0},
355 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
356 };
357
358 c->setStrength(Collator::PRIMARY);
359 compareArray(*c, secondary, ARRAY_LENGTH(secondary));
360
361 delete c;
362 }
363
364 // @bug 4062418
365 //
366 // Secondary/Tertiary comparison incorrect in French Secondary
367 //
Test4062418()368 void CollationRegressionTest::Test4062418(/* char* par */)
369 {
370 UErrorCode status = U_ZERO_ERROR;
371
372 RuleBasedCollator *c = NULL;
373
374 c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status);
375
376 if (c == NULL || U_FAILURE(status))
377 {
378 errln("Failed to create collator for Locale::getFrance()");
379 delete c;
380 return;
381 }
382
383 c->setStrength(Collator::SECONDARY);
384
385 /*
386 String[] tests = {
387 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
388 };
389 */
390 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
391 {
392 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
393 };
394
395 compareArray(*c, tests, ARRAY_LENGTH(tests));
396
397 delete c;
398 }
399
400 // @bug 4065540
401 //
402 // Collator::compare() method broken if either string contains spaces
403 //
Test4065540()404 void CollationRegressionTest::Test4065540(/* char* par */)
405 {
406 if (en_us->compare("abcd e", "abcd f") == 0)
407 {
408 errln("'abcd e' == 'abcd f'");
409 }
410 }
411
412 // @bug 4066189
413 //
414 // Unicode characters need to be recursively decomposed to get the
415 // correct result. For example,
416 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
417 //
Test4066189()418 void CollationRegressionTest::Test4066189(/* char* par */)
419 {
420 static const UChar chars1[] = {0x1EB1, 0};
421 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
422 const UnicodeString test1(chars1);
423 const UnicodeString test2(chars2);
424 UErrorCode status = U_ZERO_ERROR;
425
426 // NOTE: The java code used en_us to create the
427 // CollationElementIterator's. I'm pretty sure that
428 // was wrong, so I've change the code to use c1 and c2
429 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
430 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
431 CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
432
433 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
434 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
435 CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
436
437 assertEqual(*i1, *i2);
438
439 delete i2;
440 delete c2;
441 delete i1;
442 delete c1;
443 }
444
445 // @bug 4066696
446 //
447 // French secondary collation checking at the end of compare iteration fails
448 //
Test4066696()449 void CollationRegressionTest::Test4066696(/* char* par */)
450 {
451 UErrorCode status = U_ZERO_ERROR;
452 RuleBasedCollator *c = NULL;
453
454 c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status);
455
456 if (c == NULL || U_FAILURE(status))
457 {
458 errln("Failure creating collator for Locale::getFrance()");
459 delete c;
460 return;
461 }
462
463 c->setStrength(Collator::SECONDARY);
464
465 /*
466 String[] tests = {
467 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
468 };
469
470 should be:
471
472 String[] tests = {
473 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
474 };
475
476 */
477
478 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
479 {
480 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
481 };
482
483 compareArray(*c, tests, ARRAY_LENGTH(tests));
484
485 delete c;
486 }
487
488 // @bug 4076676
489 //
490 // Bad canonicalization of same-class combining characters
491 //
Test4076676()492 void CollationRegressionTest::Test4076676(/* char* par */)
493 {
494 // These combining characters are all in the same class, so they should not
495 // be reordered, and they should compare as unequal.
496 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
497 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
498
499 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
500 c->setStrength(Collator::TERTIARY);
501
502 if (c->compare(s1,s2) == 0)
503 {
504 errln("Same-class combining chars were reordered");
505 }
506
507 delete c;
508 }
509
510 // @bug 4079231
511 //
512 // RuleBasedCollator::operator==(NULL) throws NullPointerException
513 //
// Placeholder for the Java regression test: the body contains no executable
// statements, only the explanation and the original Java code below.
void CollationRegressionTest::Test4079231(/* char* par */)
{
    // I don't think there's any way to write this test
    // in C++. The following is equivalent to the Java,
    // but doesn't compile 'cause NULL can't be converted
    // to Collator&
    //
    // if (en_us->operator==(NULL))
    // {
    //     errln("en_us->operator==(NULL) returned TRUE");
    // }

 /*
   try {
        if (en_us->equals(null)) {
            errln("en_us->equals(null) returned true");
        }
    }
    catch (Exception e) {
        errln("en_us->equals(null) threw " + e.toString());
    }
*/
}
537
538 // @bug 4078588
539 //
540 // RuleBasedCollator breaks on "< a < bb" rule
541 //
Test4078588()542 void CollationRegressionTest::Test4078588(/* char *par */)
543 {
544 UErrorCode status = U_ZERO_ERROR;
545 RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
546
547 if (rbc == NULL || U_FAILURE(status))
548 {
549 errln("Failed to create RuleBasedCollator.");
550 delete rbc;
551 return;
552 }
553
554 Collator::EComparisonResult result = rbc->compare("a","bb");
555
556 if (result != Collator::LESS)
557 {
558 errln((UnicodeString)"Compare(a,bb) returned " + (int)result
559 + (UnicodeString)"; expected -1");
560 }
561
562 delete rbc;
563 }
564
565 // @bug 4081866
566 //
567 // Combining characters in different classes not reordered properly.
568 //
Test4081866()569 void CollationRegressionTest::Test4081866(/* char* par */)
570 {
571 // These combining characters are all in different classes,
572 // so they should be reordered and the strings should compare as equal.
573 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
574 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
575
576 UErrorCode status = U_ZERO_ERROR;
577 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
578 c->setStrength(Collator::TERTIARY);
579
580 // Now that the default collators are set to NO_DECOMPOSITION
581 // (as a result of fixing bug 4114077), we must set it explicitly
582 // when we're testing reordering behavior. -- lwerner, 5/5/98
583 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
584
585 if (c->compare(s1,s2) != 0)
586 {
587 errln("Combining chars were not reordered");
588 }
589
590 delete c;
591 }
592
593 // @bug 4087241
594 //
595 // string comparison errors in Scandinavian collators
596 //
Test4087241()597 void CollationRegressionTest::Test4087241(/* char* par */)
598 {
599 UErrorCode status = U_ZERO_ERROR;
600 Locale da_DK("da", "DK");
601 RuleBasedCollator *c = NULL;
602
603 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
604
605 if (c == NULL || U_FAILURE(status))
606 {
607 errln("Failed to create collator for da_DK locale");
608 delete c;
609 return;
610 }
611
612 c->setStrength(Collator::SECONDARY);
613
614 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
615 {
616 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
617 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring
618 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
619 };
620
621 compareArray(*c, tests, ARRAY_LENGTH(tests));
622
623 delete c;
624 }
625
626 // @bug 4087243
627 //
628 // CollationKey takes ignorable strings into account when it shouldn't
629 //
Test4087243()630 void CollationRegressionTest::Test4087243(/* char* par */)
631 {
632 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
633 c->setStrength(Collator::TERTIARY);
634
635 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
636 {
637 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
638 };
639
640 compareArray(*c, tests, ARRAY_LENGTH(tests));
641
642 delete c;
643 }
644
645 // @bug 4092260
646 //
647 // Mu/micro conflict
648 // Micro symbol and greek lowercase letter Mu should sort identically
649 //
Test4092260()650 void CollationRegressionTest::Test4092260(/* char* par */)
651 {
652 UErrorCode status = U_ZERO_ERROR;
653 Locale el("el", "");
654 Collator *c = NULL;
655
656 c = Collator::createInstance(el, status);
657
658 if (c == NULL || U_FAILURE(status))
659 {
660 errln("Failed to create collator for el locale.");
661 delete c;
662 return;
663 }
664
665 // These now have tertiary differences in UCA
666 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
667
668 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
669 {
670 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
671 };
672
673 compareArray(*c, tests, ARRAY_LENGTH(tests));
674
675 delete c;
676 }
677
678 // @bug 4095316
679 //
Test4095316()680 void CollationRegressionTest::Test4095316(/* char* par */)
681 {
682 UErrorCode status = U_ZERO_ERROR;
683 Locale el_GR("el", "GR");
684 Collator *c = Collator::createInstance(el_GR, status);
685
686 if (c == NULL || U_FAILURE(status))
687 {
688 errln("Failed to create collator for el_GR locale");
689 delete c;
690 return;
691 }
692 // These now have tertiary differences in UCA
693 //c->setStrength(Collator::TERTIARY);
694 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
695
696 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
697 {
698 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
699 };
700
701 compareArray(*c, tests, ARRAY_LENGTH(tests));
702
703 delete c;
704 }
705
706 // @bug 4101940
707 //
Test4101940()708 void CollationRegressionTest::Test4101940(/* char* par */)
709 {
710 UErrorCode status = U_ZERO_ERROR;
711 RuleBasedCollator *c = NULL;
712 UnicodeString rules = "< a < b";
713 UnicodeString nothing = "";
714
715 c = new RuleBasedCollator(rules, status);
716
717 if (c == NULL || U_FAILURE(status))
718 {
719 errln("Failed to create RuleBasedCollator");
720 delete c;
721 return;
722 }
723
724 CollationElementIterator *i = c->createCollationElementIterator(nothing);
725 i->reset();
726
727 if (i->next(status) != CollationElementIterator::NULLORDER)
728 {
729 errln("next did not return NULLORDER");
730 }
731
732 delete i;
733 delete c;
734 }
735
736 // @bug 4103436
737 //
738 // Collator::compare not handling spaces properly
739 //
Test4103436()740 void CollationRegressionTest::Test4103436(/* char* par */)
741 {
742 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
743 c->setStrength(Collator::TERTIARY);
744
745 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
746 {
747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
748 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
749 };
750
751 compareArray(*c, tests, ARRAY_LENGTH(tests));
752
753 delete c;
754 }
755
756 // @bug 4114076
757 //
758 // Collation not Unicode conformant with Hangul syllables
759 //
Test4114076()760 void CollationRegressionTest::Test4114076(/* char* par */)
761 {
762 UErrorCode status = U_ZERO_ERROR;
763 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
764 c->setStrength(Collator::TERTIARY);
765
766 //
767 // With Canonical decomposition, Hangul syllables should get decomposed
768 // into Jamo, but Jamo characters should not be decomposed into
769 // conjoining Jamo
770 //
771 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
772 {
773 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
774 };
775
776 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
777 compareArray(*c, test1, ARRAY_LENGTH(test1));
778
779 // From UTR #15:
780 // *In earlier versions of Unicode, jamo characters like ksf
781 // had compatibility mappings to kf + sf. These mappings were
782 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
783 // That is, the following test is obsolete as of 2.1.9
784
785 //obsolete- // With Full decomposition, it should go all the way down to
786 //obsolete- // conjoining Jamo characters.
787 //obsolete- //
788 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
789 //obsolete- {
790 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
791 //obsolete- };
792 //obsolete-
793 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
794 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
795
796 delete c;
797 }
798
799
800 // @bug 4124632
801 //
802 // Collator::getCollationKey was hanging on certain character sequences
803 //
Test4124632()804 void CollationRegressionTest::Test4124632(/* char* par */)
805 {
806 UErrorCode status = U_ZERO_ERROR;
807 Collator *coll = NULL;
808
809 coll = Collator::createInstance(Locale::getJapan(), status);
810
811 if (coll == NULL || U_FAILURE(status))
812 {
813 errln("Failed to create collator for Locale::JAPAN");
814 delete coll;
815 }
816
817 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818 CollationKey key;
819
820 coll->getCollationKey(test, key, status);
821
822 if (key.isBogus() || U_FAILURE(status))
823 {
824 errln("CollationKey creation failed.");
825 }
826
827 delete coll;
828 }
829
830 // @bug 4132736
831 //
832 // sort order of french words with multiple accents has errors
833 //
Test4132736()834 void CollationRegressionTest::Test4132736(/* char* par */)
835 {
836 UErrorCode status = U_ZERO_ERROR;
837
838 Collator *c = NULL;
839
840 c = Collator::createInstance(Locale::getFrance(), status);
841 c->setStrength(Collator::TERTIARY);
842
843 if (c == NULL || U_FAILURE(status))
844 {
845 errln("Failed to create a collator for Locale::getFrance()");
846 delete c;
847 }
848
849 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
850 {
851 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
852 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
853 };
854
855 compareArray(*c, test1, ARRAY_LENGTH(test1));
856
857 delete c;
858 }
859
860 // @bug 4133509
861 //
862 // The sorting using java.text.CollationKey is not in the exact order
863 //
Test4133509()864 void CollationRegressionTest::Test4133509(/* char* par */)
865 {
866 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
867 {
868 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
869 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
870 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
871 };
872
873 compareArray(*en_us, test1, ARRAY_LENGTH(test1));
874 }
875
876 // @bug 4114077
877 //
878 // Collation with decomposition off doesn't work for Europe
879 //
Test4114077()880 void CollationRegressionTest::Test4114077(/* char* par */)
881 {
882 // Ensure that we get the same results with decomposition off
883 // as we do with it on....
884
885 UErrorCode status = U_ZERO_ERROR;
886 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
887 c->setStrength(Collator::TERTIARY);
888
889 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
890 {
891 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
892 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
893 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
894 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
895 // -> a, ring, acute
896 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
897 };
898
899 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
900 compareArray(*c, test1, ARRAY_LENGTH(test1));
901
902 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
903 {
904 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
905 };
906
907 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
908 compareArray(*c, test2, ARRAY_LENGTH(test2));
909
910 delete c;
911 }
912
913 // @bug 4141640
914 //
915 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
916 //
Test4141640()917 void CollationRegressionTest::Test4141640(/* char* par */)
918 {
919 //
920 // Rather than just creating a Swedish collator, we might as well
921 // try to instantiate one for every locale available on the system
922 // in order to prevent this sort of bug from cropping up in the future
923 //
924 UErrorCode status = U_ZERO_ERROR;
925 int32_t i, localeCount;
926 const Locale *locales = Locale::getAvailableLocales(localeCount);
927
928 for (i = 0; i < localeCount; i += 1)
929 {
930 Collator *c = NULL;
931
932 status = U_ZERO_ERROR;
933 c = Collator::createInstance(locales[i], status);
934
935 if (c == NULL || U_FAILURE(status))
936 {
937 UnicodeString msg, localeName;
938
939 msg += "Could not create collator for locale ";
940 msg += locales[i].getName();
941
942 errln(msg);
943 }
944
945 delete c;
946 }
947 }
948
949 // @bug 4139572
950 //
951 // getCollationKey throws exception for spanish text
952 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
953 //
Test4139572()954 void CollationRegressionTest::Test4139572(/* char* par */)
955 {
956 //
957 // Code pasted straight from the bug report
958 // (and then translated to C++ ;-)
959 //
960 // create spanish locale and collator
961 UErrorCode status = U_ZERO_ERROR;
962 Locale l("es", "es");
963 Collator *col = NULL;
964
965 col = Collator::createInstance(l, status);
966
967 if (col == NULL || U_FAILURE(status))
968 {
969 errln("Failed to create a collator for es_es locale.");
970 delete col;
971 return;
972 }
973
974 CollationKey key;
975
976 // this spanish phrase kills it!
977 col->getCollationKey("Nombre De Objeto", key, status);
978
979 if (key.isBogus() || U_FAILURE(status))
980 {
981 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
982 }
983
984 delete col;
985 }
986 /* HSYS : RuleBasedCollator::compare() performance enhancements
987 compare() does not create CollationElementIterator() anymore.*/
988
// RuleBasedCollator subclass used by Test4146160. Its two
// createCollationElementIterator() methods bump the static 'count' before
// delegating to RuleBasedCollator, so a test can tell whether they were called.
class My4146160Collator : public RuleBasedCollator
{
public:
    // Constructs the collator from rbc's rule string.
    My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    ~My4146160Collator();

    // Counting wrapper around the UnicodeString variant.
    CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;

    // Counting wrapper around the CharacterIterator variant.
    CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;

    // Number of createCollationElementIterator() calls, shared by all instances.
    static int32_t count;
};

// Storage for the shared call counter; starts at zero.
int32_t My4146160Collator::count = 0;
1003
// Initializes the RuleBasedCollator base from rbc.getRules(); any failure is
// reported through 'status' by the base-class constructor.
My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
  : RuleBasedCollator(rbc.getRules(), status)
{
}
1008
// Nothing to release here; the subclass adds no per-instance state.
My4146160Collator::~My4146160Collator()
{
}
1012
createCollationElementIterator(const UnicodeString & text) const1013 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1014 {
1015 count += 1;
1016 return RuleBasedCollator::createCollationElementIterator(text);
1017 }
1018
createCollationElementIterator(const CharacterIterator & text) const1019 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1020 {
1021 count += 1;
1022 return RuleBasedCollator::createCollationElementIterator(text);
1023 }
1024
1025 // @bug 4146160
1026 //
1027 // RuleBasedCollator doesn't use createCollationElementIterator internally
1028 //
// NOTE: the whole body is excluded by "#if 0", so this test currently
// executes nothing. The disabled code checks that getCollationKey() and
// compare() on a My4146160Collator call createCollationElementIterator(),
// by looking at My4146160Collator::count.
void CollationRegressionTest::Test4146160(/* char* par */)
{
#if 0
    //
    // Use a custom collator class whose createCollationElementIterator
    // methods increment a count....
    //
    UErrorCode status = U_ZERO_ERROR;
    CollationKey key;

    My4146160Collator::count = 0;
    My4146160Collator *mc = NULL;

    mc = new My4146160Collator(*en_us, status);

    if (mc == NULL || U_FAILURE(status))
    {
        errln("Failed to create a My4146160Collator.");
        delete mc;
        return;
    }

    mc->getCollationKey("1", key, status);

    if (key.isBogus() || U_FAILURE(status))
    {
        errln("Failure to get a CollationKey from a My4146160Collator.");
        delete mc;
        return;
    }

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
    }

    // Reset the counter before checking the compare() path.
    My4146160Collator::count = 0;
    mc->compare("1", "2");

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createtCollationElementIterator not called for compare");
    }

    delete mc;
#endif
}
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1076 void CollationRegressionTest::compareArray(Collator &c,
1077 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1078 int32_t testCount)
1079 {
1080 int32_t i;
1081 Collator::EComparisonResult expectedResult = Collator::EQUAL;
1082
1083 for (i = 0; i < testCount; i += 3)
1084 {
1085 UnicodeString source(tests[i]);
1086 UnicodeString comparison(tests[i + 1]);
1087 UnicodeString target(tests[i + 2]);
1088
1089 if (comparison == "<")
1090 {
1091 expectedResult = Collator::LESS;
1092 }
1093 else if (comparison == ">")
1094 {
1095 expectedResult = Collator::GREATER;
1096 }
1097 else if (comparison == "=")
1098 {
1099 expectedResult = Collator::EQUAL;
1100 }
1101 else
1102 {
1103 UnicodeString bogus1("Bogus comparison string \"");
1104 UnicodeString bogus2("\"");
1105 errln(bogus1 + comparison + bogus2);
1106 }
1107
1108 Collator::EComparisonResult compareResult = c.compare(source, target);
1109
1110 CollationKey sourceKey, targetKey;
1111 UErrorCode status = U_ZERO_ERROR;
1112
1113 c.getCollationKey(source, sourceKey, status);
1114
1115 if (U_FAILURE(status))
1116 {
1117 errln("Couldn't get collationKey for source");
1118 continue;
1119 }
1120
1121 c.getCollationKey(target, targetKey, status);
1122
1123 if (U_FAILURE(status))
1124 {
1125 errln("Couldn't get collationKey for target");
1126 continue;
1127 }
1128
1129 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1130
1131 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1132
1133 }
1134 }
1135
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1136 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1137 {
1138 int32_t c1, c2, count = 0;
1139 UErrorCode status = U_ZERO_ERROR;
1140
1141 do
1142 {
1143 c1 = i1.next(status);
1144 c2 = i2.next(status);
1145
1146 if (c1 != c2)
1147 {
1148 UnicodeString msg, msg1(" ");
1149
1150 msg += msg1 + count;
1151 msg += ": strength(0x";
1152 appendHex(c1, 8, msg);
1153 msg += ") != strength(0x";
1154 appendHex(c2, 8, msg);
1155 msg += ")";
1156
1157 errln(msg);
1158 break;
1159 }
1160
1161 count += 1;
1162 }
1163 while (c1 != CollationElementIterator::NULLORDER);
1164 }
1165
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1166 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1167 {
1168 if (exec)
1169 {
1170 logln("Collation Regression Tests: ");
1171 }
1172
1173 if(en_us) {
1174 switch (index)
1175 {
1176 case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
1177 case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
1178 case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
1179 case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
1180 case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
1181 case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
1182 case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
1183 case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
1184 case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
1185 case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
1186 case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
1187 case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
1188 case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
1189 case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
1190 case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
1191 case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
1192 case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
1193 case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
1194 case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
1195 case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
1196 case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
1197 case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
1198 case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
1199 case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
1200 case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
1201 case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
1202 case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
1203 case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
1204 case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
1205 case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
1206 case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
1207 default: name = ""; break;
1208 }
1209 } else {
1210 errln("Class collator not instantiated");
1211 name = "";
1212 }
1213 }
1214
1215 #endif /* #if !UCONFIG_NO_COLLATION */
1216