• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_COLLATION
10 
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "regcoll.h"
16 #include "sfwdchit.h"
17 #include "testutil.h"
18 #include "cmemory.h"
19 
20 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
21 
CollationRegressionTest()22 CollationRegressionTest::CollationRegressionTest()
23 {
24     UErrorCode status = U_ZERO_ERROR;
25 
26     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
27     if(U_FAILURE(status)) {
28       delete en_us;
29       en_us = 0;
30       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
31       return;
32     }
33 }
34 
~CollationRegressionTest()35 CollationRegressionTest::~CollationRegressionTest()
36 {
37     delete en_us;
38 }
39 
40 
41     // @bug 4048446
42 //
43 // CollationElementIterator.reset() doesn't work
44 //
Test4048446()45 void CollationRegressionTest::Test4048446(/* char* par */)
46 {
47     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
48     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
49     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
50     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
51     UErrorCode status = U_ZERO_ERROR;
52 
53     if (i1 == NULL|| i2 == NULL)
54     {
55         errln("Could not create CollationElementIterator's");
56         delete i1;
57         delete i2;
58         return;
59     }
60 
61     while (i1->next(status) != CollationElementIterator::NULLORDER)
62     {
63         if (U_FAILURE(status))
64         {
65             errln("error calling next()");
66 
67             delete i1;
68             delete i2;
69             return;
70         }
71     }
72 
73     i1->reset();
74 
75     assertEqual(*i1, *i2);
76 
77     delete i1;
78     delete i2;
79 }
80 
81 // @bug 4051866
82 //
83 // Collator -> rules -> Collator round-trip broken for expanding characters
84 //
Test4051866()85 void CollationRegressionTest::Test4051866(/* char* par */)
86 {
87 /*
88     RuleBasedCollator c1 = new RuleBasedCollator("< o "
89                                                 +"& oe ,o\u3080"
90                                                 +"& oe ,\u1530 ,O"
91                                                 +"& OE ,O\u3080"
92                                                 +"& OE ,\u1520"
93                                                 +"< p ,P");
94 */
95 
96     UnicodeString rules;
97     UErrorCode status = U_ZERO_ERROR;
98 
99     rules += "< o ";
100     rules += "& oe ,o";
101     rules += (UChar)0x3080;
102     rules += "& oe ,";
103     rules += (UChar)0x1530;
104     rules += " ,O";
105     rules += "& OE ,O";
106     rules += (UChar)0x3080;
107     rules += "& OE ,";
108     rules += (UChar)0x1520;
109     rules += "< p ,P";
110 
111     // Build a collator containing expanding characters
112     RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
113 
114     // Build another using the rules from  the first
115     RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
116 
117     // Make sure they're the same
118     if (!(c1->getRules() == c2->getRules()))
119     {
120         errln("Rules are not equal");
121     }
122 
123     delete c2;
124     delete c1;
125 }
126 
127 // @bug 4053636
128 //
129 // Collator thinks "black-bird" == "black"
130 //
Test4053636()131 void CollationRegressionTest::Test4053636(/* char* par */)
132 {
133     if (en_us->equals("black_bird", "black"))
134     {
135         errln("black-bird == black");
136     }
137 }
138 
139 // @bug 4054238
140 //
141 // CollationElementIterator will not work correctly if the associated
142 // Collator object's mode is changed
143 //
Test4054238()144 void CollationRegressionTest::Test4054238(/* char* par */)
145 {
146     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
147     const UnicodeString test3(chars3);
148     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
149 
150     // NOTE: The Java code uses en_us to create the CollationElementIterators
151     // but I'm pretty sure that's wrong, so I've changed this to use c.
152     UErrorCode status = U_ZERO_ERROR;
153     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
154     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
155     delete i1;
156     delete c;
157 }
158 
159 // @bug 4054734
160 //
161 // Collator::IDENTICAL documented but not implemented
162 //
Test4054734()163 void CollationRegressionTest::Test4054734(/* char* par */)
164 {
165     /*
166         Here's the original Java:
167 
168         String[] decomp = {
169             "\u0001",   "<",    "\u0002",
170             "\u0001",   "=",    "\u0001",
171             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
172             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
173         };
174 
175         String[] nodecomp = {
176             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
177         };
178     */
179 
180     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
181     {
182         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
183         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
184         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
185         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
186     };
187 
188 
189     UErrorCode status = U_ZERO_ERROR;
190     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
191 
192     c->setStrength(Collator::IDENTICAL);
193 
194     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
195     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
196 
197     delete c;
198 }
199 
200 // @bug 4054736
201 //
202 // Full Decomposition mode not implemented
203 //
Test4054736()204 void CollationRegressionTest::Test4054736(/* char* par */)
205 {
206     UErrorCode status = U_ZERO_ERROR;
207     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
208 
209     c->setStrength(Collator::SECONDARY);
210     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
211 
212     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
213     {
214         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
215     };
216 
217     compareArray(*c, tests, ARRAY_LENGTH(tests));
218 
219     delete c;
220 }
221 
222 // @bug 4058613
223 //
224 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
225 //
Test4058613()226 void CollationRegressionTest::Test4058613(/* char* par */)
227 {
228     // Creating a default collator doesn't work when Korean is the default
229     // locale
230 
231     Locale oldDefault = Locale::getDefault();
232     UErrorCode status = U_ZERO_ERROR;
233 
234     Locale::setDefault(Locale::getKorean(), status);
235 
236     if (U_FAILURE(status))
237     {
238         errln("Could not set default locale to Locale::KOREAN");
239         return;
240     }
241 
242     Collator *c = NULL;
243 
244     c = Collator::createInstance("en_US", status);
245 
246     if (c == NULL || U_FAILURE(status))
247     {
248         errln("Could not create a Korean collator");
249         Locale::setDefault(oldDefault, status);
250         delete c;
251         return;
252     }
253 
254     // Since the fix to this bug was to turn off decomposition for Korean collators,
255     // ensure that's what we got
256     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
257     {
258       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
259     }
260 
261     delete c;
262 
263     Locale::setDefault(oldDefault, status);
264 }
265 
266 // @bug 4059820
267 //
268 // RuleBasedCollator.getRules does not return the exact pattern as input
269 // for expanding character sequences
270 //
Test4059820()271 void CollationRegressionTest::Test4059820(/* char* par */)
272 {
273     UErrorCode status = U_ZERO_ERROR;
274 
275     RuleBasedCollator *c = NULL;
276     UnicodeString rules = "< a < b , c/a < d < z";
277 
278     c = new RuleBasedCollator(rules, status);
279 
280     if (c == NULL || U_FAILURE(status))
281     {
282         errln("Failure building a collator.");
283         delete c;
284         return;
285     }
286 
287     if ( c->getRules().indexOf("c/a") == -1)
288     {
289         errln("returned rules do not contain 'c/a'");
290     }
291 
292     delete c;
293 }
294 
295 // @bug 4060154
296 //
297 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
298 //
Test4060154()299 void CollationRegressionTest::Test4060154(/* char* par */)
300 {
301     UErrorCode status = U_ZERO_ERROR;
302     UnicodeString rules;
303 
304     rules += "< g, G < h, H < i, I < j, J";
305     rules +=  " & H < ";
306     rules += (UChar)0x0131;
307     rules += ", ";
308     rules += (UChar)0x0130;
309     rules += ", i, I";
310 
311     RuleBasedCollator *c = NULL;
312 
313     c = new RuleBasedCollator(rules, status);
314 
315     if (c == NULL || U_FAILURE(status))
316     {
317         errln("failure building collator.");
318         delete c;
319         return;
320     }
321 
322     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
323 
324  /*
325     String[] tertiary = {
326         "A",        "<",    "B",
327         "H",        "<",    "\u0131",
328         "H",        "<",    "I",
329         "\u0131",   "<",    "\u0130",
330         "\u0130",   "<",    "i",
331         "\u0130",   ">",    "H",
332     };
333 */
334 
335     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
336     {
337         {0x41, 0},    {0x3c, 0}, {0x42, 0},
338         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
339         {0x48, 0},    {0x3c, 0}, {0x49, 0},
340         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
341         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
342         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
343     };
344 
345     c->setStrength(Collator::TERTIARY);
346     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
347 
348     /*
349     String[] secondary = {
350         "H",        "<",    "I",
351         "\u0131",   "=",    "\u0130",
352     };
353 */
354     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
355     {
356         {0x48, 0},    {0x3c, 0}, {0x49, 0},
357         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
358     };
359 
360     c->setStrength(Collator::PRIMARY);
361     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
362 
363     delete c;
364 }
365 
366 // @bug 4062418
367 //
368 // Secondary/Tertiary comparison incorrect in French Secondary
369 //
Test4062418()370 void CollationRegressionTest::Test4062418(/* char* par */)
371 {
372     UErrorCode status = U_ZERO_ERROR;
373 
374     RuleBasedCollator *c = NULL;
375 
376     c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
377 
378     if (c == NULL || U_FAILURE(status))
379     {
380         errln("Failed to create collator for Locale::getCanadaFrench()");
381         delete c;
382         return;
383     }
384 
385     c->setStrength(Collator::SECONDARY);
386 
387 /*
388     String[] tests = {
389             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
390     };
391 */
392     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
393     {
394         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
395     };
396 
397     compareArray(*c, tests, ARRAY_LENGTH(tests));
398 
399     delete c;
400 }
401 
402 // @bug 4065540
403 //
404 // Collator::compare() method broken if either string contains spaces
405 //
Test4065540()406 void CollationRegressionTest::Test4065540(/* char* par */)
407 {
408     if (en_us->compare("abcd e", "abcd f") == 0)
409     {
410         errln("'abcd e' == 'abcd f'");
411     }
412 }
413 
414 // @bug 4066189
415 //
416 // Unicode characters need to be recursively decomposed to get the
417 // correct result. For example,
418 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
419 //
Test4066189()420 void CollationRegressionTest::Test4066189(/* char* par */)
421 {
422     static const UChar chars1[] = {0x1EB1, 0};
423     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
424     const UnicodeString test1(chars1);
425     const UnicodeString test2(chars2);
426     UErrorCode status = U_ZERO_ERROR;
427 
428     // NOTE: The java code used en_us to create the
429     // CollationElementIterator's. I'm pretty sure that
430     // was wrong, so I've change the code to use c1 and c2
431     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
432     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
433     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
434 
435     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
436     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
437     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
438 
439     assertEqual(*i1, *i2);
440 
441     delete i2;
442     delete c2;
443     delete i1;
444     delete c1;
445 }
446 
447 // @bug 4066696
448 //
449 // French secondary collation checking at the end of compare iteration fails
450 //
Test4066696()451 void CollationRegressionTest::Test4066696(/* char* par */)
452 {
453     UErrorCode status = U_ZERO_ERROR;
454     RuleBasedCollator *c = NULL;
455 
456     c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
457 
458     if (c == NULL || U_FAILURE(status))
459     {
460         errln("Failure creating collator for Locale::getCanadaFrench()");
461         delete c;
462         return;
463     }
464 
465     c->setStrength(Collator::SECONDARY);
466 
467 /*
468     String[] tests = {
469         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
470     };
471 
472   should be:
473 
474     String[] tests = {
475         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
476     };
477 
478 */
479 
480     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
481     {
482         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
483     };
484 
485     compareArray(*c, tests, ARRAY_LENGTH(tests));
486 
487     delete c;
488 }
489 
490 // @bug 4076676
491 //
492 // Bad canonicalization of same-class combining characters
493 //
Test4076676()494 void CollationRegressionTest::Test4076676(/* char* par */)
495 {
496     // These combining characters are all in the same class, so they should not
497     // be reordered, and they should compare as unequal.
498     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
499     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
500 
501     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
502     c->setStrength(Collator::TERTIARY);
503 
504     if (c->compare(s1,s2) == 0)
505     {
506         errln("Same-class combining chars were reordered");
507     }
508 
509     delete c;
510 }
511 
512 // @bug 4079231
513 //
514 // RuleBasedCollator::operator==(NULL) throws NullPointerException
515 //
Test4079231()516 void CollationRegressionTest::Test4079231(/* char* par */)
517 {
518     // I don't think there's any way to write this test
519     // in C++. The following is equivalent to the Java,
520     // but doesn't compile 'cause NULL can't be converted
521     // to Collator&
522     //
523     // if (en_us->operator==(NULL))
524     // {
525     //     errln("en_us->operator==(NULL) returned TRUE");
526     // }
527 
528  /*
529    try {
530         if (en_us->equals(null)) {
531             errln("en_us->equals(null) returned true");
532         }
533     }
534     catch (Exception e) {
535         errln("en_us->equals(null) threw " + e.toString());
536     }
537 */
538 }
539 
540 // @bug 4078588
541 //
542 // RuleBasedCollator breaks on "< a < bb" rule
543 //
Test4078588()544 void CollationRegressionTest::Test4078588(/* char *par */)
545 {
546     UErrorCode status = U_ZERO_ERROR;
547     RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
548 
549     if (rbc == NULL || U_FAILURE(status))
550     {
551         errln("Failed to create RuleBasedCollator.");
552         delete rbc;
553         return;
554     }
555 
556     Collator::EComparisonResult result = rbc->compare("a","bb");
557 
558     if (result != Collator::LESS)
559     {
560         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
561             + (UnicodeString)"; expected -1");
562     }
563 
564     delete rbc;
565 }
566 
567 // @bug 4081866
568 //
569 // Combining characters in different classes not reordered properly.
570 //
Test4081866()571 void CollationRegressionTest::Test4081866(/* char* par */)
572 {
573     // These combining characters are all in different classes,
574     // so they should be reordered and the strings should compare as equal.
575     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
576     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
577 
578     UErrorCode status = U_ZERO_ERROR;
579     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
580     c->setStrength(Collator::TERTIARY);
581 
582     // Now that the default collators are set to NO_DECOMPOSITION
583     // (as a result of fixing bug 4114077), we must set it explicitly
584     // when we're testing reordering behavior.  -- lwerner, 5/5/98
585     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
586 
587     if (c->compare(s1,s2) != 0)
588     {
589         errln("Combining chars were not reordered");
590     }
591 
592     delete c;
593 }
594 
595 // @bug 4087241
596 //
597 // string comparison errors in Scandinavian collators
598 //
Test4087241()599 void CollationRegressionTest::Test4087241(/* char* par */)
600 {
601     UErrorCode status = U_ZERO_ERROR;
602     Locale da_DK("da", "DK");
603     RuleBasedCollator *c = NULL;
604 
605     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
606 
607     if (c == NULL || U_FAILURE(status))
608     {
609         errln("Failed to create collator for da_DK locale");
610         delete c;
611         return;
612     }
613 
614     c->setStrength(Collator::SECONDARY);
615 
616     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
617     {
618         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
619         {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0},      // a-unlaut < a-ring
620         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
621     };
622 
623     compareArray(*c, tests, ARRAY_LENGTH(tests));
624 
625     delete c;
626 }
627 
628 // @bug 4087243
629 //
630 // CollationKey takes ignorable strings into account when it shouldn't
631 //
Test4087243()632 void CollationRegressionTest::Test4087243(/* char* par */)
633 {
634     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
635     c->setStrength(Collator::TERTIARY);
636 
637     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
638     {
639         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
640     };
641 
642     compareArray(*c, tests, ARRAY_LENGTH(tests));
643 
644     delete c;
645 }
646 
647 // @bug 4092260
648 //
649 // Mu/micro conflict
650 // Micro symbol and greek lowercase letter Mu should sort identically
651 //
Test4092260()652 void CollationRegressionTest::Test4092260(/* char* par */)
653 {
654     UErrorCode status = U_ZERO_ERROR;
655     Locale el("el", "");
656     Collator *c = NULL;
657 
658     c = Collator::createInstance(el, status);
659 
660     if (c == NULL || U_FAILURE(status))
661     {
662         errln("Failed to create collator for el locale.");
663         delete c;
664         return;
665     }
666 
667     // These now have tertiary differences in UCA
668     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
669 
670     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
671     {
672         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
673     };
674 
675     compareArray(*c, tests, ARRAY_LENGTH(tests));
676 
677     delete c;
678 }
679 
680 // @bug 4095316
681 //
Test4095316()682 void CollationRegressionTest::Test4095316(/* char* par */)
683 {
684     UErrorCode status = U_ZERO_ERROR;
685     Locale el_GR("el", "GR");
686     Collator *c = Collator::createInstance(el_GR, status);
687 
688     if (c == NULL || U_FAILURE(status))
689     {
690         errln("Failed to create collator for el_GR locale");
691         delete c;
692         return;
693     }
694     // These now have tertiary differences in UCA
695     //c->setStrength(Collator::TERTIARY);
696     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
697 
698     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
699     {
700         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
701     };
702 
703     compareArray(*c, tests, ARRAY_LENGTH(tests));
704 
705     delete c;
706 }
707 
708 // @bug 4101940
709 //
Test4101940()710 void CollationRegressionTest::Test4101940(/* char* par */)
711 {
712     UErrorCode status = U_ZERO_ERROR;
713     RuleBasedCollator *c = NULL;
714     UnicodeString rules = "< a < b";
715     UnicodeString nothing = "";
716 
717     c = new RuleBasedCollator(rules, status);
718 
719     if (c == NULL || U_FAILURE(status))
720     {
721         errln("Failed to create RuleBasedCollator");
722         delete c;
723         return;
724     }
725 
726     CollationElementIterator *i = c->createCollationElementIterator(nothing);
727     i->reset();
728 
729     if (i->next(status) != CollationElementIterator::NULLORDER)
730     {
731         errln("next did not return NULLORDER");
732     }
733 
734     delete i;
735     delete c;
736 }
737 
738 // @bug 4103436
739 //
740 // Collator::compare not handling spaces properly
741 //
Test4103436()742 void CollationRegressionTest::Test4103436(/* char* par */)
743 {
744     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
745     c->setStrength(Collator::TERTIARY);
746 
747     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
748     {
749         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
750         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
751     };
752 
753     compareArray(*c, tests, ARRAY_LENGTH(tests));
754 
755     delete c;
756 }
757 
758 // @bug 4114076
759 //
760 // Collation not Unicode conformant with Hangul syllables
761 //
Test4114076()762 void CollationRegressionTest::Test4114076(/* char* par */)
763 {
764     UErrorCode status = U_ZERO_ERROR;
765     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
766     c->setStrength(Collator::TERTIARY);
767 
768     //
769     // With Canonical decomposition, Hangul syllables should get decomposed
770     // into Jamo, but Jamo characters should not be decomposed into
771     // conjoining Jamo
772     //
773     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
774     {
775         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
776     };
777 
778     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
779     compareArray(*c, test1, ARRAY_LENGTH(test1));
780 
781     // From UTR #15:
782     // *In earlier versions of Unicode, jamo characters like ksf
783     //  had compatibility mappings to kf + sf. These mappings were
784     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
785     // That is, the following test is obsolete as of 2.1.9
786 
787 //obsolete-    // With Full decomposition, it should go all the way down to
788 //obsolete-    // conjoining Jamo characters.
789 //obsolete-    //
790 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
791 //obsolete-    {
792 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
793 //obsolete-    };
794 //obsolete-
795 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
796 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
797 
798     delete c;
799 }
800 
801 
802 // @bug 4124632
803 //
804 // Collator::getCollationKey was hanging on certain character sequences
805 //
Test4124632()806 void CollationRegressionTest::Test4124632(/* char* par */)
807 {
808     UErrorCode status = U_ZERO_ERROR;
809     Collator *coll = NULL;
810 
811     coll = Collator::createInstance(Locale::getJapan(), status);
812 
813     if (coll == NULL || U_FAILURE(status))
814     {
815         errln("Failed to create collator for Locale::JAPAN");
816         delete coll;
817         return;
818     }
819 
820     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
821     CollationKey key;
822 
823     coll->getCollationKey(test, key, status);
824 
825     if (key.isBogus() || U_FAILURE(status))
826     {
827         errln("CollationKey creation failed.");
828     }
829 
830     delete coll;
831 }
832 
833 // @bug 4132736
834 //
835 // sort order of french words with multiple accents has errors
836 //
Test4132736()837 void CollationRegressionTest::Test4132736(/* char* par */)
838 {
839     UErrorCode status = U_ZERO_ERROR;
840 
841     Collator *c = NULL;
842 
843     c = Collator::createInstance(Locale::getCanadaFrench(), status);
844     c->setStrength(Collator::TERTIARY);
845 
846     if (c == NULL || U_FAILURE(status))
847     {
848         errln("Failed to create a collator for Locale::getCanadaFrench()");
849         delete c;
850         return;
851     }
852 
853     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
854     {
855         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
856         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
857     };
858 
859     compareArray(*c, test1, ARRAY_LENGTH(test1));
860 
861     delete c;
862 }
863 
864 // @bug 4133509
865 //
866 // The sorting using java.text.CollationKey is not in the exact order
867 //
Test4133509()868 void CollationRegressionTest::Test4133509(/* char* par */)
869 {
870     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
871     {
872         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
873         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
874         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
875     };
876 
877     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
878 }
879 
880 // @bug 4114077
881 //
882 // Collation with decomposition off doesn't work for Europe
883 //
Test4114077()884 void CollationRegressionTest::Test4114077(/* char* par */)
885 {
886     // Ensure that we get the same results with decomposition off
887     // as we do with it on....
888 
889     UErrorCode status = U_ZERO_ERROR;
890     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
891     c->setStrength(Collator::TERTIARY);
892 
893     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
894     {
895         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
896         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
897         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
898         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
899                                                 //   -> a, ring, acute
900         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
901     };
902 
903     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
904     compareArray(*c, test1, ARRAY_LENGTH(test1));
905 
906     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
907     {
908         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
909     };
910 
911     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
912     compareArray(*c, test2, ARRAY_LENGTH(test2));
913 
914     delete c;
915 }
916 
917 // @bug 4141640
918 //
919 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
920 //
Test4141640()921 void CollationRegressionTest::Test4141640(/* char* par */)
922 {
923     //
924     // Rather than just creating a Swedish collator, we might as well
925     // try to instantiate one for every locale available on the system
926     // in order to prevent this sort of bug from cropping up in the future
927     //
928     UErrorCode status = U_ZERO_ERROR;
929     int32_t i, localeCount;
930     const Locale *locales = Locale::getAvailableLocales(localeCount);
931 
932     for (i = 0; i < localeCount; i += 1)
933     {
934         Collator *c = NULL;
935 
936         status = U_ZERO_ERROR;
937         c = Collator::createInstance(locales[i], status);
938 
939         if (c == NULL || U_FAILURE(status))
940         {
941             UnicodeString msg, localeName;
942 
943             msg += "Could not create collator for locale ";
944             msg += locales[i].getName();
945 
946             errln(msg);
947         }
948 
949         delete c;
950     }
951 }
952 
953 // @bug 4139572
954 //
955 // getCollationKey throws exception for spanish text
956 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
957 //
Test4139572()958 void CollationRegressionTest::Test4139572(/* char* par */)
959 {
960     //
961     // Code pasted straight from the bug report
962     // (and then translated to C++ ;-)
963     //
964     // create spanish locale and collator
965     UErrorCode status = U_ZERO_ERROR;
966     Locale l("es", "es");
967     Collator *col = NULL;
968 
969     col = Collator::createInstance(l, status);
970 
971     if (col == NULL || U_FAILURE(status))
972     {
973         errln("Failed to create a collator for es_es locale.");
974         delete col;
975         return;
976     }
977 
978     CollationKey key;
979 
980     // this spanish phrase kills it!
981     col->getCollationKey("Nombre De Objeto", key, status);
982 
983     if (key.isBogus() || U_FAILURE(status))
984     {
985         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
986     }
987 
988     delete col;
989 }
990 /* HSYS : RuleBasedCollator::compare() performance enhancements
991           compare() does not create CollationElementIterator() anymore.*/
992 
993 class My4146160Collator : public RuleBasedCollator
994 {
995 public:
996     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
997     ~My4146160Collator();
998 
999     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
1000 
1001     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
1002 
1003     static int32_t count;
1004 };
1005 
1006 int32_t My4146160Collator::count = 0;
1007 
My4146160Collator(RuleBasedCollator & rbc,UErrorCode & status)1008 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1009   : RuleBasedCollator(rbc.getRules(), status)
1010 {
1011 }
1012 
~My4146160Collator()1013 My4146160Collator::~My4146160Collator()
1014 {
1015 }
1016 
createCollationElementIterator(const UnicodeString & text) const1017 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1018 {
1019     count += 1;
1020     return RuleBasedCollator::createCollationElementIterator(text);
1021 }
1022 
createCollationElementIterator(const CharacterIterator & text) const1023 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1024 {
1025     count += 1;
1026     return RuleBasedCollator::createCollationElementIterator(text);
1027 }
1028 
1029 // @bug 4146160
1030 //
1031 // RuleBasedCollator doesn't use createCollationElementIterator internally
1032 //
Test4146160()1033 void CollationRegressionTest::Test4146160(/* char* par */)
1034 {
1035 #if 0
1036     //
1037     // Use a custom collator class whose createCollationElementIterator
1038     // methods increment a count....
1039     //
1040     UErrorCode status = U_ZERO_ERROR;
1041     CollationKey key;
1042 
1043     My4146160Collator::count = 0;
1044     My4146160Collator *mc = NULL;
1045 
1046     mc = new My4146160Collator(*en_us, status);
1047 
1048     if (mc == NULL || U_FAILURE(status))
1049     {
1050         errln("Failed to create a My4146160Collator.");
1051         delete mc;
1052         return;
1053     }
1054 
1055     mc->getCollationKey("1", key, status);
1056 
1057     if (key.isBogus() || U_FAILURE(status))
1058     {
1059         errln("Failure to get a CollationKey from a My4146160Collator.");
1060         delete mc;
1061         return;
1062     }
1063 
1064     if (My4146160Collator::count < 1)
1065     {
1066         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1067     }
1068 
1069     My4146160Collator::count = 0;
1070     mc->compare("1", "2");
1071 
1072     if (My4146160Collator::count < 1)
1073     {
1074         errln("My4146160Collator::createtCollationElementIterator not called for compare");
1075     }
1076 
1077     delete mc;
1078 #endif
1079 }
1080 
1081 // Ticket 7189
1082 //
1083 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const UChar * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1084 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1085     UCharIterator uiter;
1086     uint32_t state[2] = { 0, 0 };
1087     int32_t keyLen;
1088     int32_t count = 8;
1089 
1090     uiter_setString(&uiter, text, len);
1091     keyLen = 0;
1092     while (TRUE) {
1093         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1094         if (U_FAILURE(status)) {
1095             return -1;
1096         }
1097         if (keyPartLen == 0) {
1098             break;
1099         }
1100         keyLen += keyPartLen;
1101     }
1102     return keyLen;
1103 }
1104 
TestT7189()1105 void CollationRegressionTest::TestT7189() {
1106     UErrorCode status = U_ZERO_ERROR;
1107     UCollator *coll;
1108     uint32_t i;
1109 
1110     static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1111     // "Achter De Hoven"
1112         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1113         // "ABC"
1114         { 0x41, 0x42, 0x43, 0x00 },
1115         // "HELLO world!"
1116         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1117     };
1118 
1119     static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1120     // "Achter de Hoven"
1121         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1122         // "abc"
1123         { 0x61, 0x62, 0x63, 0x00 },
1124         // "hello world!"
1125         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1126     };
1127 
1128     // Open the collator
1129     coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1130     if (U_FAILURE(status)) {
1131         errln("Failed to create a collator for short string EO_S1");
1132         return;
1133     }
1134 
1135     for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1136         uint8_t key1[100], key2[100];
1137         int32_t len1, len2;
1138 
1139         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1140         if (U_FAILURE(status)) {
1141             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1142             break;
1143         }
1144         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1145         if (U_FAILURE(status)) {
1146             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1147             break;
1148         }
1149 
1150         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1151             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1152         } else {
1153             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1154                     + TestUtility::hex(key2, len2));
1155         }
1156     }
1157     ucol_close(coll);
1158 }
1159 
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1160 void CollationRegressionTest::compareArray(Collator &c,
1161                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1162                                            int32_t testCount)
1163 {
1164     int32_t i;
1165     Collator::EComparisonResult expectedResult = Collator::EQUAL;
1166 
1167     for (i = 0; i < testCount; i += 3)
1168     {
1169         UnicodeString source(tests[i]);
1170         UnicodeString comparison(tests[i + 1]);
1171         UnicodeString target(tests[i + 2]);
1172 
1173         if (comparison == "<")
1174         {
1175             expectedResult = Collator::LESS;
1176         }
1177         else if (comparison == ">")
1178         {
1179             expectedResult = Collator::GREATER;
1180         }
1181         else if (comparison == "=")
1182         {
1183             expectedResult = Collator::EQUAL;
1184         }
1185         else
1186         {
1187             UnicodeString bogus1("Bogus comparison string \"");
1188             UnicodeString bogus2("\"");
1189             errln(bogus1 + comparison + bogus2);
1190         }
1191 
1192         Collator::EComparisonResult compareResult = c.compare(source, target);
1193 
1194         CollationKey sourceKey, targetKey;
1195         UErrorCode status = U_ZERO_ERROR;
1196 
1197         c.getCollationKey(source, sourceKey, status);
1198 
1199         if (U_FAILURE(status))
1200         {
1201             errln("Couldn't get collationKey for source");
1202             continue;
1203         }
1204 
1205         c.getCollationKey(target, targetKey, status);
1206 
1207         if (U_FAILURE(status))
1208         {
1209             errln("Couldn't get collationKey for target");
1210             continue;
1211         }
1212 
1213         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1214 
1215         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1216 
1217     }
1218 }
1219 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1220 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1221 {
1222     int32_t c1, c2, count = 0;
1223     UErrorCode status = U_ZERO_ERROR;
1224 
1225     do
1226     {
1227         c1 = i1.next(status);
1228         c2 = i2.next(status);
1229 
1230         if (c1 != c2)
1231         {
1232             UnicodeString msg, msg1("    ");
1233 
1234             msg += msg1 + count;
1235             msg += ": strength(0x";
1236             appendHex(c1, 8, msg);
1237             msg += ") != strength(0x";
1238             appendHex(c2, 8, msg);
1239             msg += ")";
1240 
1241             errln(msg);
1242             break;
1243         }
1244 
1245         count += 1;
1246     }
1247     while (c1 != CollationElementIterator::NULLORDER);
1248 }
1249 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1250 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1251 {
1252     if (exec)
1253     {
1254         logln("Collation Regression Tests: ");
1255     }
1256 
1257     if(en_us) {
1258       switch (index)
1259       {
1260           case  0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
1261           case  1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
1262           case  2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
1263           case  3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
1264           case  4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
1265           case  5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
1266           case  6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
1267           case  7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
1268           case  8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
1269           case  9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
1270           case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
1271           case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
1272           case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
1273           case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
1274           case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
1275           case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
1276           case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
1277           case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
1278           case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
1279           case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
1280           case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
1281           case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
1282           case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
1283           case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
1284           case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
1285           case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
1286           case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
1287           case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
1288           case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
1289           case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
1290           case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
1291 		  case 31: name = "TestT7189";   if (exec) TestT7189(); break;
1292           default: name = ""; break;
1293       }
1294     } else {
1295       dataerrln("Class collator not instantiated");
1296       name = "";
1297     }
1298 }
1299 
1300 #endif /* #if !UCONFIG_NO_COLLATION */
1301