• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2003, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_COLLATION
10 
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "regcoll.h"
16 #include "sfwdchit.h"
17 
18 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
19 
CollationRegressionTest()20 CollationRegressionTest::CollationRegressionTest()
21 {
22     UErrorCode status = U_ZERO_ERROR;
23 
24     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
25     if(U_FAILURE(status)) {
26       delete en_us;
27       en_us = 0;
28       errln("Collator creation failed with %s", u_errorName(status));
29       return;
30     }
31 }
32 
~CollationRegressionTest()33 CollationRegressionTest::~CollationRegressionTest()
34 {
35     delete en_us;
36 }
37 
38 
39     // @bug 4048446
40 //
41 // CollationElementIterator.reset() doesn't work
42 //
Test4048446()43 void CollationRegressionTest::Test4048446(/* char* par */)
44 {
45     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
46     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
47     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
48     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
49     UErrorCode status = U_ZERO_ERROR;
50 
51     if (i1 == NULL|| i2 == NULL)
52     {
53         errln("Could not create CollationElementIterator's");
54         delete i1;
55         delete i2;
56         return;
57     }
58 
59     while (i1->next(status) != CollationElementIterator::NULLORDER)
60     {
61         if (U_FAILURE(status))
62         {
63             errln("error calling next()");
64 
65             delete i1;
66             delete i2;
67             return;
68         }
69     }
70 
71     i1->reset();
72 
73     assertEqual(*i1, *i2);
74 
75     delete i1;
76     delete i2;
77 }
78 
79 // @bug 4051866
80 //
81 // Collator -> rules -> Collator round-trip broken for expanding characters
82 //
Test4051866()83 void CollationRegressionTest::Test4051866(/* char* par */)
84 {
85 /*
86     RuleBasedCollator c1 = new RuleBasedCollator("< o "
87                                                 +"& oe ,o\u3080"
88                                                 +"& oe ,\u1530 ,O"
89                                                 +"& OE ,O\u3080"
90                                                 +"& OE ,\u1520"
91                                                 +"< p ,P");
92 */
93 
94     UnicodeString rules;
95     UErrorCode status = U_ZERO_ERROR;
96 
97     rules += "< o ";
98     rules += "& oe ,o";
99     rules += (UChar)0x3080;
100     rules += "& oe ,";
101     rules += (UChar)0x1530;
102     rules += " ,O";
103     rules += "& OE ,O";
104     rules += (UChar)0x3080;
105     rules += "& OE ,";
106     rules += (UChar)0x1520;
107     rules += "< p ,P";
108 
109     // Build a collator containing expanding characters
110     RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
111 
112     // Build another using the rules from  the first
113     RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
114 
115     // Make sure they're the same
116     if (!(c1->getRules() == c2->getRules()))
117     {
118         errln("Rules are not equal");
119     }
120 
121     delete c2;
122     delete c1;
123 }
124 
125 // @bug 4053636
126 //
127 // Collator thinks "black-bird" == "black"
128 //
Test4053636()129 void CollationRegressionTest::Test4053636(/* char* par */)
130 {
131     if (en_us->equals("black_bird", "black"))
132     {
133         errln("black-bird == black");
134     }
135 }
136 
137 // @bug 4054238
138 //
139 // CollationElementIterator will not work correctly if the associated
140 // Collator object's mode is changed
141 //
Test4054238()142 void CollationRegressionTest::Test4054238(/* char* par */)
143 {
144     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
145     const UnicodeString test3(chars3);
146     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
147 
148     // NOTE: The Java code uses en_us to create the CollationElementIterators
149     // but I'm pretty sure that's wrong, so I've changed this to use c.
150     UErrorCode status = U_ZERO_ERROR;
151     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
152     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
153     delete i1;
154     delete c;
155 }
156 
157 // @bug 4054734
158 //
159 // Collator::IDENTICAL documented but not implemented
160 //
Test4054734()161 void CollationRegressionTest::Test4054734(/* char* par */)
162 {
163     /*
164         Here's the original Java:
165 
166         String[] decomp = {
167             "\u0001",   "<",    "\u0002",
168             "\u0001",   "=",    "\u0001",
169             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
170             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
171         };
172 
173         String[] nodecomp = {
174             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
175         };
176     */
177 
178     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
179     {
180         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
181         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
182         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
183         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
184     };
185 
186 
187     UErrorCode status = U_ZERO_ERROR;
188     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
189 
190     c->setStrength(Collator::IDENTICAL);
191 
192     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
193     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
194 
195     delete c;
196 }
197 
198 // @bug 4054736
199 //
200 // Full Decomposition mode not implemented
201 //
Test4054736()202 void CollationRegressionTest::Test4054736(/* char* par */)
203 {
204     UErrorCode status = U_ZERO_ERROR;
205     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
206 
207     c->setStrength(Collator::SECONDARY);
208     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
209 
210     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
211     {
212         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
213     };
214 
215     compareArray(*c, tests, ARRAY_LENGTH(tests));
216 
217     delete c;
218 }
219 
220 // @bug 4058613
221 //
222 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
223 //
Test4058613()224 void CollationRegressionTest::Test4058613(/* char* par */)
225 {
226     // Creating a default collator doesn't work when Korean is the default
227     // locale
228 
229     Locale oldDefault = Locale::getDefault();
230     UErrorCode status = U_ZERO_ERROR;
231 
232     Locale::setDefault(Locale::getKorean(), status);
233 
234     if (U_FAILURE(status))
235     {
236         errln("Could not set default locale to Locale::KOREAN");
237         return;
238     }
239 
240     Collator *c = NULL;
241 
242     c = Collator::createInstance("en_US", status);
243 
244     if (c == NULL || U_FAILURE(status))
245     {
246         errln("Could not create a Korean collator");
247         Locale::setDefault(oldDefault, status);
248         delete c;
249         return;
250     }
251 
252     // Since the fix to this bug was to turn off decomposition for Korean collators,
253     // ensure that's what we got
254     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
255     {
256       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
257     }
258 
259     delete c;
260 
261     Locale::setDefault(oldDefault, status);
262 }
263 
264 // @bug 4059820
265 //
266 // RuleBasedCollator.getRules does not return the exact pattern as input
267 // for expanding character sequences
268 //
Test4059820()269 void CollationRegressionTest::Test4059820(/* char* par */)
270 {
271     UErrorCode status = U_ZERO_ERROR;
272 
273     RuleBasedCollator *c = NULL;
274     UnicodeString rules = "< a < b , c/a < d < z";
275 
276     c = new RuleBasedCollator(rules, status);
277 
278     if (c == NULL || U_FAILURE(status))
279     {
280         errln("Failure building a collator.");
281         delete c;
282         return;
283     }
284 
285     if ( c->getRules().indexOf("c/a") == -1)
286     {
287         errln("returned rules do not contain 'c/a'");
288     }
289 
290     delete c;
291 }
292 
293 // @bug 4060154
294 //
295 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
296 //
Test4060154()297 void CollationRegressionTest::Test4060154(/* char* par */)
298 {
299     UErrorCode status = U_ZERO_ERROR;
300     UnicodeString rules;
301 
302     rules += "< g, G < h, H < i, I < j, J";
303     rules +=  " & H < ";
304     rules += (UChar)0x0131;
305     rules += ", ";
306     rules += (UChar)0x0130;
307     rules += ", i, I";
308 
309     RuleBasedCollator *c = NULL;
310 
311     c = new RuleBasedCollator(rules, status);
312 
313     if (c == NULL || U_FAILURE(status))
314     {
315         errln("failure building collator.");
316         delete c;
317         return;
318     }
319 
320     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
321 
322  /*
323     String[] tertiary = {
324         "A",        "<",    "B",
325         "H",        "<",    "\u0131",
326         "H",        "<",    "I",
327         "\u0131",   "<",    "\u0130",
328         "\u0130",   "<",    "i",
329         "\u0130",   ">",    "H",
330     };
331 */
332 
333     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
334     {
335         {0x41, 0},    {0x3c, 0}, {0x42, 0},
336         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
337         {0x48, 0},    {0x3c, 0}, {0x49, 0},
338         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
339         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
340         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
341     };
342 
343     c->setStrength(Collator::TERTIARY);
344     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
345 
346     /*
347     String[] secondary = {
348         "H",        "<",    "I",
349         "\u0131",   "=",    "\u0130",
350     };
351 */
352     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
353     {
354         {0x48, 0},    {0x3c, 0}, {0x49, 0},
355         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
356     };
357 
358     c->setStrength(Collator::PRIMARY);
359     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
360 
361     delete c;
362 }
363 
364 // @bug 4062418
365 //
366 // Secondary/Tertiary comparison incorrect in French Secondary
367 //
Test4062418()368 void CollationRegressionTest::Test4062418(/* char* par */)
369 {
370     UErrorCode status = U_ZERO_ERROR;
371 
372     RuleBasedCollator *c = NULL;
373 
374     c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status);
375 
376     if (c == NULL || U_FAILURE(status))
377     {
378         errln("Failed to create collator for Locale::getFrance()");
379         delete c;
380         return;
381     }
382 
383     c->setStrength(Collator::SECONDARY);
384 
385 /*
386     String[] tests = {
387             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
388     };
389 */
390     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
391     {
392         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
393     };
394 
395     compareArray(*c, tests, ARRAY_LENGTH(tests));
396 
397     delete c;
398 }
399 
400 // @bug 4065540
401 //
402 // Collator::compare() method broken if either string contains spaces
403 //
Test4065540()404 void CollationRegressionTest::Test4065540(/* char* par */)
405 {
406     if (en_us->compare("abcd e", "abcd f") == 0)
407     {
408         errln("'abcd e' == 'abcd f'");
409     }
410 }
411 
412 // @bug 4066189
413 //
414 // Unicode characters need to be recursively decomposed to get the
415 // correct result. For example,
416 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
417 //
Test4066189()418 void CollationRegressionTest::Test4066189(/* char* par */)
419 {
420     static const UChar chars1[] = {0x1EB1, 0};
421     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
422     const UnicodeString test1(chars1);
423     const UnicodeString test2(chars2);
424     UErrorCode status = U_ZERO_ERROR;
425 
426     // NOTE: The java code used en_us to create the
427     // CollationElementIterator's. I'm pretty sure that
428     // was wrong, so I've change the code to use c1 and c2
429     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
430     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
431     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
432 
433     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
434     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
435     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
436 
437     assertEqual(*i1, *i2);
438 
439     delete i2;
440     delete c2;
441     delete i1;
442     delete c1;
443 }
444 
445 // @bug 4066696
446 //
447 // French secondary collation checking at the end of compare iteration fails
448 //
Test4066696()449 void CollationRegressionTest::Test4066696(/* char* par */)
450 {
451     UErrorCode status = U_ZERO_ERROR;
452     RuleBasedCollator *c = NULL;
453 
454     c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status);
455 
456     if (c == NULL || U_FAILURE(status))
457     {
458         errln("Failure creating collator for Locale::getFrance()");
459         delete c;
460         return;
461     }
462 
463     c->setStrength(Collator::SECONDARY);
464 
465 /*
466     String[] tests = {
467         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
468     };
469 
470   should be:
471 
472     String[] tests = {
473         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
474     };
475 
476 */
477 
478     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
479     {
480         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
481     };
482 
483     compareArray(*c, tests, ARRAY_LENGTH(tests));
484 
485     delete c;
486 }
487 
488 // @bug 4076676
489 //
490 // Bad canonicalization of same-class combining characters
491 //
Test4076676()492 void CollationRegressionTest::Test4076676(/* char* par */)
493 {
494     // These combining characters are all in the same class, so they should not
495     // be reordered, and they should compare as unequal.
496     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
497     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
498 
499     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
500     c->setStrength(Collator::TERTIARY);
501 
502     if (c->compare(s1,s2) == 0)
503     {
504         errln("Same-class combining chars were reordered");
505     }
506 
507     delete c;
508 }
509 
510 // @bug 4079231
511 //
512 // RuleBasedCollator::operator==(NULL) throws NullPointerException
513 //
Test4079231()514 void CollationRegressionTest::Test4079231(/* char* par */)
515 {
516     // I don't think there's any way to write this test
517     // in C++. The following is equivalent to the Java,
518     // but doesn't compile 'cause NULL can't be converted
519     // to Collator&
520     //
521     // if (en_us->operator==(NULL))
522     // {
523     //     errln("en_us->operator==(NULL) returned TRUE");
524     // }
525 
526  /*
527    try {
528         if (en_us->equals(null)) {
529             errln("en_us->equals(null) returned true");
530         }
531     }
532     catch (Exception e) {
533         errln("en_us->equals(null) threw " + e.toString());
534     }
535 */
536 }
537 
538 // @bug 4078588
539 //
540 // RuleBasedCollator breaks on "< a < bb" rule
541 //
Test4078588()542 void CollationRegressionTest::Test4078588(/* char *par */)
543 {
544     UErrorCode status = U_ZERO_ERROR;
545     RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
546 
547     if (rbc == NULL || U_FAILURE(status))
548     {
549         errln("Failed to create RuleBasedCollator.");
550         delete rbc;
551         return;
552     }
553 
554     Collator::EComparisonResult result = rbc->compare("a","bb");
555 
556     if (result != Collator::LESS)
557     {
558         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
559             + (UnicodeString)"; expected -1");
560     }
561 
562     delete rbc;
563 }
564 
565 // @bug 4081866
566 //
567 // Combining characters in different classes not reordered properly.
568 //
Test4081866()569 void CollationRegressionTest::Test4081866(/* char* par */)
570 {
571     // These combining characters are all in different classes,
572     // so they should be reordered and the strings should compare as equal.
573     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
574     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
575 
576     UErrorCode status = U_ZERO_ERROR;
577     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
578     c->setStrength(Collator::TERTIARY);
579 
580     // Now that the default collators are set to NO_DECOMPOSITION
581     // (as a result of fixing bug 4114077), we must set it explicitly
582     // when we're testing reordering behavior.  -- lwerner, 5/5/98
583     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
584 
585     if (c->compare(s1,s2) != 0)
586     {
587         errln("Combining chars were not reordered");
588     }
589 
590     delete c;
591 }
592 
593 // @bug 4087241
594 //
595 // string comparison errors in Scandinavian collators
596 //
Test4087241()597 void CollationRegressionTest::Test4087241(/* char* par */)
598 {
599     UErrorCode status = U_ZERO_ERROR;
600     Locale da_DK("da", "DK");
601     RuleBasedCollator *c = NULL;
602 
603     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
604 
605     if (c == NULL || U_FAILURE(status))
606     {
607         errln("Failed to create collator for da_DK locale");
608         delete c;
609         return;
610     }
611 
612     c->setStrength(Collator::SECONDARY);
613 
614     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
615     {
616         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
617         {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0},      // a-unlaut < a-ring
618         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
619     };
620 
621     compareArray(*c, tests, ARRAY_LENGTH(tests));
622 
623     delete c;
624 }
625 
626 // @bug 4087243
627 //
628 // CollationKey takes ignorable strings into account when it shouldn't
629 //
Test4087243()630 void CollationRegressionTest::Test4087243(/* char* par */)
631 {
632     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
633     c->setStrength(Collator::TERTIARY);
634 
635     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
636     {
637         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
638     };
639 
640     compareArray(*c, tests, ARRAY_LENGTH(tests));
641 
642     delete c;
643 }
644 
645 // @bug 4092260
646 //
647 // Mu/micro conflict
648 // Micro symbol and greek lowercase letter Mu should sort identically
649 //
Test4092260()650 void CollationRegressionTest::Test4092260(/* char* par */)
651 {
652     UErrorCode status = U_ZERO_ERROR;
653     Locale el("el", "");
654     Collator *c = NULL;
655 
656     c = Collator::createInstance(el, status);
657 
658     if (c == NULL || U_FAILURE(status))
659     {
660         errln("Failed to create collator for el locale.");
661         delete c;
662         return;
663     }
664 
665     // These now have tertiary differences in UCA
666     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
667 
668     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
669     {
670         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
671     };
672 
673     compareArray(*c, tests, ARRAY_LENGTH(tests));
674 
675     delete c;
676 }
677 
678 // @bug 4095316
679 //
Test4095316()680 void CollationRegressionTest::Test4095316(/* char* par */)
681 {
682     UErrorCode status = U_ZERO_ERROR;
683     Locale el_GR("el", "GR");
684     Collator *c = Collator::createInstance(el_GR, status);
685 
686     if (c == NULL || U_FAILURE(status))
687     {
688         errln("Failed to create collator for el_GR locale");
689         delete c;
690         return;
691     }
692     // These now have tertiary differences in UCA
693     //c->setStrength(Collator::TERTIARY);
694     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
695 
696     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
697     {
698         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
699     };
700 
701     compareArray(*c, tests, ARRAY_LENGTH(tests));
702 
703     delete c;
704 }
705 
706 // @bug 4101940
707 //
Test4101940()708 void CollationRegressionTest::Test4101940(/* char* par */)
709 {
710     UErrorCode status = U_ZERO_ERROR;
711     RuleBasedCollator *c = NULL;
712     UnicodeString rules = "< a < b";
713     UnicodeString nothing = "";
714 
715     c = new RuleBasedCollator(rules, status);
716 
717     if (c == NULL || U_FAILURE(status))
718     {
719         errln("Failed to create RuleBasedCollator");
720         delete c;
721         return;
722     }
723 
724     CollationElementIterator *i = c->createCollationElementIterator(nothing);
725     i->reset();
726 
727     if (i->next(status) != CollationElementIterator::NULLORDER)
728     {
729         errln("next did not return NULLORDER");
730     }
731 
732     delete i;
733     delete c;
734 }
735 
736 // @bug 4103436
737 //
738 // Collator::compare not handling spaces properly
739 //
Test4103436()740 void CollationRegressionTest::Test4103436(/* char* par */)
741 {
742     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
743     c->setStrength(Collator::TERTIARY);
744 
745     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
746     {
747         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
748         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
749     };
750 
751     compareArray(*c, tests, ARRAY_LENGTH(tests));
752 
753     delete c;
754 }
755 
756 // @bug 4114076
757 //
758 // Collation not Unicode conformant with Hangul syllables
759 //
Test4114076()760 void CollationRegressionTest::Test4114076(/* char* par */)
761 {
762     UErrorCode status = U_ZERO_ERROR;
763     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
764     c->setStrength(Collator::TERTIARY);
765 
766     //
767     // With Canonical decomposition, Hangul syllables should get decomposed
768     // into Jamo, but Jamo characters should not be decomposed into
769     // conjoining Jamo
770     //
771     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
772     {
773         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
774     };
775 
776     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
777     compareArray(*c, test1, ARRAY_LENGTH(test1));
778 
779     // From UTR #15:
780     // *In earlier versions of Unicode, jamo characters like ksf
781     //  had compatibility mappings to kf + sf. These mappings were
782     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
783     // That is, the following test is obsolete as of 2.1.9
784 
785 //obsolete-    // With Full decomposition, it should go all the way down to
786 //obsolete-    // conjoining Jamo characters.
787 //obsolete-    //
788 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
789 //obsolete-    {
790 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
791 //obsolete-    };
792 //obsolete-
793 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
794 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
795 
796     delete c;
797 }
798 
799 
800 // @bug 4124632
801 //
802 // Collator::getCollationKey was hanging on certain character sequences
803 //
Test4124632()804 void CollationRegressionTest::Test4124632(/* char* par */)
805 {
806     UErrorCode status = U_ZERO_ERROR;
807     Collator *coll = NULL;
808 
809     coll = Collator::createInstance(Locale::getJapan(), status);
810 
811     if (coll == NULL || U_FAILURE(status))
812     {
813         errln("Failed to create collator for Locale::JAPAN");
814         delete coll;
815     }
816 
817     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818     CollationKey key;
819 
820     coll->getCollationKey(test, key, status);
821 
822     if (key.isBogus() || U_FAILURE(status))
823     {
824         errln("CollationKey creation failed.");
825     }
826 
827     delete coll;
828 }
829 
830 // @bug 4132736
831 //
832 // sort order of french words with multiple accents has errors
833 //
Test4132736()834 void CollationRegressionTest::Test4132736(/* char* par */)
835 {
836     UErrorCode status = U_ZERO_ERROR;
837 
838     Collator *c = NULL;
839 
840     c = Collator::createInstance(Locale::getFrance(), status);
841     c->setStrength(Collator::TERTIARY);
842 
843     if (c == NULL || U_FAILURE(status))
844     {
845         errln("Failed to create a collator for Locale::getFrance()");
846         delete c;
847     }
848 
849     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
850     {
851         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
852         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
853     };
854 
855     compareArray(*c, test1, ARRAY_LENGTH(test1));
856 
857     delete c;
858 }
859 
860 // @bug 4133509
861 //
862 // The sorting using java.text.CollationKey is not in the exact order
863 //
Test4133509()864 void CollationRegressionTest::Test4133509(/* char* par */)
865 {
866     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
867     {
868         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
869         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
870         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
871     };
872 
873     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
874 }
875 
876 // @bug 4114077
877 //
878 // Collation with decomposition off doesn't work for Europe
879 //
Test4114077()880 void CollationRegressionTest::Test4114077(/* char* par */)
881 {
882     // Ensure that we get the same results with decomposition off
883     // as we do with it on....
884 
885     UErrorCode status = U_ZERO_ERROR;
886     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
887     c->setStrength(Collator::TERTIARY);
888 
889     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
890     {
891         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
892         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
893         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
894         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
895                                                 //   -> a, ring, acute
896         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
897     };
898 
899     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
900     compareArray(*c, test1, ARRAY_LENGTH(test1));
901 
902     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
903     {
904         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
905     };
906 
907     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
908     compareArray(*c, test2, ARRAY_LENGTH(test2));
909 
910     delete c;
911 }
912 
913 // @bug 4141640
914 //
915 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
916 //
Test4141640()917 void CollationRegressionTest::Test4141640(/* char* par */)
918 {
919     //
920     // Rather than just creating a Swedish collator, we might as well
921     // try to instantiate one for every locale available on the system
922     // in order to prevent this sort of bug from cropping up in the future
923     //
924     UErrorCode status = U_ZERO_ERROR;
925     int32_t i, localeCount;
926     const Locale *locales = Locale::getAvailableLocales(localeCount);
927 
928     for (i = 0; i < localeCount; i += 1)
929     {
930         Collator *c = NULL;
931 
932         status = U_ZERO_ERROR;
933         c = Collator::createInstance(locales[i], status);
934 
935         if (c == NULL || U_FAILURE(status))
936         {
937             UnicodeString msg, localeName;
938 
939             msg += "Could not create collator for locale ";
940             msg += locales[i].getName();
941 
942             errln(msg);
943         }
944 
945         delete c;
946     }
947 }
948 
949 // @bug 4139572
950 //
951 // getCollationKey throws exception for spanish text
952 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
953 //
Test4139572()954 void CollationRegressionTest::Test4139572(/* char* par */)
955 {
956     //
957     // Code pasted straight from the bug report
958     // (and then translated to C++ ;-)
959     //
960     // create spanish locale and collator
961     UErrorCode status = U_ZERO_ERROR;
962     Locale l("es", "es");
963     Collator *col = NULL;
964 
965     col = Collator::createInstance(l, status);
966 
967     if (col == NULL || U_FAILURE(status))
968     {
969         errln("Failed to create a collator for es_es locale.");
970         delete col;
971         return;
972     }
973 
974     CollationKey key;
975 
976     // this spanish phrase kills it!
977     col->getCollationKey("Nombre De Objeto", key, status);
978 
979     if (key.isBogus() || U_FAILURE(status))
980     {
981         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
982     }
983 
984     delete col;
985 }
986 /* HSYS : RuleBasedCollator::compare() performance enhancements
987           compare() does not create CollationElementIterator() anymore.*/
988 
989 class My4146160Collator : public RuleBasedCollator
990 {
991 public:
992     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
993     ~My4146160Collator();
994 
995     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
996 
997     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
998 
999     static int32_t count;
1000 };
1001 
1002 int32_t My4146160Collator::count = 0;
1003 
My4146160Collator(RuleBasedCollator & rbc,UErrorCode & status)1004 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1005   : RuleBasedCollator(rbc.getRules(), status)
1006 {
1007 }
1008 
~My4146160Collator()1009 My4146160Collator::~My4146160Collator()
1010 {
1011 }
1012 
createCollationElementIterator(const UnicodeString & text) const1013 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1014 {
1015     count += 1;
1016     return RuleBasedCollator::createCollationElementIterator(text);
1017 }
1018 
createCollationElementIterator(const CharacterIterator & text) const1019 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1020 {
1021     count += 1;
1022     return RuleBasedCollator::createCollationElementIterator(text);
1023 }
1024 
1025 // @bug 4146160
1026 //
1027 // RuleBasedCollator doesn't use createCollationElementIterator internally
1028 //
Test4146160()1029 void CollationRegressionTest::Test4146160(/* char* par */)
1030 {
1031 #if 0
1032     //
1033     // Use a custom collator class whose createCollationElementIterator
1034     // methods increment a count....
1035     //
1036     UErrorCode status = U_ZERO_ERROR;
1037     CollationKey key;
1038 
1039     My4146160Collator::count = 0;
1040     My4146160Collator *mc = NULL;
1041 
1042     mc = new My4146160Collator(*en_us, status);
1043 
1044     if (mc == NULL || U_FAILURE(status))
1045     {
1046         errln("Failed to create a My4146160Collator.");
1047         delete mc;
1048         return;
1049     }
1050 
1051     mc->getCollationKey("1", key, status);
1052 
1053     if (key.isBogus() || U_FAILURE(status))
1054     {
1055         errln("Failure to get a CollationKey from a My4146160Collator.");
1056         delete mc;
1057         return;
1058     }
1059 
1060     if (My4146160Collator::count < 1)
1061     {
1062         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1063     }
1064 
1065     My4146160Collator::count = 0;
1066     mc->compare("1", "2");
1067 
1068     if (My4146160Collator::count < 1)
1069     {
1070         errln("My4146160Collator::createtCollationElementIterator not called for compare");
1071     }
1072 
1073     delete mc;
1074 #endif
1075 }
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1076 void CollationRegressionTest::compareArray(Collator &c,
1077                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1078                                            int32_t testCount)
1079 {
1080     int32_t i;
1081     Collator::EComparisonResult expectedResult = Collator::EQUAL;
1082 
1083     for (i = 0; i < testCount; i += 3)
1084     {
1085         UnicodeString source(tests[i]);
1086         UnicodeString comparison(tests[i + 1]);
1087         UnicodeString target(tests[i + 2]);
1088 
1089         if (comparison == "<")
1090         {
1091             expectedResult = Collator::LESS;
1092         }
1093         else if (comparison == ">")
1094         {
1095             expectedResult = Collator::GREATER;
1096         }
1097         else if (comparison == "=")
1098         {
1099             expectedResult = Collator::EQUAL;
1100         }
1101         else
1102         {
1103             UnicodeString bogus1("Bogus comparison string \"");
1104             UnicodeString bogus2("\"");
1105             errln(bogus1 + comparison + bogus2);
1106         }
1107 
1108         Collator::EComparisonResult compareResult = c.compare(source, target);
1109 
1110         CollationKey sourceKey, targetKey;
1111         UErrorCode status = U_ZERO_ERROR;
1112 
1113         c.getCollationKey(source, sourceKey, status);
1114 
1115         if (U_FAILURE(status))
1116         {
1117             errln("Couldn't get collationKey for source");
1118             continue;
1119         }
1120 
1121         c.getCollationKey(target, targetKey, status);
1122 
1123         if (U_FAILURE(status))
1124         {
1125             errln("Couldn't get collationKey for target");
1126             continue;
1127         }
1128 
1129         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1130 
1131         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1132 
1133     }
1134 }
1135 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1136 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1137 {
1138     int32_t c1, c2, count = 0;
1139     UErrorCode status = U_ZERO_ERROR;
1140 
1141     do
1142     {
1143         c1 = i1.next(status);
1144         c2 = i2.next(status);
1145 
1146         if (c1 != c2)
1147         {
1148             UnicodeString msg, msg1("    ");
1149 
1150             msg += msg1 + count;
1151             msg += ": strength(0x";
1152             appendHex(c1, 8, msg);
1153             msg += ") != strength(0x";
1154             appendHex(c2, 8, msg);
1155             msg += ")";
1156 
1157             errln(msg);
1158             break;
1159         }
1160 
1161         count += 1;
1162     }
1163     while (c1 != CollationElementIterator::NULLORDER);
1164 }
1165 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1166 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1167 {
1168     if (exec)
1169     {
1170         logln("Collation Regression Tests: ");
1171     }
1172 
1173     if(en_us) {
1174       switch (index)
1175       {
1176           case  0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
1177           case  1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
1178           case  2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
1179           case  3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
1180           case  4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
1181           case  5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
1182           case  6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
1183           case  7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
1184           case  8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
1185           case  9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
1186           case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
1187           case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
1188           case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
1189           case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
1190           case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
1191           case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
1192           case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
1193           case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
1194           case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
1195           case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
1196           case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
1197           case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
1198           case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
1199           case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
1200           case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
1201           case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
1202           case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
1203           case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
1204           case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
1205           case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
1206           case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
1207           default: name = ""; break;
1208       }
1209     } else {
1210       errln("Class collator not instantiated");
1211       name = "";
1212     }
1213 }
1214 
1215 #endif /* #if !UCONFIG_NO_COLLATION */
1216