• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_COLLATION
10 
11 #include "unicode/coll.h"
12 #include "unicode/localpointer.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/unistr.h"
15 #include "unicode/sortkey.h"
16 #include "regcoll.h"
17 #include "sfwdchit.h"
18 #include "testutil.h"
19 #include "cmemory.h"
20 
// Number of elements in a true array (not a pointer), as an int32_t.
// The argument is parenthesized so that any array-valued expression can be passed safely.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
22 
CollationRegressionTest()23 CollationRegressionTest::CollationRegressionTest()
24 {
25     UErrorCode status = U_ZERO_ERROR;
26 
27     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28     if(U_FAILURE(status)) {
29       delete en_us;
30       en_us = 0;
31       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32       return;
33     }
34 }
35 
~CollationRegressionTest()36 CollationRegressionTest::~CollationRegressionTest()
37 {
38     delete en_us;
39 }
40 
41 
42     // @bug 4048446
43 //
44 // CollationElementIterator.reset() doesn't work
45 //
Test4048446()46 void CollationRegressionTest::Test4048446(/* char* par */)
47 {
48     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52     UErrorCode status = U_ZERO_ERROR;
53 
54     if (i1 == NULL|| i2 == NULL)
55     {
56         errln("Could not create CollationElementIterator's");
57         delete i1;
58         delete i2;
59         return;
60     }
61 
62     while (i1->next(status) != CollationElementIterator::NULLORDER)
63     {
64         if (U_FAILURE(status))
65         {
66             errln("error calling next()");
67 
68             delete i1;
69             delete i2;
70             return;
71         }
72     }
73 
74     i1->reset();
75 
76     assertEqual(*i1, *i2);
77 
78     delete i1;
79     delete i2;
80 }
81 
82 // @bug 4051866
83 //
84 // Collator -> rules -> Collator round-trip broken for expanding characters
85 //
Test4051866()86 void CollationRegressionTest::Test4051866(/* char* par */)
87 {
88     UnicodeString rules;
89     UErrorCode status = U_ZERO_ERROR;
90 
91     rules += "&n < o ";
92     rules += "& oe ,o";
93     rules += (UChar)0x3080;
94     rules += "& oe ,";
95     rules += (UChar)0x1530;
96     rules += " ,O";
97     rules += "& OE ,O";
98     rules += (UChar)0x3080;
99     rules += "& OE ,";
100     rules += (UChar)0x1520;
101     rules += "< p ,P";
102 
103     // Build a collator containing expanding characters
104     LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status));
105 
106     // Build another using the rules from  the first
107     LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status));
108     if (U_FAILURE(status)) {
109         errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
110         return;
111     }
112 
113     // Make sure they're the same
114     if (!(c1->getRules() == c2->getRules()))
115     {
116         errln("Rules are not equal");
117     }
118 }
119 
120 // @bug 4053636
121 //
122 // Collator thinks "black-bird" == "black"
123 //
Test4053636()124 void CollationRegressionTest::Test4053636(/* char* par */)
125 {
126     if (en_us->equals("black_bird", "black"))
127     {
128         errln("black-bird == black");
129     }
130 }
131 
132 // @bug 4054238
133 //
134 // CollationElementIterator will not work correctly if the associated
135 // Collator object's mode is changed
136 //
Test4054238()137 void CollationRegressionTest::Test4054238(/* char* par */)
138 {
139     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
140     const UnicodeString test3(chars3);
141     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
142 
143     // NOTE: The Java code uses en_us to create the CollationElementIterators
144     // but I'm pretty sure that's wrong, so I've changed this to use c.
145     UErrorCode status = U_ZERO_ERROR;
146     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
147     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
148     delete i1;
149     delete c;
150 }
151 
152 // @bug 4054734
153 //
154 // Collator::IDENTICAL documented but not implemented
155 //
Test4054734()156 void CollationRegressionTest::Test4054734(/* char* par */)
157 {
158     /*
159         Here's the original Java:
160 
161         String[] decomp = {
162             "\u0001",   "<",    "\u0002",
163             "\u0001",   "=",    "\u0001",
164             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
165             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
166         };
167 
168         String[] nodecomp = {
169             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
170         };
171     */
172 
173     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
174     {
175         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
176         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
177         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
178         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
179     };
180 
181 
182     UErrorCode status = U_ZERO_ERROR;
183     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
184 
185     c->setStrength(Collator::IDENTICAL);
186 
187     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
188     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
189 
190     delete c;
191 }
192 
193 // @bug 4054736
194 //
195 // Full Decomposition mode not implemented
196 //
Test4054736()197 void CollationRegressionTest::Test4054736(/* char* par */)
198 {
199     UErrorCode status = U_ZERO_ERROR;
200     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
201 
202     c->setStrength(Collator::SECONDARY);
203     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
204 
205     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
206     {
207         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
208     };
209 
210     compareArray(*c, tests, ARRAY_LENGTH(tests));
211 
212     delete c;
213 }
214 
215 // @bug 4058613
216 //
217 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
218 //
Test4058613()219 void CollationRegressionTest::Test4058613(/* char* par */)
220 {
221     // Creating a default collator doesn't work when Korean is the default
222     // locale
223 
224     Locale oldDefault = Locale::getDefault();
225     UErrorCode status = U_ZERO_ERROR;
226 
227     Locale::setDefault(Locale::getKorean(), status);
228 
229     if (U_FAILURE(status))
230     {
231         errln("Could not set default locale to Locale::KOREAN");
232         return;
233     }
234 
235     Collator *c = NULL;
236 
237     c = Collator::createInstance("en_US", status);
238 
239     if (c == NULL || U_FAILURE(status))
240     {
241         errln("Could not create a Korean collator");
242         Locale::setDefault(oldDefault, status);
243         delete c;
244         return;
245     }
246 
247     // Since the fix to this bug was to turn off decomposition for Korean collators,
248     // ensure that's what we got
249     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
250     {
251       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
252     }
253 
254     delete c;
255 
256     Locale::setDefault(oldDefault, status);
257 }
258 
259 // @bug 4059820
260 //
261 // RuleBasedCollator.getRules does not return the exact pattern as input
262 // for expanding character sequences
263 //
Test4059820()264 void CollationRegressionTest::Test4059820(/* char* par */)
265 {
266     UErrorCode status = U_ZERO_ERROR;
267 
268     RuleBasedCollator *c = NULL;
269     UnicodeString rules = "&9 < a < b , c/a < d < z";
270 
271     c = new RuleBasedCollator(rules, status);
272 
273     if (c == NULL || U_FAILURE(status))
274     {
275         errln("Failure building a collator.");
276         delete c;
277         return;
278     }
279 
280     if ( c->getRules().indexOf("c/a") == -1)
281     {
282         errln("returned rules do not contain 'c/a'");
283     }
284 
285     delete c;
286 }
287 
288 // @bug 4060154
289 //
290 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
291 //
Test4060154()292 void CollationRegressionTest::Test4060154(/* char* par */)
293 {
294     UErrorCode status = U_ZERO_ERROR;
295     UnicodeString rules;
296 
297     rules += "&f < g, G < h, H < i, I < j, J";
298     rules +=  " & H < ";
299     rules += (UChar)0x0131;
300     rules += ", ";
301     rules += (UChar)0x0130;
302     rules += ", i, I";
303 
304     RuleBasedCollator *c = NULL;
305 
306     c = new RuleBasedCollator(rules, status);
307 
308     if (c == NULL || U_FAILURE(status))
309     {
310         errln("failure building collator.");
311         delete c;
312         return;
313     }
314 
315     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
316 
317  /*
318     String[] tertiary = {
319         "A",        "<",    "B",
320         "H",        "<",    "\u0131",
321         "H",        "<",    "I",
322         "\u0131",   "<",    "\u0130",
323         "\u0130",   "<",    "i",
324         "\u0130",   ">",    "H",
325     };
326 */
327 
328     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
329     {
330         {0x41, 0},    {0x3c, 0}, {0x42, 0},
331         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
332         {0x48, 0},    {0x3c, 0}, {0x49, 0},
333         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
334         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
335         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
336     };
337 
338     c->setStrength(Collator::TERTIARY);
339     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
340 
341     /*
342     String[] secondary = {
343         "H",        "<",    "I",
344         "\u0131",   "=",    "\u0130",
345     };
346 */
347     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
348     {
349         {0x48, 0},    {0x3c, 0}, {0x49, 0},
350         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
351     };
352 
353     c->setStrength(Collator::PRIMARY);
354     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
355 
356     delete c;
357 }
358 
359 // @bug 4062418
360 //
361 // Secondary/Tertiary comparison incorrect in French Secondary
362 //
Test4062418()363 void CollationRegressionTest::Test4062418(/* char* par */)
364 {
365     UErrorCode status = U_ZERO_ERROR;
366 
367     RuleBasedCollator *c = NULL;
368 
369     c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
370 
371     if (c == NULL || U_FAILURE(status))
372     {
373         errln("Failed to create collator for Locale::getCanadaFrench()");
374         delete c;
375         return;
376     }
377 
378     c->setStrength(Collator::SECONDARY);
379 
380 /*
381     String[] tests = {
382             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
383     };
384 */
385     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
386     {
387         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
388     };
389 
390     compareArray(*c, tests, ARRAY_LENGTH(tests));
391 
392     delete c;
393 }
394 
395 // @bug 4065540
396 //
397 // Collator::compare() method broken if either string contains spaces
398 //
Test4065540()399 void CollationRegressionTest::Test4065540(/* char* par */)
400 {
401     if (en_us->compare("abcd e", "abcd f") == 0)
402     {
403         errln("'abcd e' == 'abcd f'");
404     }
405 }
406 
407 // @bug 4066189
408 //
409 // Unicode characters need to be recursively decomposed to get the
410 // correct result. For example,
411 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
412 //
Test4066189()413 void CollationRegressionTest::Test4066189(/* char* par */)
414 {
415     static const UChar chars1[] = {0x1EB1, 0};
416     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
417     const UnicodeString test1(chars1);
418     const UnicodeString test2(chars2);
419     UErrorCode status = U_ZERO_ERROR;
420 
421     // NOTE: The java code used en_us to create the
422     // CollationElementIterator's. I'm pretty sure that
423     // was wrong, so I've change the code to use c1 and c2
424     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
425     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
426     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
427 
428     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
429     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
430     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
431 
432     assertEqual(*i1, *i2);
433 
434     delete i2;
435     delete c2;
436     delete i1;
437     delete c1;
438 }
439 
440 // @bug 4066696
441 //
442 // French secondary collation checking at the end of compare iteration fails
443 //
Test4066696()444 void CollationRegressionTest::Test4066696(/* char* par */)
445 {
446     UErrorCode status = U_ZERO_ERROR;
447     RuleBasedCollator *c = NULL;
448 
449     c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
450 
451     if (c == NULL || U_FAILURE(status))
452     {
453         errln("Failure creating collator for Locale::getCanadaFrench()");
454         delete c;
455         return;
456     }
457 
458     c->setStrength(Collator::SECONDARY);
459 
460 /*
461     String[] tests = {
462         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
463     };
464 
465   should be:
466 
467     String[] tests = {
468         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
469     };
470 
471 */
472 
473     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
474     {
475         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
476     };
477 
478     compareArray(*c, tests, ARRAY_LENGTH(tests));
479 
480     delete c;
481 }
482 
483 // @bug 4076676
484 //
485 // Bad canonicalization of same-class combining characters
486 //
Test4076676()487 void CollationRegressionTest::Test4076676(/* char* par */)
488 {
489     // These combining characters are all in the same class, so they should not
490     // be reordered, and they should compare as unequal.
491     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
492     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
493 
494     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
495     c->setStrength(Collator::TERTIARY);
496 
497     if (c->compare(s1,s2) == 0)
498     {
499         errln("Same-class combining chars were reordered");
500     }
501 
502     delete c;
503 }
504 
505 // @bug 4079231
506 //
507 // RuleBasedCollator::operator==(NULL) throws NullPointerException
508 //
Test4079231()509 void CollationRegressionTest::Test4079231(/* char* par */)
510 {
511     // I don't think there's any way to write this test
512     // in C++. The following is equivalent to the Java,
513     // but doesn't compile 'cause NULL can't be converted
514     // to Collator&
515     //
516     // if (en_us->operator==(NULL))
517     // {
518     //     errln("en_us->operator==(NULL) returned TRUE");
519     // }
520 
521  /*
522    try {
523         if (en_us->equals(null)) {
524             errln("en_us->equals(null) returned true");
525         }
526     }
527     catch (Exception e) {
528         errln("en_us->equals(null) threw " + e.toString());
529     }
530 */
531 }
532 
533 // @bug 4078588
534 //
535 // RuleBasedCollator breaks on "< a < bb" rule
536 //
Test4078588()537 void CollationRegressionTest::Test4078588(/* char *par */)
538 {
539     UErrorCode status = U_ZERO_ERROR;
540     RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
541 
542     if (rbc == NULL || U_FAILURE(status))
543     {
544         errln("Failed to create RuleBasedCollator.");
545         delete rbc;
546         return;
547     }
548 
549     Collator::EComparisonResult result = rbc->compare("a","bb");
550 
551     if (result != Collator::LESS)
552     {
553         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
554             + (UnicodeString)"; expected -1");
555     }
556 
557     delete rbc;
558 }
559 
560 // @bug 4081866
561 //
562 // Combining characters in different classes not reordered properly.
563 //
Test4081866()564 void CollationRegressionTest::Test4081866(/* char* par */)
565 {
566     // These combining characters are all in different classes,
567     // so they should be reordered and the strings should compare as equal.
568     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
569     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
570 
571     UErrorCode status = U_ZERO_ERROR;
572     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
573     c->setStrength(Collator::TERTIARY);
574 
575     // Now that the default collators are set to NO_DECOMPOSITION
576     // (as a result of fixing bug 4114077), we must set it explicitly
577     // when we're testing reordering behavior.  -- lwerner, 5/5/98
578     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
579 
580     if (c->compare(s1,s2) != 0)
581     {
582         errln("Combining chars were not reordered");
583     }
584 
585     delete c;
586 }
587 
588 // @bug 4087241
589 //
590 // string comparison errors in Scandinavian collators
591 //
Test4087241()592 void CollationRegressionTest::Test4087241(/* char* par */)
593 {
594     UErrorCode status = U_ZERO_ERROR;
595     Locale da_DK("da", "DK");
596     RuleBasedCollator *c = NULL;
597 
598     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
599 
600     if (c == NULL || U_FAILURE(status))
601     {
602         errln("Failed to create collator for da_DK locale");
603         delete c;
604         return;
605     }
606 
607     c->setStrength(Collator::SECONDARY);
608 
609     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
610     {
611         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
612         {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
613         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
614     };
615 
616     compareArray(*c, tests, ARRAY_LENGTH(tests));
617 
618     delete c;
619 }
620 
621 // @bug 4087243
622 //
623 // CollationKey takes ignorable strings into account when it shouldn't
624 //
Test4087243()625 void CollationRegressionTest::Test4087243(/* char* par */)
626 {
627     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
628     c->setStrength(Collator::TERTIARY);
629 
630     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
631     {
632         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
633     };
634 
635     compareArray(*c, tests, ARRAY_LENGTH(tests));
636 
637     delete c;
638 }
639 
640 // @bug 4092260
641 //
642 // Mu/micro conflict
643 // Micro symbol and greek lowercase letter Mu should sort identically
644 //
Test4092260()645 void CollationRegressionTest::Test4092260(/* char* par */)
646 {
647     UErrorCode status = U_ZERO_ERROR;
648     Locale el("el", "");
649     Collator *c = NULL;
650 
651     c = Collator::createInstance(el, status);
652 
653     if (c == NULL || U_FAILURE(status))
654     {
655         errln("Failed to create collator for el locale.");
656         delete c;
657         return;
658     }
659 
660     // These now have tertiary differences in UCA
661     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
662 
663     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
664     {
665         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
666     };
667 
668     compareArray(*c, tests, ARRAY_LENGTH(tests));
669 
670     delete c;
671 }
672 
673 // @bug 4095316
674 //
Test4095316()675 void CollationRegressionTest::Test4095316(/* char* par */)
676 {
677     UErrorCode status = U_ZERO_ERROR;
678     Locale el_GR("el", "GR");
679     Collator *c = Collator::createInstance(el_GR, status);
680 
681     if (c == NULL || U_FAILURE(status))
682     {
683         errln("Failed to create collator for el_GR locale");
684         delete c;
685         return;
686     }
687     // These now have tertiary differences in UCA
688     //c->setStrength(Collator::TERTIARY);
689     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
690 
691     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
692     {
693         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
694     };
695 
696     compareArray(*c, tests, ARRAY_LENGTH(tests));
697 
698     delete c;
699 }
700 
701 // @bug 4101940
702 //
Test4101940()703 void CollationRegressionTest::Test4101940(/* char* par */)
704 {
705     UErrorCode status = U_ZERO_ERROR;
706     RuleBasedCollator *c = NULL;
707     UnicodeString rules = "&9 < a < b";
708     UnicodeString nothing = "";
709 
710     c = new RuleBasedCollator(rules, status);
711 
712     if (c == NULL || U_FAILURE(status))
713     {
714         errln("Failed to create RuleBasedCollator");
715         delete c;
716         return;
717     }
718 
719     CollationElementIterator *i = c->createCollationElementIterator(nothing);
720     i->reset();
721 
722     if (i->next(status) != CollationElementIterator::NULLORDER)
723     {
724         errln("next did not return NULLORDER");
725     }
726 
727     delete i;
728     delete c;
729 }
730 
731 // @bug 4103436
732 //
733 // Collator::compare not handling spaces properly
734 //
Test4103436()735 void CollationRegressionTest::Test4103436(/* char* par */)
736 {
737     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
738     c->setStrength(Collator::TERTIARY);
739 
740     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
741     {
742         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
743         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
744     };
745 
746     compareArray(*c, tests, ARRAY_LENGTH(tests));
747 
748     delete c;
749 }
750 
751 // @bug 4114076
752 //
753 // Collation not Unicode conformant with Hangul syllables
754 //
Test4114076()755 void CollationRegressionTest::Test4114076(/* char* par */)
756 {
757     UErrorCode status = U_ZERO_ERROR;
758     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
759     c->setStrength(Collator::TERTIARY);
760 
761     //
762     // With Canonical decomposition, Hangul syllables should get decomposed
763     // into Jamo, but Jamo characters should not be decomposed into
764     // conjoining Jamo
765     //
766     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
767     {
768         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
769     };
770 
771     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
772     compareArray(*c, test1, ARRAY_LENGTH(test1));
773 
774     // From UTR #15:
775     // *In earlier versions of Unicode, jamo characters like ksf
776     //  had compatibility mappings to kf + sf. These mappings were
777     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
778     // That is, the following test is obsolete as of 2.1.9
779 
780 //obsolete-    // With Full decomposition, it should go all the way down to
781 //obsolete-    // conjoining Jamo characters.
782 //obsolete-    //
783 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
784 //obsolete-    {
785 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
786 //obsolete-    };
787 //obsolete-
788 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
789 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
790 
791     delete c;
792 }
793 
794 
795 // @bug 4124632
796 //
797 // Collator::getCollationKey was hanging on certain character sequences
798 //
Test4124632()799 void CollationRegressionTest::Test4124632(/* char* par */)
800 {
801     UErrorCode status = U_ZERO_ERROR;
802     Collator *coll = NULL;
803 
804     coll = Collator::createInstance(Locale::getJapan(), status);
805 
806     if (coll == NULL || U_FAILURE(status))
807     {
808         errln("Failed to create collator for Locale::JAPAN");
809         delete coll;
810         return;
811     }
812 
813     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
814     CollationKey key;
815 
816     coll->getCollationKey(test, key, status);
817 
818     if (key.isBogus() || U_FAILURE(status))
819     {
820         errln("CollationKey creation failed.");
821     }
822 
823     delete coll;
824 }
825 
826 // @bug 4132736
827 //
828 // sort order of french words with multiple accents has errors
829 //
Test4132736()830 void CollationRegressionTest::Test4132736(/* char* par */)
831 {
832     UErrorCode status = U_ZERO_ERROR;
833 
834     Collator *c = NULL;
835 
836     c = Collator::createInstance(Locale::getCanadaFrench(), status);
837     c->setStrength(Collator::TERTIARY);
838 
839     if (c == NULL || U_FAILURE(status))
840     {
841         errln("Failed to create a collator for Locale::getCanadaFrench()");
842         delete c;
843         return;
844     }
845 
846     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
847     {
848         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
849         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
850     };
851 
852     compareArray(*c, test1, ARRAY_LENGTH(test1));
853 
854     delete c;
855 }
856 
857 // @bug 4133509
858 //
859 // The sorting using java.text.CollationKey is not in the exact order
860 //
Test4133509()861 void CollationRegressionTest::Test4133509(/* char* par */)
862 {
863     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
864     {
865         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
866         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
867         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
868     };
869 
870     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
871 }
872 
873 // @bug 4114077
874 //
875 // Collation with decomposition off doesn't work for Europe
876 //
Test4114077()877 void CollationRegressionTest::Test4114077(/* char* par */)
878 {
879     // Ensure that we get the same results with decomposition off
880     // as we do with it on....
881 
882     UErrorCode status = U_ZERO_ERROR;
883     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
884     c->setStrength(Collator::TERTIARY);
885 
886     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
887     {
888         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
889         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
890         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
891         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
892                                                 //   -> a, ring, acute
893         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
894     };
895 
896     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
897     compareArray(*c, test1, ARRAY_LENGTH(test1));
898 
899     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
900     {
901         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
902     };
903 
904     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
905     compareArray(*c, test2, ARRAY_LENGTH(test2));
906 
907     delete c;
908 }
909 
910 // @bug 4141640
911 //
912 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
913 //
Test4141640()914 void CollationRegressionTest::Test4141640(/* char* par */)
915 {
916     //
917     // Rather than just creating a Swedish collator, we might as well
918     // try to instantiate one for every locale available on the system
919     // in order to prevent this sort of bug from cropping up in the future
920     //
921     UErrorCode status = U_ZERO_ERROR;
922     int32_t i, localeCount;
923     const Locale *locales = Locale::getAvailableLocales(localeCount);
924 
925     for (i = 0; i < localeCount; i += 1)
926     {
927         Collator *c = NULL;
928 
929         status = U_ZERO_ERROR;
930         c = Collator::createInstance(locales[i], status);
931 
932         if (c == NULL || U_FAILURE(status))
933         {
934             UnicodeString msg, localeName;
935 
936             msg += "Could not create collator for locale ";
937             msg += locales[i].getName();
938 
939             errln(msg);
940         }
941 
942         delete c;
943     }
944 }
945 
946 // @bug 4139572
947 //
948 // getCollationKey throws exception for spanish text
949 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
950 //
Test4139572()951 void CollationRegressionTest::Test4139572(/* char* par */)
952 {
953     //
954     // Code pasted straight from the bug report
955     // (and then translated to C++ ;-)
956     //
957     // create spanish locale and collator
958     UErrorCode status = U_ZERO_ERROR;
959     Locale l("es", "es");
960     Collator *col = NULL;
961 
962     col = Collator::createInstance(l, status);
963 
964     if (col == NULL || U_FAILURE(status))
965     {
966         errln("Failed to create a collator for es_es locale.");
967         delete col;
968         return;
969     }
970 
971     CollationKey key;
972 
973     // this spanish phrase kills it!
974     col->getCollationKey("Nombre De Objeto", key, status);
975 
976     if (key.isBogus() || U_FAILURE(status))
977     {
978         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
979     }
980 
981     delete col;
982 }
983 /* HSYS : RuleBasedCollator::compare() performance enhancements
984           compare() does not create CollationElementIterator() anymore.*/
985 
986 class My4146160Collator : public RuleBasedCollator
987 {
988 public:
989     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
990     ~My4146160Collator();
991 
992     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
993 
994     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
995 
996     static int32_t count;
997 };
998 
999 int32_t My4146160Collator::count = 0;
1000 
My4146160Collator(RuleBasedCollator & rbc,UErrorCode & status)1001 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1002   : RuleBasedCollator(rbc.getRules(), status)
1003 {
1004 }
1005 
~My4146160Collator()1006 My4146160Collator::~My4146160Collator()
1007 {
1008 }
1009 
createCollationElementIterator(const UnicodeString & text) const1010 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1011 {
1012     count += 1;
1013     return RuleBasedCollator::createCollationElementIterator(text);
1014 }
1015 
createCollationElementIterator(const CharacterIterator & text) const1016 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1017 {
1018     count += 1;
1019     return RuleBasedCollator::createCollationElementIterator(text);
1020 }
1021 
1022 // @bug 4146160
1023 //
1024 // RuleBasedCollator doesn't use createCollationElementIterator internally
1025 //
Test4146160()1026 void CollationRegressionTest::Test4146160(/* char* par */)
1027 {
1028 #if 0
1029     //
1030     // Use a custom collator class whose createCollationElementIterator
1031     // methods increment a count....
1032     //
1033     UErrorCode status = U_ZERO_ERROR;
1034     CollationKey key;
1035 
1036     My4146160Collator::count = 0;
1037     My4146160Collator *mc = NULL;
1038 
1039     mc = new My4146160Collator(*en_us, status);
1040 
1041     if (mc == NULL || U_FAILURE(status))
1042     {
1043         errln("Failed to create a My4146160Collator.");
1044         delete mc;
1045         return;
1046     }
1047 
1048     mc->getCollationKey("1", key, status);
1049 
1050     if (key.isBogus() || U_FAILURE(status))
1051     {
1052         errln("Failure to get a CollationKey from a My4146160Collator.");
1053         delete mc;
1054         return;
1055     }
1056 
1057     if (My4146160Collator::count < 1)
1058     {
1059         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1060     }
1061 
1062     My4146160Collator::count = 0;
1063     mc->compare("1", "2");
1064 
1065     if (My4146160Collator::count < 1)
1066     {
1067         errln("My4146160Collator::createtCollationElementIterator not called for compare");
1068     }
1069 
1070     delete mc;
1071 #endif
1072 }
1073 
Test4179216()1074 void CollationRegressionTest::Test4179216() {
1075     // you can position a CollationElementIterator in the middle of
1076     // a contracting character sequence, yielding a bogus collation
1077     // element
1078     IcuTestErrorCode errorCode(*this, "Test4179216");
1079     RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
1080     UnicodeString testText = "church church catcatcher runcrunchynchy";
1081     CollationElementIterator *iter = coll.createCollationElementIterator(testText);
1082 
1083     // test that the "ch" combination works properly
1084     iter->setOffset(4, errorCode);
1085     int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1086 
1087     iter->reset();
1088     int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1089 
1090     iter->setOffset(5, errorCode);
1091     int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1092 
1093     // Compares and prints only 16-bit primary weights.
1094     if (elt4 != elt0 || elt5 != elt0) {
1095         errln("The collation elements at positions 0 (0x%04x), "
1096                 "4 (0x%04x), and 5 (0x%04x) don't match.",
1097                 elt0, elt4, elt5);
1098     }
1099 
1100     // test that the "cat" combination works properly
1101     iter->setOffset(14, errorCode);
1102     int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1103 
1104     iter->setOffset(15, errorCode);
1105     int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1106 
1107     iter->setOffset(16, errorCode);
1108     int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1109 
1110     iter->setOffset(17, errorCode);
1111     int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1112 
1113     iter->setOffset(18, errorCode);
1114     int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1115 
1116     iter->setOffset(19, errorCode);
1117     int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1118 
1119     // Compares and prints only 16-bit primary weights.
1120     if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1121             || elt14 != elt18 || elt14 != elt19) {
1122         errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1123                 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1124                 "elt18 = 0x%04x, elt19 = 0x%04x",
1125                 elt14, elt15, elt16, elt17, elt18, elt19);
1126     }
1127 
1128     // now generate a complete list of the collation elements,
1129     // first using next() and then using setOffset(), and
1130     // make sure both interfaces return the same set of elements
1131     iter->reset();
1132 
1133     int32_t elt = iter->next(errorCode);
1134     int32_t count = 0;
1135     while (elt != CollationElementIterator::NULLORDER) {
1136         ++count;
1137         elt = iter->next(errorCode);
1138     }
1139 
1140     LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1141     LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1142     int32_t lastPos = 0;
1143 
1144     iter->reset();
1145     elt = iter->next(errorCode);
1146     count = 0;
1147     while (elt != CollationElementIterator::NULLORDER) {
1148         nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1149         lastPos = iter->getOffset();
1150         elt = iter->next(errorCode);
1151     }
1152     int32_t nextElementsLength = count;
1153     count = 0;
1154     for (int32_t i = 0; i < testText.length(); ) {
1155         iter->setOffset(i, errorCode);
1156         lastPos = iter->getOffset();
1157         elt = iter->next(errorCode);
1158         setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1159         i = iter->getOffset();
1160     }
1161     for (int32_t i = 0; i < nextElementsLength; i++) {
1162         if (nextElements[i] == setOffsetElements[i]) {
1163             logln(nextElements[i]);
1164         } else {
1165             errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1166                 ", but setOffset() yielded " + setOffsetElements[i]);
1167         }
1168     }
1169     delete iter;
1170 }
1171 
1172 // Ticket 7189
1173 //
1174 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const UChar * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1175 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1176     UCharIterator uiter;
1177     uint32_t state[2] = { 0, 0 };
1178     int32_t keyLen;
1179     int32_t count = 8;
1180 
1181     uiter_setString(&uiter, text, len);
1182     keyLen = 0;
1183     while (TRUE) {
1184         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1185         if (U_FAILURE(status)) {
1186             return -1;
1187         }
1188         if (keyPartLen == 0) {
1189             break;
1190         }
1191         keyLen += keyPartLen;
1192     }
1193     return keyLen;
1194 }
1195 
TestT7189()1196 void CollationRegressionTest::TestT7189() {
1197     UErrorCode status = U_ZERO_ERROR;
1198     UCollator *coll;
1199     uint32_t i;
1200 
1201     static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1202     // "Achter De Hoven"
1203         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1204         // "ABC"
1205         { 0x41, 0x42, 0x43, 0x00 },
1206         // "HELLO world!"
1207         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1208     };
1209 
1210     static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1211     // "Achter de Hoven"
1212         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1213         // "abc"
1214         { 0x61, 0x62, 0x63, 0x00 },
1215         // "hello world!"
1216         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1217     };
1218 
1219     // Open the collator
1220     coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1221     if (U_FAILURE(status)) {
1222         errln("Failed to create a collator for short string EO_S1");
1223         return;
1224     }
1225 
1226     for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1227         uint8_t key1[100], key2[100];
1228         int32_t len1, len2;
1229 
1230         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1231         if (U_FAILURE(status)) {
1232             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1233             break;
1234         }
1235         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1236         if (U_FAILURE(status)) {
1237             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1238             break;
1239         }
1240 
1241         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1242             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1243         } else {
1244             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1245                     + TestUtility::hex(key2, len2));
1246         }
1247     }
1248     ucol_close(coll);
1249 }
1250 
TestCaseFirstCompression()1251 void CollationRegressionTest::TestCaseFirstCompression() {
1252     RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1253     UErrorCode status = U_ZERO_ERROR;
1254 
1255     // default
1256     caseFirstCompressionSub(col, "default");
1257 
1258     // Upper first
1259     col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1260     if (U_FAILURE(status)) {
1261         errln("Failed to set UCOL_UPPER_FIRST");
1262         return;
1263     }
1264     caseFirstCompressionSub(col, "upper first");
1265 
1266     // Lower first
1267     col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1268     if (U_FAILURE(status)) {
1269         errln("Failed to set UCOL_LOWER_FIRST");
1270         return;
1271     }
1272     caseFirstCompressionSub(col, "lower first");
1273 
1274     delete col;
1275 }
1276 
caseFirstCompressionSub(Collator * col,UnicodeString opt)1277 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1278     const int32_t maxLength = 50;
1279 
1280     UChar str1[maxLength];
1281     UChar str2[maxLength];
1282 
1283     CollationKey key1, key2;
1284 
1285     for (int32_t len = 1; len <= maxLength; len++) {
1286         int32_t i = 0;
1287         for (; i < len - 1; i++) {
1288             str1[i] = str2[i] = (UChar)0x61; // 'a'
1289         }
1290         str1[i] = (UChar)0x41; // 'A'
1291         str2[i] = (UChar)0x61; // 'a'
1292 
1293         UErrorCode status = U_ZERO_ERROR;
1294         col->getCollationKey(str1, len, key1, status);
1295         col->getCollationKey(str2, len, key2, status);
1296 
1297         UCollationResult cmpKey = key1.compareTo(key2, status);
1298         UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1299 
1300         if (U_FAILURE(status)) {
1301             errln("Error in caseFirstCompressionSub");
1302         } else if (cmpKey != cmpCol) {
1303             errln((UnicodeString)"Inconsistent comparison(" + opt
1304                 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1305                 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1306         }
1307     }
1308 }
1309 
TestTrailingComment()1310 void CollationRegressionTest::TestTrailingComment() {
1311     // ICU ticket #8070:
1312     // Check that the rule parser handles a comment without terminating end-of-line.
1313     IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1314     RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1315     UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
1316     assertTrue("c<b", coll.compare(c, b) < 0);
1317     assertTrue("b<a", coll.compare(b, a) < 0);
1318 }
1319 
TestBeforeWithTooStrongAfter()1320 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1321     // ICU ticket #9959:
1322     // Forbid rules with a before-reset followed by a stronger relation.
1323     IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1324     RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1325     if(errorCode.isSuccess()) {
1326         errln("should forbid before-2-reset followed by primary relation");
1327     } else {
1328         errorCode.reset();
1329     }
1330     RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1331     if(errorCode.isSuccess()) {
1332         errln("should forbid before-3-reset followed by primary or secondary relation");
1333     } else {
1334         errorCode.reset();
1335     }
1336 }
1337 
compareArray(Collator & c,const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1338 void CollationRegressionTest::compareArray(Collator &c,
1339                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1340                                            int32_t testCount)
1341 {
1342     int32_t i;
1343     Collator::EComparisonResult expectedResult = Collator::EQUAL;
1344 
1345     for (i = 0; i < testCount; i += 3)
1346     {
1347         UnicodeString source(tests[i]);
1348         UnicodeString comparison(tests[i + 1]);
1349         UnicodeString target(tests[i + 2]);
1350 
1351         if (comparison == "<")
1352         {
1353             expectedResult = Collator::LESS;
1354         }
1355         else if (comparison == ">")
1356         {
1357             expectedResult = Collator::GREATER;
1358         }
1359         else if (comparison == "=")
1360         {
1361             expectedResult = Collator::EQUAL;
1362         }
1363         else
1364         {
1365             UnicodeString bogus1("Bogus comparison string \"");
1366             UnicodeString bogus2("\"");
1367             errln(bogus1 + comparison + bogus2);
1368         }
1369 
1370         Collator::EComparisonResult compareResult = c.compare(source, target);
1371 
1372         CollationKey sourceKey, targetKey;
1373         UErrorCode status = U_ZERO_ERROR;
1374 
1375         c.getCollationKey(source, sourceKey, status);
1376 
1377         if (U_FAILURE(status))
1378         {
1379             errln("Couldn't get collationKey for source");
1380             continue;
1381         }
1382 
1383         c.getCollationKey(target, targetKey, status);
1384 
1385         if (U_FAILURE(status))
1386         {
1387             errln("Couldn't get collationKey for target");
1388             continue;
1389         }
1390 
1391         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1392 
1393         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1394 
1395     }
1396 }
1397 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1398 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1399 {
1400     int32_t c1, c2, count = 0;
1401     UErrorCode status = U_ZERO_ERROR;
1402 
1403     do
1404     {
1405         c1 = i1.next(status);
1406         c2 = i2.next(status);
1407 
1408         if (c1 != c2)
1409         {
1410             UnicodeString msg, msg1("    ");
1411 
1412             msg += msg1 + count;
1413             msg += ": strength(0x";
1414             appendHex(c1, 8, msg);
1415             msg += ") != strength(0x";
1416             appendHex(c2, 8, msg);
1417             msg += ")";
1418 
1419             errln(msg);
1420             break;
1421         }
1422 
1423         count += 1;
1424     }
1425     while (c1 != CollationElementIterator::NULLORDER);
1426 }
1427 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1428 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1429 {
1430     if (exec)
1431     {
1432         logln("Collation Regression Tests: ");
1433     }
1434 
1435     if(en_us == NULL) {
1436         dataerrln("Class collator not instantiated");
1437         name = "";
1438         return;
1439     }
1440     TESTCASE_AUTO_BEGIN;
1441     TESTCASE_AUTO(Test4048446);
1442     TESTCASE_AUTO(Test4051866);
1443     TESTCASE_AUTO(Test4053636);
1444     TESTCASE_AUTO(Test4054238);
1445     TESTCASE_AUTO(Test4054734);
1446     TESTCASE_AUTO(Test4054736);
1447     TESTCASE_AUTO(Test4058613);
1448     TESTCASE_AUTO(Test4059820);
1449     TESTCASE_AUTO(Test4060154);
1450     TESTCASE_AUTO(Test4062418);
1451     TESTCASE_AUTO(Test4065540);
1452     TESTCASE_AUTO(Test4066189);
1453     TESTCASE_AUTO(Test4066696);
1454     TESTCASE_AUTO(Test4076676);
1455     TESTCASE_AUTO(Test4078588);
1456     TESTCASE_AUTO(Test4079231);
1457     TESTCASE_AUTO(Test4081866);
1458     TESTCASE_AUTO(Test4087241);
1459     TESTCASE_AUTO(Test4087243);
1460     TESTCASE_AUTO(Test4092260);
1461     TESTCASE_AUTO(Test4095316);
1462     TESTCASE_AUTO(Test4101940);
1463     TESTCASE_AUTO(Test4103436);
1464     TESTCASE_AUTO(Test4114076);
1465     TESTCASE_AUTO(Test4114077);
1466     TESTCASE_AUTO(Test4124632);
1467     TESTCASE_AUTO(Test4132736);
1468     TESTCASE_AUTO(Test4133509);
1469     TESTCASE_AUTO(Test4139572);
1470     TESTCASE_AUTO(Test4141640);
1471     TESTCASE_AUTO(Test4146160);
1472     TESTCASE_AUTO(Test4179216);
1473     TESTCASE_AUTO(TestT7189);
1474     TESTCASE_AUTO(TestCaseFirstCompression);
1475     TESTCASE_AUTO(TestTrailingComment);
1476     TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
1477     TESTCASE_AUTO_END;
1478 }
1479 
1480 #endif /* #if !UCONFIG_NO_COLLATION */
1481