• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_COLLATION
12 
13 #include "unicode/coll.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/unistr.h"
17 #include "unicode/sortkey.h"
18 #include "regcoll.h"
19 #include "sfwdchit.h"
20 #include "testutil.h"
21 #include "cmemory.h"
22 
CollationRegressionTest()23 CollationRegressionTest::CollationRegressionTest()
24 {
25     UErrorCode status = U_ZERO_ERROR;
26 
27     en_us = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getUS(), status));
28     if(U_FAILURE(status)) {
29       delete en_us;
30       en_us = nullptr;
31       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32       return;
33     }
34 }
35 
~CollationRegressionTest()36 CollationRegressionTest::~CollationRegressionTest()
37 {
38     delete en_us;
39 }
40 
41 
42     // @bug 4048446
43 //
44 // CollationElementIterator.reset() doesn't work
45 //
Test4048446()46 void CollationRegressionTest::Test4048446(/* char* par */)
47 {
48     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52     UErrorCode status = U_ZERO_ERROR;
53 
54     if (i1 == nullptr|| i2 == nullptr)
55     {
56         errln("Could not create CollationElementIterator's");
57         delete i1;
58         delete i2;
59         return;
60     }
61 
62     while (i1->next(status) != CollationElementIterator::NULLORDER)
63     {
64         if (U_FAILURE(status))
65         {
66             errln("error calling next()");
67 
68             delete i1;
69             delete i2;
70             return;
71         }
72     }
73 
74     i1->reset();
75 
76     assertEqual(*i1, *i2);
77 
78     delete i1;
79     delete i2;
80 }
81 
82 // @bug 4051866
83 //
84 // Collator -> rules -> Collator round-trip broken for expanding characters
85 //
Test4051866()86 void CollationRegressionTest::Test4051866(/* char* par */)
87 {
88     UnicodeString rules;
89     UErrorCode status = U_ZERO_ERROR;
90 
91     rules += "&n < o ";
92     rules += "& oe ,o";
93     rules += (char16_t)0x3080;
94     rules += "& oe ,";
95     rules += (char16_t)0x1530;
96     rules += " ,O";
97     rules += "& OE ,O";
98     rules += (char16_t)0x3080;
99     rules += "& OE ,";
100     rules += (char16_t)0x1520;
101     rules += "< p ,P";
102 
103     // Build a collator containing expanding characters
104     LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
105     if (U_FAILURE(status)) {
106         errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
107         return;
108     }
109 
110     // Build another using the rules from  the first
111     LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
112     if (U_FAILURE(status)) {
113         errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
114         return;
115     }
116 
117     // Make sure they're the same
118     if (!(c1->getRules() == c2->getRules()))
119     {
120         errln("Rules are not equal");
121     }
122 }
123 
124 // @bug 4053636
125 //
126 // Collator thinks "black-bird" == "black"
127 //
Test4053636()128 void CollationRegressionTest::Test4053636(/* char* par */)
129 {
130     if (en_us->equals("black_bird", "black"))
131     {
132         errln("black-bird == black");
133     }
134 }
135 
136 // @bug 4054238
137 //
138 // CollationElementIterator will not work correctly if the associated
139 // Collator object's mode is changed
140 //
Test4054238()141 void CollationRegressionTest::Test4054238(/* char* par */)
142 {
143     const char16_t chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
144     const UnicodeString test3(chars3);
145     RuleBasedCollator *c = en_us->clone();
146 
147     // NOTE: The Java code uses en_us to create the CollationElementIterators
148     // but I'm pretty sure that's wrong, so I've changed this to use c.
149     UErrorCode status = U_ZERO_ERROR;
150     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
151     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
152     delete i1;
153     delete c;
154 }
155 
156 // @bug 4054734
157 //
158 // Collator::IDENTICAL documented but not implemented
159 //
Test4054734()160 void CollationRegressionTest::Test4054734(/* char* par */)
161 {
162     /*
163         Here's the original Java:
164 
165         String[] decomp = {
166             "\u0001",   "<",    "\u0002",
167             "\u0001",   "=",    "\u0001",
168             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
169             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
170         };
171 
172         String[] nodecomp = {
173             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
174         };
175     */
176 
177     static const char16_t decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
178     {
179         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
180         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
181         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
182         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
183     };
184 
185 
186     UErrorCode status = U_ZERO_ERROR;
187     RuleBasedCollator *c = en_us->clone();
188 
189     c->setStrength(Collator::IDENTICAL);
190 
191     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
192     compareArray(*c, decomp, UPRV_LENGTHOF(decomp));
193 
194     delete c;
195 }
196 
197 // @bug 4054736
198 //
199 // Full Decomposition mode not implemented
200 //
Test4054736()201 void CollationRegressionTest::Test4054736(/* char* par */)
202 {
203     UErrorCode status = U_ZERO_ERROR;
204     RuleBasedCollator *c = en_us->clone();
205 
206     c->setStrength(Collator::SECONDARY);
207     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
208 
209     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
210     {
211         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
212     };
213 
214     compareArray(*c, tests, UPRV_LENGTHOF(tests));
215 
216     delete c;
217 }
218 
219 // @bug 4058613
220 //
221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
222 //
Test4058613()223 void CollationRegressionTest::Test4058613(/* char* par */)
224 {
225     // Creating a default collator doesn't work when Korean is the default
226     // locale
227 
228     Locale oldDefault = Locale::getDefault();
229     UErrorCode status = U_ZERO_ERROR;
230 
231     Locale::setDefault(Locale::getKorean(), status);
232 
233     if (U_FAILURE(status))
234     {
235         errln("Could not set default locale to Locale::KOREAN");
236         return;
237     }
238 
239     Collator *c = nullptr;
240 
241     c = Collator::createInstance("en_US", status);
242 
243     if (c == nullptr || U_FAILURE(status))
244     {
245         errln("Could not create a Korean collator");
246         Locale::setDefault(oldDefault, status);
247         delete c;
248         return;
249     }
250 
251     // Since the fix to this bug was to turn off decomposition for Korean collators,
252     // ensure that's what we got
253     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
254     {
255       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256     }
257 
258     delete c;
259 
260     Locale::setDefault(oldDefault, status);
261 }
262 
263 // @bug 4059820
264 //
265 // RuleBasedCollator.getRules does not return the exact pattern as input
266 // for expanding character sequences
267 //
Test4059820()268 void CollationRegressionTest::Test4059820(/* char* par */)
269 {
270     UErrorCode status = U_ZERO_ERROR;
271 
272     RuleBasedCollator *c = nullptr;
273     UnicodeString rules = "&9 < a < b , c/a < d < z";
274 
275     c = new RuleBasedCollator(rules, status);
276 
277     if (c == nullptr || U_FAILURE(status))
278     {
279         errln("Failure building a collator.");
280         delete c;
281         return;
282     }
283 
284     if ( c->getRules().indexOf("c/a") == -1)
285     {
286         errln("returned rules do not contain 'c/a'");
287     }
288 
289     delete c;
290 }
291 
292 // @bug 4060154
293 //
294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
295 //
Test4060154()296 void CollationRegressionTest::Test4060154(/* char* par */)
297 {
298     UErrorCode status = U_ZERO_ERROR;
299     UnicodeString rules;
300 
301     rules += "&f < g, G < h, H < i, I < j, J";
302     rules +=  " & H < ";
303     rules += (char16_t)0x0131;
304     rules += ", ";
305     rules += (char16_t)0x0130;
306     rules += ", i, I";
307 
308     RuleBasedCollator *c = nullptr;
309 
310     c = new RuleBasedCollator(rules, status);
311 
312     if (c == nullptr || U_FAILURE(status))
313     {
314         errln("failure building collator.");
315         delete c;
316         return;
317     }
318 
319     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
320 
321  /*
322     String[] tertiary = {
323         "A",        "<",    "B",
324         "H",        "<",    "\u0131",
325         "H",        "<",    "I",
326         "\u0131",   "<",    "\u0130",
327         "\u0130",   "<",    "i",
328         "\u0130",   ">",    "H",
329     };
330 */
331 
332     static const char16_t tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
333     {
334         {0x41, 0},    {0x3c, 0}, {0x42, 0},
335         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
336         {0x48, 0},    {0x3c, 0}, {0x49, 0},
337         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
338         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
339         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
340     };
341 
342     c->setStrength(Collator::TERTIARY);
343     compareArray(*c, tertiary, UPRV_LENGTHOF(tertiary));
344 
345     /*
346     String[] secondary = {
347         "H",        "<",    "I",
348         "\u0131",   "=",    "\u0130",
349     };
350 */
351     static const char16_t secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
352     {
353         {0x48, 0},    {0x3c, 0}, {0x49, 0},
354         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
355     };
356 
357     c->setStrength(Collator::PRIMARY);
358     compareArray(*c, secondary, UPRV_LENGTHOF(secondary));
359 
360     delete c;
361 }
362 
363 // @bug 4062418
364 //
365 // Secondary/Tertiary comparison incorrect in French Secondary
366 //
Test4062418()367 void CollationRegressionTest::Test4062418(/* char* par */)
368 {
369     UErrorCode status = U_ZERO_ERROR;
370 
371     RuleBasedCollator *c = nullptr;
372 
373     c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getCanadaFrench(), status));
374 
375     if (c == nullptr || U_FAILURE(status))
376     {
377         errln("Failed to create collator for Locale::getCanadaFrench()");
378         delete c;
379         return;
380     }
381 
382     c->setStrength(Collator::SECONDARY);
383 
384 /*
385     String[] tests = {
386             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
387     };
388 */
389     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
390     {
391         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
392     };
393 
394     compareArray(*c, tests, UPRV_LENGTHOF(tests));
395 
396     delete c;
397 }
398 
399 // @bug 4065540
400 //
401 // Collator::compare() method broken if either string contains spaces
402 //
Test4065540()403 void CollationRegressionTest::Test4065540(/* char* par */)
404 {
405     if (en_us->compare("abcd e", "abcd f") == 0)
406     {
407         errln("'abcd e' == 'abcd f'");
408     }
409 }
410 
411 // @bug 4066189
412 //
413 // Unicode characters need to be recursively decomposed to get the
414 // correct result. For example,
415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
416 //
Test4066189()417 void CollationRegressionTest::Test4066189(/* char* par */)
418 {
419     static const char16_t chars1[] = {0x1EB1, 0};
420     static const char16_t chars2[] = {0x61, 0x0306, 0x0300, 0};
421     const UnicodeString test1(chars1);
422     const UnicodeString test2(chars2);
423     UErrorCode status = U_ZERO_ERROR;
424 
425     // NOTE: The java code used en_us to create the
426     // CollationElementIterator's. I'm pretty sure that
427     // was wrong, so I've change the code to use c1 and c2
428     RuleBasedCollator *c1 = en_us->clone();
429     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
430     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
431 
432     RuleBasedCollator *c2 = en_us->clone();
433     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
434     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
435 
436     assertEqual(*i1, *i2);
437 
438     delete i2;
439     delete c2;
440     delete i1;
441     delete c1;
442 }
443 
444 // @bug 4066696
445 //
446 // French secondary collation checking at the end of compare iteration fails
447 //
Test4066696()448 void CollationRegressionTest::Test4066696(/* char* par */)
449 {
450     UErrorCode status = U_ZERO_ERROR;
451     RuleBasedCollator *c = nullptr;
452 
453     c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getCanadaFrench(), status));
454 
455     if (c == nullptr || U_FAILURE(status))
456     {
457         errln("Failure creating collator for Locale::getCanadaFrench()");
458         delete c;
459         return;
460     }
461 
462     c->setStrength(Collator::SECONDARY);
463 
464 /*
465     String[] tests = {
466         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
467     };
468 
469   should be:
470 
471     String[] tests = {
472         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
473     };
474 
475 */
476 
477     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
478     {
479         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
480     };
481 
482     compareArray(*c, tests, UPRV_LENGTHOF(tests));
483 
484     delete c;
485 }
486 
487 // @bug 4076676
488 //
489 // Bad canonicalization of same-class combining characters
490 //
Test4076676()491 void CollationRegressionTest::Test4076676(/* char* par */)
492 {
493     // These combining characters are all in the same class, so they should not
494     // be reordered, and they should compare as unequal.
495     static const char16_t s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
496     static const char16_t s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
497 
498     RuleBasedCollator *c = en_us->clone();
499     c->setStrength(Collator::TERTIARY);
500 
501     if (c->compare(s1,s2) == 0)
502     {
503         errln("Same-class combining chars were reordered");
504     }
505 
506     delete c;
507 }
508 
509 // @bug 4079231
510 //
511 // RuleBasedCollator::operator==(nullptr) throws NullPointerException
512 //
Test4079231()513 void CollationRegressionTest::Test4079231(/* char* par */)
514 {
515     // I don't think there's any way to write this test
516     // in C++. The following is equivalent to the Java,
517     // but doesn't compile 'cause nullptr can't be converted
518     // to Collator&
519     //
520     // if (en_us->operator==(nullptr))
521     // {
522     //     errln("en_us->operator==(nullptr) returned true");
523     // }
524 
525  /*
526    try {
527         if (en_us->equals(null)) {
528             errln("en_us->equals(null) returned true");
529         }
530     }
531     catch (Exception e) {
532         errln("en_us->equals(null) threw " + e.toString());
533     }
534 */
535 }
536 
537 // @bug 4078588
538 //
539 // RuleBasedCollator breaks on "< a < bb" rule
540 //
Test4078588()541 void CollationRegressionTest::Test4078588(/* char *par */)
542 {
543     UErrorCode status = U_ZERO_ERROR;
544     RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
545 
546     if (rbc == nullptr || U_FAILURE(status))
547     {
548         errln("Failed to create RuleBasedCollator.");
549         delete rbc;
550         return;
551     }
552 
553     Collator::EComparisonResult result = rbc->compare("a","bb");
554 
555     if (result != Collator::LESS)
556     {
557         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
558             + (UnicodeString)"; expected -1");
559     }
560 
561     delete rbc;
562 }
563 
564 // @bug 4081866
565 //
566 // Combining characters in different classes not reordered properly.
567 //
Test4081866()568 void CollationRegressionTest::Test4081866(/* char* par */)
569 {
570     // These combining characters are all in different classes,
571     // so they should be reordered and the strings should compare as equal.
572     static const char16_t s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
573     static const char16_t s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
574 
575     UErrorCode status = U_ZERO_ERROR;
576     RuleBasedCollator *c = en_us->clone();
577     c->setStrength(Collator::TERTIARY);
578 
579     // Now that the default collators are set to NO_DECOMPOSITION
580     // (as a result of fixing bug 4114077), we must set it explicitly
581     // when we're testing reordering behavior.  -- lwerner, 5/5/98
582     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
583 
584     if (c->compare(s1,s2) != 0)
585     {
586         errln("Combining chars were not reordered");
587     }
588 
589     delete c;
590 }
591 
592 // @bug 4087241
593 //
594 // string comparison errors in Scandinavian collators
595 //
Test4087241()596 void CollationRegressionTest::Test4087241(/* char* par */)
597 {
598     UErrorCode status = U_ZERO_ERROR;
599     Locale da_DK("da", "DK");
600     RuleBasedCollator *c = nullptr;
601 
602     c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(da_DK, status));
603 
604     if (c == nullptr || U_FAILURE(status))
605     {
606         errln("Failed to create collator for da_DK locale");
607         delete c;
608         return;
609     }
610 
611     c->setStrength(Collator::SECONDARY);
612 
613     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
614     {
615         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
616         {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
617         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
618     };
619 
620     compareArray(*c, tests, UPRV_LENGTHOF(tests));
621 
622     delete c;
623 }
624 
625 // @bug 4087243
626 //
627 // CollationKey takes ignorable strings into account when it shouldn't
628 //
Test4087243()629 void CollationRegressionTest::Test4087243(/* char* par */)
630 {
631     RuleBasedCollator *c = en_us->clone();
632     c->setStrength(Collator::TERTIARY);
633 
634     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
635     {
636         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
637     };
638 
639     compareArray(*c, tests, UPRV_LENGTHOF(tests));
640 
641     delete c;
642 }
643 
644 // @bug 4092260
645 //
646 // Mu/micro conflict
647 // Micro symbol and greek lowercase letter Mu should sort identically
648 //
Test4092260()649 void CollationRegressionTest::Test4092260(/* char* par */)
650 {
651     UErrorCode status = U_ZERO_ERROR;
652     Locale el("el", "");
653     Collator *c = nullptr;
654 
655     c = Collator::createInstance(el, status);
656 
657     if (c == nullptr || U_FAILURE(status))
658     {
659         errln("Failed to create collator for el locale.");
660         delete c;
661         return;
662     }
663 
664     // These now have tertiary differences in UCA
665     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
666 
667     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
668     {
669         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
670     };
671 
672     compareArray(*c, tests, UPRV_LENGTHOF(tests));
673 
674     delete c;
675 }
676 
677 // @bug 4095316
678 //
Test4095316()679 void CollationRegressionTest::Test4095316(/* char* par */)
680 {
681     UErrorCode status = U_ZERO_ERROR;
682     Locale el_GR("el", "GR");
683     Collator *c = Collator::createInstance(el_GR, status);
684 
685     if (c == nullptr || U_FAILURE(status))
686     {
687         errln("Failed to create collator for el_GR locale");
688         delete c;
689         return;
690     }
691     // These now have tertiary differences in UCA
692     //c->setStrength(Collator::TERTIARY);
693     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
694 
695     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
696     {
697         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
698     };
699 
700     compareArray(*c, tests, UPRV_LENGTHOF(tests));
701 
702     delete c;
703 }
704 
705 // @bug 4101940
706 //
Test4101940()707 void CollationRegressionTest::Test4101940(/* char* par */)
708 {
709     UErrorCode status = U_ZERO_ERROR;
710     RuleBasedCollator *c = nullptr;
711     UnicodeString rules = "&9 < a < b";
712     UnicodeString nothing = "";
713 
714     c = new RuleBasedCollator(rules, status);
715 
716     if (c == nullptr || U_FAILURE(status))
717     {
718         errln("Failed to create RuleBasedCollator");
719         delete c;
720         return;
721     }
722 
723     CollationElementIterator *i = c->createCollationElementIterator(nothing);
724     i->reset();
725 
726     if (i->next(status) != CollationElementIterator::NULLORDER)
727     {
728         errln("next did not return NULLORDER");
729     }
730 
731     delete i;
732     delete c;
733 }
734 
735 // @bug 4103436
736 //
737 // Collator::compare not handling spaces properly
738 //
Test4103436()739 void CollationRegressionTest::Test4103436(/* char* par */)
740 {
741     RuleBasedCollator *c = en_us->clone();
742     c->setStrength(Collator::TERTIARY);
743 
744     static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
745     {
746         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
747         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
748     };
749 
750     compareArray(*c, tests, UPRV_LENGTHOF(tests));
751 
752     delete c;
753 }
754 
755 // @bug 4114076
756 //
757 // Collation not Unicode conformant with Hangul syllables
758 //
Test4114076()759 void CollationRegressionTest::Test4114076(/* char* par */)
760 {
761     UErrorCode status = U_ZERO_ERROR;
762     RuleBasedCollator *c = en_us->clone();
763     c->setStrength(Collator::TERTIARY);
764 
765     //
766     // With Canonical decomposition, Hangul syllables should get decomposed
767     // into Jamo, but Jamo characters should not be decomposed into
768     // conjoining Jamo
769     //
770     static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
771     {
772         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
773     };
774 
775     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
776     compareArray(*c, test1, UPRV_LENGTHOF(test1));
777 
778     // From UTR #15:
779     // *In earlier versions of Unicode, jamo characters like ksf
780     //  had compatibility mappings to kf + sf. These mappings were
781     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
782     // That is, the following test is obsolete as of 2.1.9
783 
784 //obsolete-    // With Full decomposition, it should go all the way down to
785 //obsolete-    // conjoining Jamo characters.
786 //obsolete-    //
787 //obsolete-    static const char16_t test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
788 //obsolete-    {
789 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
790 //obsolete-    };
791 //obsolete-
792 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
793 //obsolete-    compareArray(*c, test2, UPRV_LENGTHOF(test2));
794 
795     delete c;
796 }
797 
798 
799 // @bug 4124632
800 //
801 // Collator::getCollationKey was hanging on certain character sequences
802 //
Test4124632()803 void CollationRegressionTest::Test4124632(/* char* par */)
804 {
805     UErrorCode status = U_ZERO_ERROR;
806     Collator *coll = nullptr;
807 
808     coll = Collator::createInstance(Locale::getJapan(), status);
809 
810     if (coll == nullptr || U_FAILURE(status))
811     {
812         errln("Failed to create collator for Locale::JAPAN");
813         delete coll;
814         return;
815     }
816 
817     static const char16_t test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818     CollationKey key;
819 
820     coll->getCollationKey(test, key, status);
821 
822     if (key.isBogus() || U_FAILURE(status))
823     {
824         errln("CollationKey creation failed.");
825     }
826 
827     delete coll;
828 }
829 
830 // @bug 4132736
831 //
832 // sort order of french words with multiple accents has errors
833 //
Test4132736()834 void CollationRegressionTest::Test4132736(/* char* par */)
835 {
836     UErrorCode status = U_ZERO_ERROR;
837 
838     Collator *c = nullptr;
839 
840     c = Collator::createInstance(Locale::getCanadaFrench(), status);
841     c->setStrength(Collator::TERTIARY);
842 
843     if (c == nullptr || U_FAILURE(status))
844     {
845         errln("Failed to create a collator for Locale::getCanadaFrench()");
846         delete c;
847         return;
848     }
849 
850     static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
851     {
852         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
853         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
854     };
855 
856     compareArray(*c, test1, UPRV_LENGTHOF(test1));
857 
858     delete c;
859 }
860 
861 // @bug 4133509
862 //
863 // The sorting using java.text.CollationKey is not in the exact order
864 //
Test4133509()865 void CollationRegressionTest::Test4133509(/* char* par */)
866 {
867     static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
868     {
869         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
870         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
871         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
872     };
873 
874     compareArray(*en_us, test1, UPRV_LENGTHOF(test1));
875 }
876 
877 // @bug 4114077
878 //
879 // Collation with decomposition off doesn't work for Europe
880 //
Test4114077()881 void CollationRegressionTest::Test4114077(/* char* par */)
882 {
883     // Ensure that we get the same results with decomposition off
884     // as we do with it on....
885 
886     UErrorCode status = U_ZERO_ERROR;
887     RuleBasedCollator *c = en_us->clone();
888     c->setStrength(Collator::TERTIARY);
889 
890     static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
891     {
892         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
893         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
894         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
895         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
896                                                 //   -> a, ring, acute
897         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
898     };
899 
900     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
901     compareArray(*c, test1, UPRV_LENGTHOF(test1));
902 
903     static const char16_t test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
904     {
905         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
906     };
907 
908     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
909     compareArray(*c, test2, UPRV_LENGTHOF(test2));
910 
911     delete c;
912 }
913 
914 // @bug 4141640
915 //
916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
917 //
Test4141640()918 void CollationRegressionTest::Test4141640(/* char* par */)
919 {
920     //
921     // Rather than just creating a Swedish collator, we might as well
922     // try to instantiate one for every locale available on the system
923     // in order to prevent this sort of bug from cropping up in the future
924     //
925     UErrorCode status = U_ZERO_ERROR;
926     int32_t i, localeCount;
927     const Locale *locales = Locale::getAvailableLocales(localeCount);
928 
929     for (i = 0; i < localeCount; i += 1)
930     {
931         Collator *c = nullptr;
932 
933         status = U_ZERO_ERROR;
934         c = Collator::createInstance(locales[i], status);
935 
936         if (c == nullptr || U_FAILURE(status))
937         {
938             UnicodeString msg, localeName;
939 
940             msg += "Could not create collator for locale ";
941             msg += locales[i].getName();
942 
943             errln(msg);
944         }
945 
946         delete c;
947     }
948 }
949 
950 // @bug 4139572
951 //
952 // getCollationKey throws exception for spanish text
953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
954 //
Test4139572()955 void CollationRegressionTest::Test4139572(/* char* par */)
956 {
957     //
958     // Code pasted straight from the bug report
959     // (and then translated to C++ ;-)
960     //
961     // create spanish locale and collator
962     UErrorCode status = U_ZERO_ERROR;
963     Locale l("es", "es");
964     Collator *col = nullptr;
965 
966     col = Collator::createInstance(l, status);
967 
968     if (col == nullptr || U_FAILURE(status))
969     {
970         errln("Failed to create a collator for es_es locale.");
971         delete col;
972         return;
973     }
974 
975     CollationKey key;
976 
977     // this spanish phrase kills it!
978     col->getCollationKey("Nombre De Objeto", key, status);
979 
980     if (key.isBogus() || U_FAILURE(status))
981     {
982         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983     }
984 
985     delete col;
986 }
987 
Test4179216()988 void CollationRegressionTest::Test4179216() {
989     // you can position a CollationElementIterator in the middle of
990     // a contracting character sequence, yielding a bogus collation
991     // element
992     IcuTestErrorCode errorCode(*this, "Test4179216");
993     RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
994     UnicodeString testText = "church church catcatcher runcrunchynchy";
995     CollationElementIterator *iter = coll.createCollationElementIterator(testText);
996 
997     // test that the "ch" combination works properly
998     iter->setOffset(4, errorCode);
999     int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1000 
1001     iter->reset();
1002     int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1003 
1004     iter->setOffset(5, errorCode);
1005     int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1006 
1007     // Compares and prints only 16-bit primary weights.
1008     if (elt4 != elt0 || elt5 != elt0) {
1009         errln("The collation elements at positions 0 (0x%04x), "
1010                 "4 (0x%04x), and 5 (0x%04x) don't match.",
1011                 elt0, elt4, elt5);
1012     }
1013 
1014     // test that the "cat" combination works properly
1015     iter->setOffset(14, errorCode);
1016     int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1017 
1018     iter->setOffset(15, errorCode);
1019     int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1020 
1021     iter->setOffset(16, errorCode);
1022     int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1023 
1024     iter->setOffset(17, errorCode);
1025     int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1026 
1027     iter->setOffset(18, errorCode);
1028     int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1029 
1030     iter->setOffset(19, errorCode);
1031     int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1032 
1033     // Compares and prints only 16-bit primary weights.
1034     if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1035             || elt14 != elt18 || elt14 != elt19) {
1036         errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1037                 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1038                 "elt18 = 0x%04x, elt19 = 0x%04x",
1039                 elt14, elt15, elt16, elt17, elt18, elt19);
1040     }
1041 
1042     // now generate a complete list of the collation elements,
1043     // first using next() and then using setOffset(), and
1044     // make sure both interfaces return the same set of elements
1045     iter->reset();
1046 
1047     int32_t elt = iter->next(errorCode);
1048     int32_t count = 0;
1049     while (elt != CollationElementIterator::NULLORDER) {
1050         ++count;
1051         elt = iter->next(errorCode);
1052     }
1053 
1054     LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1055     LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1056     int32_t lastPos = 0;
1057 
1058     iter->reset();
1059     elt = iter->next(errorCode);
1060     count = 0;
1061     while (elt != CollationElementIterator::NULLORDER) {
1062         nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1063         lastPos = iter->getOffset();
1064         elt = iter->next(errorCode);
1065     }
1066     int32_t nextElementsLength = count;
1067     count = 0;
1068     for (int32_t i = 0; i < testText.length(); ) {
1069         iter->setOffset(i, errorCode);
1070         lastPos = iter->getOffset();
1071         elt = iter->next(errorCode);
1072         setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1073         i = iter->getOffset();
1074     }
1075     for (int32_t i = 0; i < nextElementsLength; i++) {
1076         if (nextElements[i] == setOffsetElements[i]) {
1077             logln(nextElements[i]);
1078         } else {
1079             errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1080                 ", but setOffset() yielded " + setOffsetElements[i]);
1081         }
1082     }
1083     delete iter;
1084 }
1085 
1086 // Ticket 7189
1087 //
1088 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const char16_t * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1089 static int32_t calcKeyIncremental(UCollator *coll, const char16_t* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1090     UCharIterator uiter;
1091     uint32_t state[2] = { 0, 0 };
1092     int32_t keyLen;
1093     int32_t count = 8;
1094 
1095     uiter_setString(&uiter, text, len);
1096     keyLen = 0;
1097     while (true) {
1098         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1099         if (U_FAILURE(status)) {
1100             return -1;
1101         }
1102         if (keyPartLen == 0) {
1103             break;
1104         }
1105         keyLen += keyPartLen;
1106     }
1107     return keyLen;
1108 }
1109 
TestT7189()1110 void CollationRegressionTest::TestT7189() {
1111     UErrorCode status = U_ZERO_ERROR;
1112     UCollator *coll;
1113     uint32_t i;
1114 
1115     static const char16_t text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1116     // "Achter De Hoven"
1117         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1118         // "ABC"
1119         { 0x41, 0x42, 0x43, 0x00 },
1120         // "HELLO world!"
1121         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1122     };
1123 
1124     static const char16_t text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1125     // "Achter de Hoven"
1126         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1127         // "abc"
1128         { 0x61, 0x62, 0x63, 0x00 },
1129         // "hello world!"
1130         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1131     };
1132 
1133     // Open the collator
1134     coll = ucol_openFromShortString("EO_S1", false, nullptr, &status);
1135     if (U_FAILURE(status)) {
1136         errln("Failed to create a collator for short string EO_S1");
1137         return;
1138     }
1139 
1140     for (i = 0; i < UPRV_LENGTHOF(text1); i++) {
1141         uint8_t key1[100], key2[100];
1142         int32_t len1, len2;
1143 
1144         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1145         if (U_FAILURE(status)) {
1146             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1147             break;
1148         }
1149         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1150         if (U_FAILURE(status)) {
1151             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1152             break;
1153         }
1154 
1155         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1156             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1157         } else {
1158             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1159                     + TestUtility::hex(key2, len2));
1160         }
1161     }
1162     ucol_close(coll);
1163 }
1164 
TestCaseFirstCompression()1165 void CollationRegressionTest::TestCaseFirstCompression() {
1166     RuleBasedCollator *col = en_us->clone();
1167     UErrorCode status = U_ZERO_ERROR;
1168 
1169     // default
1170     caseFirstCompressionSub(col, "default");
1171 
1172     // Upper first
1173     col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1174     if (U_FAILURE(status)) {
1175         errln("Failed to set UCOL_UPPER_FIRST");
1176         return;
1177     }
1178     caseFirstCompressionSub(col, "upper first");
1179 
1180     // Lower first
1181     col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1182     if (U_FAILURE(status)) {
1183         errln("Failed to set UCOL_LOWER_FIRST");
1184         return;
1185     }
1186     caseFirstCompressionSub(col, "lower first");
1187 
1188     delete col;
1189 }
1190 
caseFirstCompressionSub(Collator * col,UnicodeString opt)1191 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1192     const int32_t maxLength = 50;
1193 
1194     char16_t str1[maxLength];
1195     char16_t str2[maxLength];
1196 
1197     CollationKey key1, key2;
1198 
1199     for (int32_t len = 1; len <= maxLength; len++) {
1200         int32_t i = 0;
1201         for (; i < len - 1; i++) {
1202             str1[i] = str2[i] = (char16_t)0x61; // 'a'
1203         }
1204         str1[i] = (char16_t)0x41; // 'A'
1205         str2[i] = (char16_t)0x61; // 'a'
1206 
1207         UErrorCode status = U_ZERO_ERROR;
1208         col->getCollationKey(str1, len, key1, status);
1209         col->getCollationKey(str2, len, key2, status);
1210 
1211         UCollationResult cmpKey = key1.compareTo(key2, status);
1212         UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1213 
1214         if (U_FAILURE(status)) {
1215             errln("Error in caseFirstCompressionSub");
1216         } else if (cmpKey != cmpCol) {
1217             errln((UnicodeString)"Inconsistent comparison(" + opt
1218                 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1219                 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1220         }
1221     }
1222 }
1223 
TestTrailingComment()1224 void CollationRegressionTest::TestTrailingComment() {
1225     // ICU ticket #8070:
1226     // Check that the rule parser handles a comment without terminating end-of-line.
1227     IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1228     RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1229     UnicodeString a((char16_t)0x61), b((char16_t)0x62), c((char16_t)0x63);
1230     assertTrue("c<b", coll.compare(c, b) < 0);
1231     assertTrue("b<a", coll.compare(b, a) < 0);
1232 }
1233 
TestBeforeWithTooStrongAfter()1234 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1235     // ICU ticket #9959:
1236     // Forbid rules with a before-reset followed by a stronger relation.
1237     IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1238     RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1239     if(errorCode.isSuccess()) {
1240         errln("should forbid before-2-reset followed by primary relation");
1241     } else {
1242         errorCode.reset();
1243     }
1244     RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1245     if(errorCode.isSuccess()) {
1246         errln("should forbid before-3-reset followed by primary or secondary relation");
1247     } else {
1248         errorCode.reset();
1249     }
1250 }
1251 
TestICU22555InfinityLoop()1252 void CollationRegressionTest::TestICU22555InfinityLoop() {
1253     char16_t data[] = {
1254         0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
1255         0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
1256         0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
1257         0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
1258         0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
1259         0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
1260         0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
1261         0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
1262         0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
1263         0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
1264     };
1265     icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t));
1266     UErrorCode status = U_ZERO_ERROR;
1267     icu::LocalPointer<icu::RuleBasedCollator> col1(
1268         new icu::RuleBasedCollator(rule, status));
1269 }
1270 
TestICU22517()1271 void CollationRegressionTest::TestICU22517() {
1272     IcuTestErrorCode errorCode(*this, "TestICU22517");
1273     char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
1274     icu::UnicodeString rule(true, data, -1);
1275     int length = quick ? rule.length()-2 : rule.length();
1276     for (int i = 4; i <= length; i++) {
1277       UErrorCode status = U_ZERO_ERROR;
1278       icu::LocalPointer<icu::RuleBasedCollator> col1(
1279           new icu::RuleBasedCollator(rule.tempSubString(0, i), status));
1280     }
1281 }
1282 
TestICU22277()1283 void CollationRegressionTest::TestICU22277() {
1284     IcuTestErrorCode errorCode(*this, "TestICU22277");
1285     UErrorCode status = U_ZERO_ERROR;
1286 
1287     Collator* c = Collator::createInstance("JA-u-Co-priVatE-KANa", status);
1288     if(c != nullptr || U_SUCCESS(status)) {
1289       errcheckln(status, "Collator should have failed with MemorySanitizer: use-of-uninitialized-value error - %s",
1290                  u_errorName(status));
1291       delete c;
1292       return;
1293     }
1294     c = Collator::createInstance("hE-U-cO-pRIVate-UNihan", status);
1295     if(c != nullptr || U_SUCCESS(status)) {
1296       errcheckln(status, "Collator should have failed with MemorySanitizer: use-of-uninitialized-value error - %s",
1297                  u_errorName(status));
1298       delete c;
1299       return;
1300     }
1301 }
1302 
compareArray(Collator & c,const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1303 void CollationRegressionTest::compareArray(Collator &c,
1304                                            const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1305                                            int32_t testCount)
1306 {
1307     int32_t i;
1308     Collator::EComparisonResult expectedResult = Collator::EQUAL;
1309 
1310     for (i = 0; i < testCount; i += 3)
1311     {
1312         UnicodeString source(tests[i]);
1313         UnicodeString comparison(tests[i + 1]);
1314         UnicodeString target(tests[i + 2]);
1315 
1316         if (comparison == "<")
1317         {
1318             expectedResult = Collator::LESS;
1319         }
1320         else if (comparison == ">")
1321         {
1322             expectedResult = Collator::GREATER;
1323         }
1324         else if (comparison == "=")
1325         {
1326             expectedResult = Collator::EQUAL;
1327         }
1328         else
1329         {
1330             UnicodeString bogus1("Bogus comparison string \"");
1331             UnicodeString bogus2("\"");
1332             errln(bogus1 + comparison + bogus2);
1333         }
1334 
1335         Collator::EComparisonResult compareResult = c.compare(source, target);
1336 
1337         CollationKey sourceKey, targetKey;
1338         UErrorCode status = U_ZERO_ERROR;
1339 
1340         c.getCollationKey(source, sourceKey, status);
1341 
1342         if (U_FAILURE(status))
1343         {
1344             errln("Couldn't get collationKey for source");
1345             continue;
1346         }
1347 
1348         c.getCollationKey(target, targetKey, status);
1349 
1350         if (U_FAILURE(status))
1351         {
1352             errln("Couldn't get collationKey for target");
1353             continue;
1354         }
1355 
1356         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1357 
1358         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1359 
1360     }
1361 }
1362 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1363 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1364 {
1365     int32_t c1, c2, count = 0;
1366     UErrorCode status = U_ZERO_ERROR;
1367 
1368     do
1369     {
1370         c1 = i1.next(status);
1371         c2 = i2.next(status);
1372 
1373         if (c1 != c2)
1374         {
1375             UnicodeString msg, msg1("    ");
1376 
1377             msg += msg1 + count;
1378             msg += ": strength(0x";
1379             appendHex(c1, 8, msg);
1380             msg += ") != strength(0x";
1381             appendHex(c2, 8, msg);
1382             msg += ")";
1383 
1384             errln(msg);
1385             break;
1386         }
1387 
1388         count += 1;
1389     }
1390     while (c1 != CollationElementIterator::NULLORDER);
1391 }
1392 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1393 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1394 {
1395     if (exec)
1396     {
1397         logln("Collation Regression Tests: ");
1398     }
1399 
1400     if(en_us == nullptr) {
1401         dataerrln("Class collator not instantiated");
1402         name = "";
1403         return;
1404     }
1405     TESTCASE_AUTO_BEGIN;
1406     TESTCASE_AUTO(Test4048446);
1407     TESTCASE_AUTO(Test4051866);
1408     TESTCASE_AUTO(Test4053636);
1409     TESTCASE_AUTO(Test4054238);
1410     TESTCASE_AUTO(Test4054734);
1411     TESTCASE_AUTO(Test4054736);
1412     TESTCASE_AUTO(Test4058613);
1413     TESTCASE_AUTO(Test4059820);
1414     TESTCASE_AUTO(Test4060154);
1415     TESTCASE_AUTO(Test4062418);
1416     TESTCASE_AUTO(Test4065540);
1417     TESTCASE_AUTO(Test4066189);
1418     TESTCASE_AUTO(Test4066696);
1419     TESTCASE_AUTO(Test4076676);
1420     TESTCASE_AUTO(Test4078588);
1421     TESTCASE_AUTO(Test4079231);
1422     TESTCASE_AUTO(Test4081866);
1423     TESTCASE_AUTO(Test4087241);
1424     TESTCASE_AUTO(Test4087243);
1425     TESTCASE_AUTO(Test4092260);
1426     TESTCASE_AUTO(Test4095316);
1427     TESTCASE_AUTO(Test4101940);
1428     TESTCASE_AUTO(Test4103436);
1429     TESTCASE_AUTO(Test4114076);
1430     TESTCASE_AUTO(Test4114077);
1431     TESTCASE_AUTO(Test4124632);
1432     TESTCASE_AUTO(Test4132736);
1433     TESTCASE_AUTO(Test4133509);
1434     TESTCASE_AUTO(Test4139572);
1435     TESTCASE_AUTO(Test4141640);
1436     TESTCASE_AUTO(Test4179216);
1437     TESTCASE_AUTO(TestT7189);
1438     TESTCASE_AUTO(TestCaseFirstCompression);
1439     TESTCASE_AUTO(TestTrailingComment);
1440     TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
1441     TESTCASE_AUTO(TestICU22277);
1442     TESTCASE_AUTO(TestICU22517);
1443     TESTCASE_AUTO(TestICU22555InfinityLoop);
1444     TESTCASE_AUTO_END;
1445 }
1446 
1447 #endif /* #if !UCONFIG_NO_COLLATION */
1448