1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include "unicode/utypes.h"
10
11 #if !UCONFIG_NO_COLLATION
12
13 #include "unicode/coll.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/unistr.h"
17 #include "unicode/sortkey.h"
18 #include "regcoll.h"
19 #include "sfwdchit.h"
20 #include "testutil.h"
21 #include "cmemory.h"
22
CollationRegressionTest()23 CollationRegressionTest::CollationRegressionTest()
24 {
25 UErrorCode status = U_ZERO_ERROR;
26
27 en_us = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getUS(), status));
28 if(U_FAILURE(status)) {
29 delete en_us;
30 en_us = nullptr;
31 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32 return;
33 }
34 }
35
~CollationRegressionTest()36 CollationRegressionTest::~CollationRegressionTest()
37 {
38 delete en_us;
39 }
40
41
42 // @bug 4048446
43 //
44 // CollationElementIterator.reset() doesn't work
45 //
Test4048446()46 void CollationRegressionTest::Test4048446(/* char* par */)
47 {
48 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52 UErrorCode status = U_ZERO_ERROR;
53
54 if (i1 == nullptr|| i2 == nullptr)
55 {
56 errln("Could not create CollationElementIterator's");
57 delete i1;
58 delete i2;
59 return;
60 }
61
62 while (i1->next(status) != CollationElementIterator::NULLORDER)
63 {
64 if (U_FAILURE(status))
65 {
66 errln("error calling next()");
67
68 delete i1;
69 delete i2;
70 return;
71 }
72 }
73
74 i1->reset();
75
76 assertEqual(*i1, *i2);
77
78 delete i1;
79 delete i2;
80 }
81
82 // @bug 4051866
83 //
84 // Collator -> rules -> Collator round-trip broken for expanding characters
85 //
Test4051866()86 void CollationRegressionTest::Test4051866(/* char* par */)
87 {
88 UnicodeString rules;
89 UErrorCode status = U_ZERO_ERROR;
90
91 rules += "&n < o ";
92 rules += "& oe ,o";
93 rules += (char16_t)0x3080;
94 rules += "& oe ,";
95 rules += (char16_t)0x1530;
96 rules += " ,O";
97 rules += "& OE ,O";
98 rules += (char16_t)0x3080;
99 rules += "& OE ,";
100 rules += (char16_t)0x1520;
101 rules += "< p ,P";
102
103 // Build a collator containing expanding characters
104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
105 if (U_FAILURE(status)) {
106 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
107 return;
108 }
109
110 // Build another using the rules from the first
111 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
112 if (U_FAILURE(status)) {
113 errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
114 return;
115 }
116
117 // Make sure they're the same
118 if (!(c1->getRules() == c2->getRules()))
119 {
120 errln("Rules are not equal");
121 }
122 }
123
124 // @bug 4053636
125 //
126 // Collator thinks "black-bird" == "black"
127 //
Test4053636()128 void CollationRegressionTest::Test4053636(/* char* par */)
129 {
130 if (en_us->equals("black_bird", "black"))
131 {
132 errln("black-bird == black");
133 }
134 }
135
136 // @bug 4054238
137 //
138 // CollationElementIterator will not work correctly if the associated
139 // Collator object's mode is changed
140 //
Test4054238()141 void CollationRegressionTest::Test4054238(/* char* par */)
142 {
143 const char16_t chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
144 const UnicodeString test3(chars3);
145 RuleBasedCollator *c = en_us->clone();
146
147 // NOTE: The Java code uses en_us to create the CollationElementIterators
148 // but I'm pretty sure that's wrong, so I've changed this to use c.
149 UErrorCode status = U_ZERO_ERROR;
150 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
151 CollationElementIterator *i1 = c->createCollationElementIterator(test3);
152 delete i1;
153 delete c;
154 }
155
156 // @bug 4054734
157 //
158 // Collator::IDENTICAL documented but not implemented
159 //
Test4054734()160 void CollationRegressionTest::Test4054734(/* char* par */)
161 {
162 /*
163 Here's the original Java:
164
165 String[] decomp = {
166 "\u0001", "<", "\u0002",
167 "\u0001", "=", "\u0001",
168 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
169 "\u00C0", "=", "A\u0300" // Decomp should make these equal
170 };
171
172 String[] nodecomp = {
173 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
174 };
175 */
176
177 static const char16_t decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
178 {
179 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
180 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
181 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
182 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
183 };
184
185
186 UErrorCode status = U_ZERO_ERROR;
187 RuleBasedCollator *c = en_us->clone();
188
189 c->setStrength(Collator::IDENTICAL);
190
191 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
192 compareArray(*c, decomp, UPRV_LENGTHOF(decomp));
193
194 delete c;
195 }
196
197 // @bug 4054736
198 //
199 // Full Decomposition mode not implemented
200 //
Test4054736()201 void CollationRegressionTest::Test4054736(/* char* par */)
202 {
203 UErrorCode status = U_ZERO_ERROR;
204 RuleBasedCollator *c = en_us->clone();
205
206 c->setStrength(Collator::SECONDARY);
207 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
208
209 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
210 {
211 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
212 };
213
214 compareArray(*c, tests, UPRV_LENGTHOF(tests));
215
216 delete c;
217 }
218
219 // @bug 4058613
220 //
221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
222 //
Test4058613()223 void CollationRegressionTest::Test4058613(/* char* par */)
224 {
225 // Creating a default collator doesn't work when Korean is the default
226 // locale
227
228 Locale oldDefault = Locale::getDefault();
229 UErrorCode status = U_ZERO_ERROR;
230
231 Locale::setDefault(Locale::getKorean(), status);
232
233 if (U_FAILURE(status))
234 {
235 errln("Could not set default locale to Locale::KOREAN");
236 return;
237 }
238
239 Collator *c = nullptr;
240
241 c = Collator::createInstance("en_US", status);
242
243 if (c == nullptr || U_FAILURE(status))
244 {
245 errln("Could not create a Korean collator");
246 Locale::setDefault(oldDefault, status);
247 delete c;
248 return;
249 }
250
251 // Since the fix to this bug was to turn off decomposition for Korean collators,
252 // ensure that's what we got
253 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
254 {
255 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256 }
257
258 delete c;
259
260 Locale::setDefault(oldDefault, status);
261 }
262
263 // @bug 4059820
264 //
265 // RuleBasedCollator.getRules does not return the exact pattern as input
266 // for expanding character sequences
267 //
Test4059820()268 void CollationRegressionTest::Test4059820(/* char* par */)
269 {
270 UErrorCode status = U_ZERO_ERROR;
271
272 RuleBasedCollator *c = nullptr;
273 UnicodeString rules = "&9 < a < b , c/a < d < z";
274
275 c = new RuleBasedCollator(rules, status);
276
277 if (c == nullptr || U_FAILURE(status))
278 {
279 errln("Failure building a collator.");
280 delete c;
281 return;
282 }
283
284 if ( c->getRules().indexOf("c/a") == -1)
285 {
286 errln("returned rules do not contain 'c/a'");
287 }
288
289 delete c;
290 }
291
292 // @bug 4060154
293 //
294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
295 //
Test4060154()296 void CollationRegressionTest::Test4060154(/* char* par */)
297 {
298 UErrorCode status = U_ZERO_ERROR;
299 UnicodeString rules;
300
301 rules += "&f < g, G < h, H < i, I < j, J";
302 rules += " & H < ";
303 rules += (char16_t)0x0131;
304 rules += ", ";
305 rules += (char16_t)0x0130;
306 rules += ", i, I";
307
308 RuleBasedCollator *c = nullptr;
309
310 c = new RuleBasedCollator(rules, status);
311
312 if (c == nullptr || U_FAILURE(status))
313 {
314 errln("failure building collator.");
315 delete c;
316 return;
317 }
318
319 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
320
321 /*
322 String[] tertiary = {
323 "A", "<", "B",
324 "H", "<", "\u0131",
325 "H", "<", "I",
326 "\u0131", "<", "\u0130",
327 "\u0130", "<", "i",
328 "\u0130", ">", "H",
329 };
330 */
331
332 static const char16_t tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
333 {
334 {0x41, 0}, {0x3c, 0}, {0x42, 0},
335 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
336 {0x48, 0}, {0x3c, 0}, {0x49, 0},
337 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
338 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
339 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
340 };
341
342 c->setStrength(Collator::TERTIARY);
343 compareArray(*c, tertiary, UPRV_LENGTHOF(tertiary));
344
345 /*
346 String[] secondary = {
347 "H", "<", "I",
348 "\u0131", "=", "\u0130",
349 };
350 */
351 static const char16_t secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
352 {
353 {0x48, 0}, {0x3c, 0}, {0x49, 0},
354 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
355 };
356
357 c->setStrength(Collator::PRIMARY);
358 compareArray(*c, secondary, UPRV_LENGTHOF(secondary));
359
360 delete c;
361 }
362
363 // @bug 4062418
364 //
365 // Secondary/Tertiary comparison incorrect in French Secondary
366 //
Test4062418()367 void CollationRegressionTest::Test4062418(/* char* par */)
368 {
369 UErrorCode status = U_ZERO_ERROR;
370
371 RuleBasedCollator *c = nullptr;
372
373 c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getCanadaFrench(), status));
374
375 if (c == nullptr || U_FAILURE(status))
376 {
377 errln("Failed to create collator for Locale::getCanadaFrench()");
378 delete c;
379 return;
380 }
381
382 c->setStrength(Collator::SECONDARY);
383
384 /*
385 String[] tests = {
386 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
387 };
388 */
389 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
390 {
391 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
392 };
393
394 compareArray(*c, tests, UPRV_LENGTHOF(tests));
395
396 delete c;
397 }
398
399 // @bug 4065540
400 //
401 // Collator::compare() method broken if either string contains spaces
402 //
Test4065540()403 void CollationRegressionTest::Test4065540(/* char* par */)
404 {
405 if (en_us->compare("abcd e", "abcd f") == 0)
406 {
407 errln("'abcd e' == 'abcd f'");
408 }
409 }
410
411 // @bug 4066189
412 //
413 // Unicode characters need to be recursively decomposed to get the
414 // correct result. For example,
415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
416 //
Test4066189()417 void CollationRegressionTest::Test4066189(/* char* par */)
418 {
419 static const char16_t chars1[] = {0x1EB1, 0};
420 static const char16_t chars2[] = {0x61, 0x0306, 0x0300, 0};
421 const UnicodeString test1(chars1);
422 const UnicodeString test2(chars2);
423 UErrorCode status = U_ZERO_ERROR;
424
425 // NOTE: The java code used en_us to create the
426 // CollationElementIterator's. I'm pretty sure that
427 // was wrong, so I've change the code to use c1 and c2
428 RuleBasedCollator *c1 = en_us->clone();
429 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
430 CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
431
432 RuleBasedCollator *c2 = en_us->clone();
433 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
434 CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
435
436 assertEqual(*i1, *i2);
437
438 delete i2;
439 delete c2;
440 delete i1;
441 delete c1;
442 }
443
444 // @bug 4066696
445 //
446 // French secondary collation checking at the end of compare iteration fails
447 //
Test4066696()448 void CollationRegressionTest::Test4066696(/* char* par */)
449 {
450 UErrorCode status = U_ZERO_ERROR;
451 RuleBasedCollator *c = nullptr;
452
453 c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getCanadaFrench(), status));
454
455 if (c == nullptr || U_FAILURE(status))
456 {
457 errln("Failure creating collator for Locale::getCanadaFrench()");
458 delete c;
459 return;
460 }
461
462 c->setStrength(Collator::SECONDARY);
463
464 /*
465 String[] tests = {
466 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
467 };
468
469 should be:
470
471 String[] tests = {
472 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
473 };
474
475 */
476
477 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
478 {
479 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
480 };
481
482 compareArray(*c, tests, UPRV_LENGTHOF(tests));
483
484 delete c;
485 }
486
487 // @bug 4076676
488 //
489 // Bad canonicalization of same-class combining characters
490 //
Test4076676()491 void CollationRegressionTest::Test4076676(/* char* par */)
492 {
493 // These combining characters are all in the same class, so they should not
494 // be reordered, and they should compare as unequal.
495 static const char16_t s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
496 static const char16_t s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
497
498 RuleBasedCollator *c = en_us->clone();
499 c->setStrength(Collator::TERTIARY);
500
501 if (c->compare(s1,s2) == 0)
502 {
503 errln("Same-class combining chars were reordered");
504 }
505
506 delete c;
507 }
508
509 // @bug 4079231
510 //
511 // RuleBasedCollator::operator==(nullptr) throws NullPointerException
512 //
Test4079231()513 void CollationRegressionTest::Test4079231(/* char* par */)
514 {
515 // I don't think there's any way to write this test
516 // in C++. The following is equivalent to the Java,
517 // but doesn't compile 'cause nullptr can't be converted
518 // to Collator&
519 //
520 // if (en_us->operator==(nullptr))
521 // {
522 // errln("en_us->operator==(nullptr) returned true");
523 // }
524
525 /*
526 try {
527 if (en_us->equals(null)) {
528 errln("en_us->equals(null) returned true");
529 }
530 }
531 catch (Exception e) {
532 errln("en_us->equals(null) threw " + e.toString());
533 }
534 */
535 }
536
537 // @bug 4078588
538 //
539 // RuleBasedCollator breaks on "< a < bb" rule
540 //
Test4078588()541 void CollationRegressionTest::Test4078588(/* char *par */)
542 {
543 UErrorCode status = U_ZERO_ERROR;
544 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
545
546 if (rbc == nullptr || U_FAILURE(status))
547 {
548 errln("Failed to create RuleBasedCollator.");
549 delete rbc;
550 return;
551 }
552
553 Collator::EComparisonResult result = rbc->compare("a","bb");
554
555 if (result != Collator::LESS)
556 {
557 errln((UnicodeString)"Compare(a,bb) returned " + (int)result
558 + (UnicodeString)"; expected -1");
559 }
560
561 delete rbc;
562 }
563
564 // @bug 4081866
565 //
566 // Combining characters in different classes not reordered properly.
567 //
Test4081866()568 void CollationRegressionTest::Test4081866(/* char* par */)
569 {
570 // These combining characters are all in different classes,
571 // so they should be reordered and the strings should compare as equal.
572 static const char16_t s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
573 static const char16_t s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
574
575 UErrorCode status = U_ZERO_ERROR;
576 RuleBasedCollator *c = en_us->clone();
577 c->setStrength(Collator::TERTIARY);
578
579 // Now that the default collators are set to NO_DECOMPOSITION
580 // (as a result of fixing bug 4114077), we must set it explicitly
581 // when we're testing reordering behavior. -- lwerner, 5/5/98
582 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
583
584 if (c->compare(s1,s2) != 0)
585 {
586 errln("Combining chars were not reordered");
587 }
588
589 delete c;
590 }
591
592 // @bug 4087241
593 //
594 // string comparison errors in Scandinavian collators
595 //
Test4087241()596 void CollationRegressionTest::Test4087241(/* char* par */)
597 {
598 UErrorCode status = U_ZERO_ERROR;
599 Locale da_DK("da", "DK");
600 RuleBasedCollator *c = nullptr;
601
602 c = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(da_DK, status));
603
604 if (c == nullptr || U_FAILURE(status))
605 {
606 errln("Failed to create collator for da_DK locale");
607 delete c;
608 return;
609 }
610
611 c->setStrength(Collator::SECONDARY);
612
613 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
614 {
615 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
616 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
617 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
618 };
619
620 compareArray(*c, tests, UPRV_LENGTHOF(tests));
621
622 delete c;
623 }
624
625 // @bug 4087243
626 //
627 // CollationKey takes ignorable strings into account when it shouldn't
628 //
Test4087243()629 void CollationRegressionTest::Test4087243(/* char* par */)
630 {
631 RuleBasedCollator *c = en_us->clone();
632 c->setStrength(Collator::TERTIARY);
633
634 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
635 {
636 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
637 };
638
639 compareArray(*c, tests, UPRV_LENGTHOF(tests));
640
641 delete c;
642 }
643
644 // @bug 4092260
645 //
646 // Mu/micro conflict
647 // Micro symbol and greek lowercase letter Mu should sort identically
648 //
Test4092260()649 void CollationRegressionTest::Test4092260(/* char* par */)
650 {
651 UErrorCode status = U_ZERO_ERROR;
652 Locale el("el", "");
653 Collator *c = nullptr;
654
655 c = Collator::createInstance(el, status);
656
657 if (c == nullptr || U_FAILURE(status))
658 {
659 errln("Failed to create collator for el locale.");
660 delete c;
661 return;
662 }
663
664 // These now have tertiary differences in UCA
665 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
666
667 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
668 {
669 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
670 };
671
672 compareArray(*c, tests, UPRV_LENGTHOF(tests));
673
674 delete c;
675 }
676
677 // @bug 4095316
678 //
Test4095316()679 void CollationRegressionTest::Test4095316(/* char* par */)
680 {
681 UErrorCode status = U_ZERO_ERROR;
682 Locale el_GR("el", "GR");
683 Collator *c = Collator::createInstance(el_GR, status);
684
685 if (c == nullptr || U_FAILURE(status))
686 {
687 errln("Failed to create collator for el_GR locale");
688 delete c;
689 return;
690 }
691 // These now have tertiary differences in UCA
692 //c->setStrength(Collator::TERTIARY);
693 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
694
695 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
696 {
697 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
698 };
699
700 compareArray(*c, tests, UPRV_LENGTHOF(tests));
701
702 delete c;
703 }
704
705 // @bug 4101940
706 //
Test4101940()707 void CollationRegressionTest::Test4101940(/* char* par */)
708 {
709 UErrorCode status = U_ZERO_ERROR;
710 RuleBasedCollator *c = nullptr;
711 UnicodeString rules = "&9 < a < b";
712 UnicodeString nothing = "";
713
714 c = new RuleBasedCollator(rules, status);
715
716 if (c == nullptr || U_FAILURE(status))
717 {
718 errln("Failed to create RuleBasedCollator");
719 delete c;
720 return;
721 }
722
723 CollationElementIterator *i = c->createCollationElementIterator(nothing);
724 i->reset();
725
726 if (i->next(status) != CollationElementIterator::NULLORDER)
727 {
728 errln("next did not return NULLORDER");
729 }
730
731 delete i;
732 delete c;
733 }
734
735 // @bug 4103436
736 //
737 // Collator::compare not handling spaces properly
738 //
Test4103436()739 void CollationRegressionTest::Test4103436(/* char* par */)
740 {
741 RuleBasedCollator *c = en_us->clone();
742 c->setStrength(Collator::TERTIARY);
743
744 static const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
745 {
746 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
748 };
749
750 compareArray(*c, tests, UPRV_LENGTHOF(tests));
751
752 delete c;
753 }
754
755 // @bug 4114076
756 //
757 // Collation not Unicode conformant with Hangul syllables
758 //
Test4114076()759 void CollationRegressionTest::Test4114076(/* char* par */)
760 {
761 UErrorCode status = U_ZERO_ERROR;
762 RuleBasedCollator *c = en_us->clone();
763 c->setStrength(Collator::TERTIARY);
764
765 //
766 // With Canonical decomposition, Hangul syllables should get decomposed
767 // into Jamo, but Jamo characters should not be decomposed into
768 // conjoining Jamo
769 //
770 static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
771 {
772 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
773 };
774
775 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
776 compareArray(*c, test1, UPRV_LENGTHOF(test1));
777
778 // From UTR #15:
779 // *In earlier versions of Unicode, jamo characters like ksf
780 // had compatibility mappings to kf + sf. These mappings were
781 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
782 // That is, the following test is obsolete as of 2.1.9
783
784 //obsolete- // With Full decomposition, it should go all the way down to
785 //obsolete- // conjoining Jamo characters.
786 //obsolete- //
787 //obsolete- static const char16_t test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
788 //obsolete- {
789 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
790 //obsolete- };
791 //obsolete-
792 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
793 //obsolete- compareArray(*c, test2, UPRV_LENGTHOF(test2));
794
795 delete c;
796 }
797
798
799 // @bug 4124632
800 //
801 // Collator::getCollationKey was hanging on certain character sequences
802 //
Test4124632()803 void CollationRegressionTest::Test4124632(/* char* par */)
804 {
805 UErrorCode status = U_ZERO_ERROR;
806 Collator *coll = nullptr;
807
808 coll = Collator::createInstance(Locale::getJapan(), status);
809
810 if (coll == nullptr || U_FAILURE(status))
811 {
812 errln("Failed to create collator for Locale::JAPAN");
813 delete coll;
814 return;
815 }
816
817 static const char16_t test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818 CollationKey key;
819
820 coll->getCollationKey(test, key, status);
821
822 if (key.isBogus() || U_FAILURE(status))
823 {
824 errln("CollationKey creation failed.");
825 }
826
827 delete coll;
828 }
829
830 // @bug 4132736
831 //
832 // sort order of french words with multiple accents has errors
833 //
Test4132736()834 void CollationRegressionTest::Test4132736(/* char* par */)
835 {
836 UErrorCode status = U_ZERO_ERROR;
837
838 Collator *c = nullptr;
839
840 c = Collator::createInstance(Locale::getCanadaFrench(), status);
841 c->setStrength(Collator::TERTIARY);
842
843 if (c == nullptr || U_FAILURE(status))
844 {
845 errln("Failed to create a collator for Locale::getCanadaFrench()");
846 delete c;
847 return;
848 }
849
850 static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
851 {
852 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
853 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
854 };
855
856 compareArray(*c, test1, UPRV_LENGTHOF(test1));
857
858 delete c;
859 }
860
861 // @bug 4133509
862 //
863 // The sorting using java.text.CollationKey is not in the exact order
864 //
Test4133509()865 void CollationRegressionTest::Test4133509(/* char* par */)
866 {
867 static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
868 {
869 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
870 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
871 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
872 };
873
874 compareArray(*en_us, test1, UPRV_LENGTHOF(test1));
875 }
876
877 // @bug 4114077
878 //
879 // Collation with decomposition off doesn't work for Europe
880 //
Test4114077()881 void CollationRegressionTest::Test4114077(/* char* par */)
882 {
883 // Ensure that we get the same results with decomposition off
884 // as we do with it on....
885
886 UErrorCode status = U_ZERO_ERROR;
887 RuleBasedCollator *c = en_us->clone();
888 c->setStrength(Collator::TERTIARY);
889
890 static const char16_t test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
891 {
892 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
893 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
894 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
895 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
896 // -> a, ring, acute
897 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
898 };
899
900 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
901 compareArray(*c, test1, UPRV_LENGTHOF(test1));
902
903 static const char16_t test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
904 {
905 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
906 };
907
908 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
909 compareArray(*c, test2, UPRV_LENGTHOF(test2));
910
911 delete c;
912 }
913
914 // @bug 4141640
915 //
916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
917 //
Test4141640()918 void CollationRegressionTest::Test4141640(/* char* par */)
919 {
920 //
921 // Rather than just creating a Swedish collator, we might as well
922 // try to instantiate one for every locale available on the system
923 // in order to prevent this sort of bug from cropping up in the future
924 //
925 UErrorCode status = U_ZERO_ERROR;
926 int32_t i, localeCount;
927 const Locale *locales = Locale::getAvailableLocales(localeCount);
928
929 for (i = 0; i < localeCount; i += 1)
930 {
931 Collator *c = nullptr;
932
933 status = U_ZERO_ERROR;
934 c = Collator::createInstance(locales[i], status);
935
936 if (c == nullptr || U_FAILURE(status))
937 {
938 UnicodeString msg, localeName;
939
940 msg += "Could not create collator for locale ";
941 msg += locales[i].getName();
942
943 errln(msg);
944 }
945
946 delete c;
947 }
948 }
949
950 // @bug 4139572
951 //
952 // getCollationKey throws exception for spanish text
953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
954 //
Test4139572()955 void CollationRegressionTest::Test4139572(/* char* par */)
956 {
957 //
958 // Code pasted straight from the bug report
959 // (and then translated to C++ ;-)
960 //
961 // create spanish locale and collator
962 UErrorCode status = U_ZERO_ERROR;
963 Locale l("es", "es");
964 Collator *col = nullptr;
965
966 col = Collator::createInstance(l, status);
967
968 if (col == nullptr || U_FAILURE(status))
969 {
970 errln("Failed to create a collator for es_es locale.");
971 delete col;
972 return;
973 }
974
975 CollationKey key;
976
977 // this spanish phrase kills it!
978 col->getCollationKey("Nombre De Objeto", key, status);
979
980 if (key.isBogus() || U_FAILURE(status))
981 {
982 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983 }
984
985 delete col;
986 }
987
Test4179216()988 void CollationRegressionTest::Test4179216() {
989 // you can position a CollationElementIterator in the middle of
990 // a contracting character sequence, yielding a bogus collation
991 // element
992 IcuTestErrorCode errorCode(*this, "Test4179216");
993 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
994 UnicodeString testText = "church church catcatcher runcrunchynchy";
995 CollationElementIterator *iter = coll.createCollationElementIterator(testText);
996
997 // test that the "ch" combination works properly
998 iter->setOffset(4, errorCode);
999 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1000
1001 iter->reset();
1002 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1003
1004 iter->setOffset(5, errorCode);
1005 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1006
1007 // Compares and prints only 16-bit primary weights.
1008 if (elt4 != elt0 || elt5 != elt0) {
1009 errln("The collation elements at positions 0 (0x%04x), "
1010 "4 (0x%04x), and 5 (0x%04x) don't match.",
1011 elt0, elt4, elt5);
1012 }
1013
1014 // test that the "cat" combination works properly
1015 iter->setOffset(14, errorCode);
1016 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1017
1018 iter->setOffset(15, errorCode);
1019 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1020
1021 iter->setOffset(16, errorCode);
1022 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1023
1024 iter->setOffset(17, errorCode);
1025 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1026
1027 iter->setOffset(18, errorCode);
1028 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1029
1030 iter->setOffset(19, errorCode);
1031 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1032
1033 // Compares and prints only 16-bit primary weights.
1034 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1035 || elt14 != elt18 || elt14 != elt19) {
1036 errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1037 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1038 "elt18 = 0x%04x, elt19 = 0x%04x",
1039 elt14, elt15, elt16, elt17, elt18, elt19);
1040 }
1041
1042 // now generate a complete list of the collation elements,
1043 // first using next() and then using setOffset(), and
1044 // make sure both interfaces return the same set of elements
1045 iter->reset();
1046
1047 int32_t elt = iter->next(errorCode);
1048 int32_t count = 0;
1049 while (elt != CollationElementIterator::NULLORDER) {
1050 ++count;
1051 elt = iter->next(errorCode);
1052 }
1053
1054 LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1055 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1056 int32_t lastPos = 0;
1057
1058 iter->reset();
1059 elt = iter->next(errorCode);
1060 count = 0;
1061 while (elt != CollationElementIterator::NULLORDER) {
1062 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1063 lastPos = iter->getOffset();
1064 elt = iter->next(errorCode);
1065 }
1066 int32_t nextElementsLength = count;
1067 count = 0;
1068 for (int32_t i = 0; i < testText.length(); ) {
1069 iter->setOffset(i, errorCode);
1070 lastPos = iter->getOffset();
1071 elt = iter->next(errorCode);
1072 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1073 i = iter->getOffset();
1074 }
1075 for (int32_t i = 0; i < nextElementsLength; i++) {
1076 if (nextElements[i] == setOffsetElements[i]) {
1077 logln(nextElements[i]);
1078 } else {
1079 errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1080 ", but setOffset() yielded " + setOffsetElements[i]);
1081 }
1082 }
1083 delete iter;
1084 }
1085
1086 // Ticket 7189
1087 //
1088 // nextSortKeyPart incorrect for EO_S1 collation
calcKeyIncremental(UCollator * coll,const char16_t * text,int32_t len,uint8_t * keyBuf,int32_t,UErrorCode & status)1089 static int32_t calcKeyIncremental(UCollator *coll, const char16_t* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1090 UCharIterator uiter;
1091 uint32_t state[2] = { 0, 0 };
1092 int32_t keyLen;
1093 int32_t count = 8;
1094
1095 uiter_setString(&uiter, text, len);
1096 keyLen = 0;
1097 while (true) {
1098 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1099 if (U_FAILURE(status)) {
1100 return -1;
1101 }
1102 if (keyPartLen == 0) {
1103 break;
1104 }
1105 keyLen += keyPartLen;
1106 }
1107 return keyLen;
1108 }
1109
TestT7189()1110 void CollationRegressionTest::TestT7189() {
1111 UErrorCode status = U_ZERO_ERROR;
1112 UCollator *coll;
1113 uint32_t i;
1114
1115 static const char16_t text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1116 // "Achter De Hoven"
1117 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1118 // "ABC"
1119 { 0x41, 0x42, 0x43, 0x00 },
1120 // "HELLO world!"
1121 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1122 };
1123
1124 static const char16_t text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1125 // "Achter de Hoven"
1126 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1127 // "abc"
1128 { 0x61, 0x62, 0x63, 0x00 },
1129 // "hello world!"
1130 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1131 };
1132
1133 // Open the collator
1134 coll = ucol_openFromShortString("EO_S1", false, nullptr, &status);
1135 if (U_FAILURE(status)) {
1136 errln("Failed to create a collator for short string EO_S1");
1137 return;
1138 }
1139
1140 for (i = 0; i < UPRV_LENGTHOF(text1); i++) {
1141 uint8_t key1[100], key2[100];
1142 int32_t len1, len2;
1143
1144 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1145 if (U_FAILURE(status)) {
1146 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1147 break;
1148 }
1149 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1150 if (U_FAILURE(status)) {
1151 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1152 break;
1153 }
1154
1155 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1156 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1));
1157 } else {
1158 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : "
1159 + TestUtility::hex(key2, len2));
1160 }
1161 }
1162 ucol_close(coll);
1163 }
1164
TestCaseFirstCompression()1165 void CollationRegressionTest::TestCaseFirstCompression() {
1166 RuleBasedCollator *col = en_us->clone();
1167 UErrorCode status = U_ZERO_ERROR;
1168
1169 // default
1170 caseFirstCompressionSub(col, "default");
1171
1172 // Upper first
1173 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1174 if (U_FAILURE(status)) {
1175 errln("Failed to set UCOL_UPPER_FIRST");
1176 return;
1177 }
1178 caseFirstCompressionSub(col, "upper first");
1179
1180 // Lower first
1181 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1182 if (U_FAILURE(status)) {
1183 errln("Failed to set UCOL_LOWER_FIRST");
1184 return;
1185 }
1186 caseFirstCompressionSub(col, "lower first");
1187
1188 delete col;
1189 }
1190
caseFirstCompressionSub(Collator * col,UnicodeString opt)1191 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1192 const int32_t maxLength = 50;
1193
1194 char16_t str1[maxLength];
1195 char16_t str2[maxLength];
1196
1197 CollationKey key1, key2;
1198
1199 for (int32_t len = 1; len <= maxLength; len++) {
1200 int32_t i = 0;
1201 for (; i < len - 1; i++) {
1202 str1[i] = str2[i] = (char16_t)0x61; // 'a'
1203 }
1204 str1[i] = (char16_t)0x41; // 'A'
1205 str2[i] = (char16_t)0x61; // 'a'
1206
1207 UErrorCode status = U_ZERO_ERROR;
1208 col->getCollationKey(str1, len, key1, status);
1209 col->getCollationKey(str2, len, key2, status);
1210
1211 UCollationResult cmpKey = key1.compareTo(key2, status);
1212 UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1213
1214 if (U_FAILURE(status)) {
1215 errln("Error in caseFirstCompressionSub");
1216 } else if (cmpKey != cmpCol) {
1217 errln((UnicodeString)"Inconsistent comparison(" + opt
1218 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1219 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1220 }
1221 }
1222 }
1223
TestTrailingComment()1224 void CollationRegressionTest::TestTrailingComment() {
1225 // ICU ticket #8070:
1226 // Check that the rule parser handles a comment without terminating end-of-line.
1227 IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1228 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1229 UnicodeString a((char16_t)0x61), b((char16_t)0x62), c((char16_t)0x63);
1230 assertTrue("c<b", coll.compare(c, b) < 0);
1231 assertTrue("b<a", coll.compare(b, a) < 0);
1232 }
1233
TestBeforeWithTooStrongAfter()1234 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1235 // ICU ticket #9959:
1236 // Forbid rules with a before-reset followed by a stronger relation.
1237 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1238 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1239 if(errorCode.isSuccess()) {
1240 errln("should forbid before-2-reset followed by primary relation");
1241 } else {
1242 errorCode.reset();
1243 }
1244 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1245 if(errorCode.isSuccess()) {
1246 errln("should forbid before-3-reset followed by primary or secondary relation");
1247 } else {
1248 errorCode.reset();
1249 }
1250 }
1251
TestICU22555InfinityLoop()1252 void CollationRegressionTest::TestICU22555InfinityLoop() {
1253 char16_t data[] = {
1254 0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
1255 0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
1256 0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
1257 0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
1258 0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
1259 0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
1260 0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
1261 0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
1262 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
1263 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
1264 };
1265 icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t));
1266 UErrorCode status = U_ZERO_ERROR;
1267 icu::LocalPointer<icu::RuleBasedCollator> col1(
1268 new icu::RuleBasedCollator(rule, status));
1269 }
1270
TestICU22517()1271 void CollationRegressionTest::TestICU22517() {
1272 IcuTestErrorCode errorCode(*this, "TestICU22517");
1273 char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
1274 icu::UnicodeString rule(true, data, -1);
1275 int length = quick ? rule.length()-2 : rule.length();
1276 for (int i = 4; i <= length; i++) {
1277 UErrorCode status = U_ZERO_ERROR;
1278 icu::LocalPointer<icu::RuleBasedCollator> col1(
1279 new icu::RuleBasedCollator(rule.tempSubString(0, i), status));
1280 }
1281 }
1282
TestICU22277()1283 void CollationRegressionTest::TestICU22277() {
1284 IcuTestErrorCode errorCode(*this, "TestICU22277");
1285 UErrorCode status = U_ZERO_ERROR;
1286
1287 Collator* c = Collator::createInstance("JA-u-Co-priVatE-KANa", status);
1288 if(c != nullptr || U_SUCCESS(status)) {
1289 errcheckln(status, "Collator should have failed with MemorySanitizer: use-of-uninitialized-value error - %s",
1290 u_errorName(status));
1291 delete c;
1292 return;
1293 }
1294 c = Collator::createInstance("hE-U-cO-pRIVate-UNihan", status);
1295 if(c != nullptr || U_SUCCESS(status)) {
1296 errcheckln(status, "Collator should have failed with MemorySanitizer: use-of-uninitialized-value error - %s",
1297 u_errorName(status));
1298 delete c;
1299 return;
1300 }
1301 }
1302
compareArray(Collator & c,const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN],int32_t testCount)1303 void CollationRegressionTest::compareArray(Collator &c,
1304 const char16_t tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1305 int32_t testCount)
1306 {
1307 int32_t i;
1308 Collator::EComparisonResult expectedResult = Collator::EQUAL;
1309
1310 for (i = 0; i < testCount; i += 3)
1311 {
1312 UnicodeString source(tests[i]);
1313 UnicodeString comparison(tests[i + 1]);
1314 UnicodeString target(tests[i + 2]);
1315
1316 if (comparison == "<")
1317 {
1318 expectedResult = Collator::LESS;
1319 }
1320 else if (comparison == ">")
1321 {
1322 expectedResult = Collator::GREATER;
1323 }
1324 else if (comparison == "=")
1325 {
1326 expectedResult = Collator::EQUAL;
1327 }
1328 else
1329 {
1330 UnicodeString bogus1("Bogus comparison string \"");
1331 UnicodeString bogus2("\"");
1332 errln(bogus1 + comparison + bogus2);
1333 }
1334
1335 Collator::EComparisonResult compareResult = c.compare(source, target);
1336
1337 CollationKey sourceKey, targetKey;
1338 UErrorCode status = U_ZERO_ERROR;
1339
1340 c.getCollationKey(source, sourceKey, status);
1341
1342 if (U_FAILURE(status))
1343 {
1344 errln("Couldn't get collationKey for source");
1345 continue;
1346 }
1347
1348 c.getCollationKey(target, targetKey, status);
1349
1350 if (U_FAILURE(status))
1351 {
1352 errln("Couldn't get collationKey for target");
1353 continue;
1354 }
1355
1356 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1357
1358 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1359
1360 }
1361 }
1362
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)1363 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1364 {
1365 int32_t c1, c2, count = 0;
1366 UErrorCode status = U_ZERO_ERROR;
1367
1368 do
1369 {
1370 c1 = i1.next(status);
1371 c2 = i2.next(status);
1372
1373 if (c1 != c2)
1374 {
1375 UnicodeString msg, msg1(" ");
1376
1377 msg += msg1 + count;
1378 msg += ": strength(0x";
1379 appendHex(c1, 8, msg);
1380 msg += ") != strength(0x";
1381 appendHex(c2, 8, msg);
1382 msg += ")";
1383
1384 errln(msg);
1385 break;
1386 }
1387
1388 count += 1;
1389 }
1390 while (c1 != CollationElementIterator::NULLORDER);
1391 }
1392
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1393 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1394 {
1395 if (exec)
1396 {
1397 logln("Collation Regression Tests: ");
1398 }
1399
1400 if(en_us == nullptr) {
1401 dataerrln("Class collator not instantiated");
1402 name = "";
1403 return;
1404 }
1405 TESTCASE_AUTO_BEGIN;
1406 TESTCASE_AUTO(Test4048446);
1407 TESTCASE_AUTO(Test4051866);
1408 TESTCASE_AUTO(Test4053636);
1409 TESTCASE_AUTO(Test4054238);
1410 TESTCASE_AUTO(Test4054734);
1411 TESTCASE_AUTO(Test4054736);
1412 TESTCASE_AUTO(Test4058613);
1413 TESTCASE_AUTO(Test4059820);
1414 TESTCASE_AUTO(Test4060154);
1415 TESTCASE_AUTO(Test4062418);
1416 TESTCASE_AUTO(Test4065540);
1417 TESTCASE_AUTO(Test4066189);
1418 TESTCASE_AUTO(Test4066696);
1419 TESTCASE_AUTO(Test4076676);
1420 TESTCASE_AUTO(Test4078588);
1421 TESTCASE_AUTO(Test4079231);
1422 TESTCASE_AUTO(Test4081866);
1423 TESTCASE_AUTO(Test4087241);
1424 TESTCASE_AUTO(Test4087243);
1425 TESTCASE_AUTO(Test4092260);
1426 TESTCASE_AUTO(Test4095316);
1427 TESTCASE_AUTO(Test4101940);
1428 TESTCASE_AUTO(Test4103436);
1429 TESTCASE_AUTO(Test4114076);
1430 TESTCASE_AUTO(Test4114077);
1431 TESTCASE_AUTO(Test4124632);
1432 TESTCASE_AUTO(Test4132736);
1433 TESTCASE_AUTO(Test4133509);
1434 TESTCASE_AUTO(Test4139572);
1435 TESTCASE_AUTO(Test4141640);
1436 TESTCASE_AUTO(Test4179216);
1437 TESTCASE_AUTO(TestT7189);
1438 TESTCASE_AUTO(TestCaseFirstCompression);
1439 TESTCASE_AUTO(TestTrailingComment);
1440 TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
1441 TESTCASE_AUTO(TestICU22277);
1442 TESTCASE_AUTO(TestICU22517);
1443 TESTCASE_AUTO(TestICU22555InfinityLoop);
1444 TESTCASE_AUTO_END;
1445 }
1446
1447 #endif /* #if !UCONFIG_NO_COLLATION */
1448