• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2001-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File cmsccoll.C
11 *
12 *******************************************************************************/
13 /**
14  * These are the tests specific to ICU 1.8 and above, that I didn't know where
15  * to fit.
16  */
17 
18 #include <stdio.h>
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION
23 
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "cintltst.h"
28 #include "ccolltst.h"
29 #include "callcoll.h"
30 #include "unicode/ustring.h"
31 #include "string.h"
32 #include "ucol_imp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43 
44 
45 #define MAX_TOKEN_LEN 16
46 
/*
 * Function type for a string comparison routine: it receives an opaque
 * collator pointer, an integer `object` argument, and the source and target
 * UChar strings together with their lengths, and yields a UCollationResult.
 * NOTE(review): the meaning of `object` is not visible in this part of the
 * file - confirm against the functions that implement this type.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
50 
51 
52 
/*
 * Data for IncompleteCntTest under the rule " & Z < ABC < Q < B".
 * The entries are listed in ascending order: the test expects every entry
 * to compare UCOL_LESS against each entry that follows it.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ",
  "AB", "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};
67 
/*
 * Data for IncompleteCntTest under the rule " & Z < DAVIS < MARK <DAV".
 * The entries are listed in ascending order: the test expects every entry
 * to compare UCOL_LESS against each entry that follows it.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ",
  "MAR",
  "Z", "DAVIS", "MARK", "DAV",
  "DAVI"
};
79 
IncompleteCntTest(void)80 static void IncompleteCntTest(void)
81 {
82   UErrorCode status = U_ZERO_ERROR;
83   UChar temp[90];
84   UChar t1[90];
85   UChar t2[90];
86 
87   UCollator *coll =  NULL;
88   uint32_t i = 0, j = 0;
89   uint32_t size = 0;
90 
91   u_uastrcpy(temp, " & Z < ABC < Q < B");
92 
93   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94 
95   if(U_SUCCESS(status)) {
96     size = UPRV_LENGTHOF(cnt1);
97     for(i = 0; i < size-1; i++) {
98       for(j = i+1; j < size; j++) {
99         UCollationElements *iter;
100         u_uastrcpy(t1, cnt1[i]);
101         u_uastrcpy(t2, cnt1[j]);
102         doTest(coll, t1, t2, UCOL_LESS);
103         /* synwee : added collation element iterator test */
104         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105         if (U_FAILURE(status)) {
106           log_err("Creation of iterator failed\n");
107           break;
108         }
109         backAndForth(iter);
110         ucol_closeElements(iter);
111       }
112     }
113   }
114 
115   ucol_close(coll);
116 
117 
118   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120 
121   if(U_SUCCESS(status)) {
122     size = UPRV_LENGTHOF(cnt2);
123     for(i = 0; i < size-1; i++) {
124       for(j = i+1; j < size; j++) {
125         UCollationElements *iter;
126         u_uastrcpy(t1, cnt2[i]);
127         u_uastrcpy(t2, cnt2[j]);
128         doTest(coll, t1, t2, UCOL_LESS);
129 
130         /* synwee : added collation element iterator test */
131         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132         if (U_FAILURE(status)) {
133           log_err("Creation of iterator failed\n");
134           break;
135         }
136         backAndForth(iter);
137         ucol_closeElements(iter);
138       }
139     }
140   }
141 
142   ucol_close(coll);
143 
144 
145 }
146 
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_SHIFTED and strength is UCOL_QUATERNARY: every entry must
 * compare UCOL_LESS against each later one.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
158 
159 const static UCollationResult shiftedTert[] = {
160   UCOL_EQUAL,
161   UCOL_EQUAL,
162   UCOL_EQUAL,
163   UCOL_LESS,
164   UCOL_EQUAL,
165   UCOL_EQUAL,
166   UCOL_LESS,
167   UCOL_EQUAL,
168   UCOL_EQUAL
169 };
170 
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_NON_IGNORABLE: every entry must compare UCOL_LESS against
 * each later one.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
182 
BlackBirdTest(void)183 static void BlackBirdTest(void) {
184   UErrorCode status = U_ZERO_ERROR;
185   UChar t1[90];
186   UChar t2[90];
187 
188   uint32_t i = 0, j = 0;
189   uint32_t size = 0;
190   UCollator *coll = ucol_open("en_US", &status);
191 
192   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194 
195   if(U_SUCCESS(status)) {
196     size = UPRV_LENGTHOF(nonignorable);
197     for(i = 0; i < size-1; i++) {
198       for(j = i+1; j < size; j++) {
199         u_uastrcpy(t1, nonignorable[i]);
200         u_uastrcpy(t2, nonignorable[j]);
201         doTest(coll, t1, t2, UCOL_LESS);
202       }
203     }
204   }
205 
206   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208 
209   if(U_SUCCESS(status)) {
210     size = UPRV_LENGTHOF(shifted);
211     for(i = 0; i < size-1; i++) {
212       for(j = i+1; j < size; j++) {
213         u_uastrcpy(t1, shifted[i]);
214         u_uastrcpy(t2, shifted[j]);
215         doTest(coll, t1, t2, UCOL_LESS);
216       }
217     }
218   }
219 
220   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221   if(U_SUCCESS(status)) {
222     size = UPRV_LENGTHOF(shifted);
223     for(i = 1; i < size; i++) {
224       u_uastrcpy(t1, shifted[i-1]);
225       u_uastrcpy(t2, shifted[i]);
226       doTest(coll, t1, t2, shiftedTert[i]);
227     }
228   }
229 
230   ucol_close(coll);
231 }
232 
233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236     {0x0041/*'A'*/, 0x0300, 0x0000},
237     {0x00C0, 0x0301, 0x0000},
238     /* this would work with forced normalization */
239     {0x00C0, 0x0316, 0x0000}
240 };
241 
242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245     {0x00C0, 0},
246     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247     /* this would work with forced normalization */
248     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249 };
250 
251 const static UCollationResult results[] = {
252     UCOL_GREATER,
253     UCOL_EQUAL,
254     UCOL_EQUAL,
255     UCOL_GREATER,
256     UCOL_EQUAL
257 };
258 
FunkyATest(void)259 static void FunkyATest(void)
260 {
261 
262     int32_t i;
263     UErrorCode status = U_ZERO_ERROR;
264     UCollator  *myCollation;
265     myCollation = ucol_open("en_US", &status);
266     if(U_FAILURE(status)){
267         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268         return;
269     }
270     log_verbose("Testing some A letters, for some reason\n");
271     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272     ucol_setStrength(myCollation, UCOL_TERTIARY);
273     for (i = 0; i < 4 ; i++)
274     {
275         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276     }
277     ucol_close(myCollation);
278 }
279 
/*
 * Attribute value tables. In the visible part of this file they are only
 * referenced by PrintMarkDavis (currently compiled out with #if 0), which
 * iterates over every combination of them.
 */

/* Values applied to the UCOL_CASE_FIRST attribute. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values applied to the UCOL_ALTERNATE_HANDLING attribute. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values applied to the UCOL_CASE_LEVEL attribute. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values applied to the UCOL_STRENGTH attribute, weakest comparison level first. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
304 
#if 0
/* Printable names for the entries of strengths[], in the same order. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names for the entries of caseFirst[], in the same order. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names for the entries of alternateHandling[], in the same order. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names for the entries of caseLevel[], in the same order. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Prints to stderr the sort key of "Mark Davis" (with the second character
 * replaced by U+00E4) for every combination of case-first, alternate
 * handling, case level and strength settings on an en_US collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
    /* index with h: i still holds sizem from the dump loop above and is out of range here */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          (void)sortkeysize; /* only the printable form below is reported */
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          /* NOTE(review): len is passed by pointer; restore the buffer size
             before each call in case the callee updates it - confirm. */
          len = (uint32_t)sizeof(buffer);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  ucol_close(coll);
}
#endif
382 
BillFairmanTest(void)383 static void BillFairmanTest(void) {
384 /*
385 ** check for actual locale via ICU resource bundles
386 **
387 ** lp points to the original locale ("fr_FR_....")
388 */
389 
390     UResourceBundle *lr,*cr;
391     UErrorCode              lec = U_ZERO_ERROR;
392     const char *lp = "fr_FR_you_ll_never_find_this_locale";
393 
394     log_verbose("BillFairmanTest\n");
395 
396     lr = ures_open(NULL,lp,&lec);
397     if (lr) {
398         cr = ures_getByKey(lr,"collations",0,&lec);
399         if (cr) {
400             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401             if (lp) {
402                 if (U_SUCCESS(lec)) {
403                     if(strcmp(lp, "fr") != 0) {
404                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405                     }
406                 }
407             }
408             ures_close(cr);
409         }
410         ures_close(lr);
411     }
412 }
413 
/*
 * Strings for TestChMove, listed in the ascending order expected from the
 * "cs" collator: every entry must compare UCOL_LESS against each later one.
 * Entries use \uXXXX escapes that TestChMove expands with u_unescape().
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
433 
TestChMove(void)434 static void TestChMove(void) {
435     UChar t1[256] = {0};
436     UChar t2[256] = {0};
437 
438     uint32_t i = 0, j = 0;
439     uint32_t size = 0;
440     UErrorCode status = U_ZERO_ERROR;
441 
442     UCollator *coll = ucol_open("cs", &status);
443 
444     if(U_SUCCESS(status)) {
445         size = UPRV_LENGTHOF(chTest);
446         for(i = 0; i < size-1; i++) {
447             for(j = i+1; j < size; j++) {
448                 u_unescape(chTest[i], t1, 256);
449                 u_unescape(chTest[j], t2, 256);
450                 doTest(coll, t1, t2, UCOL_LESS);
451             }
452         }
453     }
454     else {
455         log_data_err("Can't open collator");
456     }
457     ucol_close(coll);
458 }
459 
460 
461 
462 
463 /*
464 const static char impTest[][20] = {
465   "\\u4e00",
466     "a",
467     "A",
468     "b",
469     "B",
470     "\\u4e01"
471 };
472 */
473 
474 
TestImplicitTailoring(void)475 static void TestImplicitTailoring(void) {
476   static const struct {
477     const char *rules;
478     const char *data[10];
479     const uint32_t len;
480   } tests[] = {
481       {
482         /* Tailor b and c before U+4E00. */
483         "&[before 1]\\u4e00 < b < c "
484         /* Now, before U+4E00 is c; put d and e after that. */
485         "&[before 1]\\u4e00 < d < e",
486         { "b", "c", "d", "e", "\\u4e00"}, 5 },
487       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490   };
491 
492   int32_t i = 0;
493 
494   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496   }
497 
498 /*
499   UChar t1[256] = {0};
500   UChar t2[256] = {0};
501 
502   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503 
504   uint32_t i = 0, j = 0;
505   uint32_t size = 0;
506   uint32_t ruleLen = 0;
507   UErrorCode status = U_ZERO_ERROR;
508   UCollator *coll = NULL;
509   ruleLen = u_unescape(rule, t1, 256);
510 
511   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512 
513   if(U_SUCCESS(status)) {
514     size = UPRV_LENGTHOF(impTest);
515     for(i = 0; i < size-1; i++) {
516       for(j = i+1; j < size; j++) {
517         u_unescape(impTest[i], t1, 256);
518         u_unescape(impTest[j], t2, 256);
519         doTest(coll, t1, t2, UCOL_LESS);
520       }
521     }
522   }
523   else {
524     log_err("Can't open collator");
525   }
526   ucol_close(coll);
527   */
528 }
529 
TestFCDProblem(void)530 static void TestFCDProblem(void) {
531   UChar t1[256] = {0};
532   UChar t2[256] = {0};
533 
534   const char *s1 = "\\u0430\\u0306\\u0325";
535   const char *s2 = "\\u04D1\\u0325";
536 
537   UErrorCode status = U_ZERO_ERROR;
538   UCollator *coll = ucol_open("", &status);
539   u_unescape(s1, t1, 256);
540   u_unescape(s2, t2, 256);
541 
542   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543   doTest(coll, t1, t2, UCOL_EQUAL);
544 
545   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546   doTest(coll, t1, t2, UCOL_EQUAL);
547 
548   ucol_close(coll);
549 }
550 
551 /*
552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553 We're only using NFC/NFD in this test.
554 */
555 #define NORM_BUFFER_TEST_LEN 18
/* One TestComposeDecompose case: a code point and two normalized forms of it. */
typedef struct {
  UChar32 u;                        /* the code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* buffer filled by unorm_normalize(..., UNORM_NFC, ...) */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* buffer filled by unorm_normalize(..., UNORM_NFD, ...) */
} tester;
561 
TestComposeDecompose(void)562 static void TestComposeDecompose(void) {
563     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564     static const UChar UNICODESET_STR[] = {
565         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568     };
569     int32_t noOfLoc;
570     int32_t i = 0, j = 0;
571 
572     UErrorCode status = U_ZERO_ERROR;
573     const char *locName = NULL;
574     uint32_t nfcSize;
575     uint32_t nfdSize;
576     tester **t;
577     uint32_t noCases = 0;
578     UCollator *coll = NULL;
579     UChar32 u = 0;
580     UChar comp[NORM_BUFFER_TEST_LEN];
581     uint32_t len = 0;
582     UCollationElements *iter;
583     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584     int32_t charsToTestSize;
585 
586     noOfLoc = uloc_countAvailable();
587 
588     coll = ucol_open("", &status);
589     if (U_FAILURE(status)) {
590         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591         return;
592     }
593     charsToTestSize = uset_size(charsToTest);
594     if (charsToTestSize <= 0) {
595         log_err("Set was zero. Missing data?\n");
596         return;
597     }
598     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
599     t[0] = (tester *)malloc(sizeof(tester));
600     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
601 
602     for(u = 0; u < charsToTestSize; u++) {
603         UChar32 ch = uset_charAt(charsToTest, u);
604         len = 0;
605         U16_APPEND_UNSAFE(comp, len, ch);
606         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608 
609         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
610           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
611             t[noCases]->u = ch;
612             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
613                 u_strncpy(t[noCases]->NFC, comp, len);
614                 t[noCases]->NFC[len] = 0;
615             }
616             noCases++;
617             t[noCases] = (tester *)malloc(sizeof(tester));
618             uprv_memset(t[noCases], 0, sizeof(tester));
619         }
620     }
621     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622     uset_close(charsToTest);
623     charsToTest = NULL;
624 
625     for(u=0; u<(UChar32)noCases; u++) {
626         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629         }
630     }
631     /*
632     for(u = 0; u < charsToTestSize; u++) {
633       if(!(u&0xFFFF)) {
634         log_verbose("%08X ", u);
635       }
636       uprv_memset(t[noCases], 0, sizeof(tester));
637       t[noCases]->u = u;
638       len = 0;
639       U16_APPEND_UNSAFE(comp, len, u);
640       comp[len] = 0;
641       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645     }
646     */
647 
648     ucol_close(coll);
649 
650     log_verbose("Testing locales, number of cases = %i\n", noCases);
651     for(i = 0; i<noOfLoc; i++) {
652         status = U_ZERO_ERROR;
653         locName = uloc_getAvailable(i);
654         if(hasCollationElements(locName)) {
655             char cName[256];
656             UChar name[256];
657             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658 
659             for(j = 0; j<nameSize; j++) {
660                 cName[j] = (char)name[j];
661             }
662             cName[nameSize] = 0;
663             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664 
665             coll = ucol_open(locName, &status);
666             ucol_setStrength(coll, UCOL_IDENTICAL);
667             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
668 
669             for(u=0; u<(UChar32)noCases; u++) {
670                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673                     log_verbose("Testing NFC\n");
674                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675                     backAndForth(iter);
676                     log_verbose("Testing NFD\n");
677                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678                     backAndForth(iter);
679                 }
680             }
681             ucol_closeElements(iter);
682             ucol_close(coll);
683         }
684     }
685     for(u = 0; u <= (UChar32)noCases; u++) {
686         free(t[u]);
687     }
688     free(t);
689 }
690 
TestEmptyRule(void)691 static void TestEmptyRule(void) {
692   UErrorCode status = U_ZERO_ERROR;
693   UChar rulez[] = { 0 };
694   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695 
696   ucol_close(coll);
697 }
698 
TestUCARules(void)699 static void TestUCARules(void) {
700   UErrorCode status = U_ZERO_ERROR;
701   UChar b[256];
702   UChar *rules = b;
703   uint32_t ruleLen = 0;
704   UCollator *UCAfromRules = NULL;
705   UCollator *coll = ucol_open("", &status);
706   if(status == U_FILE_ACCESS_ERROR) {
707     log_data_err("Is your data around?\n");
708     return;
709   } else if(U_FAILURE(status)) {
710     log_err("Error opening collator\n");
711     return;
712   }
713   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714 
715   log_verbose("TestUCARules\n");
716   if(ruleLen > 256) {
717     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719   }
720   log_verbose("Rules length is %d\n", ruleLen);
721   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722   if(U_SUCCESS(status)) {
723     ucol_close(UCAfromRules);
724   } else {
725     log_verbose("Unable to create a collator from UCARules!\n");
726   }
727 /*
728   u_unescape(blah, b, 256);
729   ucol_getSortKey(coll, b, 1, res, 256);
730 */
731   ucol_close(coll);
732   if(rules != b) {
733     free(rules);
734   }
735 }
736 
737 
738 /* Pinyin tonal order */
739 /*
740     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
742     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747 .. (\u00fc)
748 
749 However, in testing we got the following order:
750     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
752     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753 .. (\u0113)
754     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757 .. (\u01d8)
758       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759 */
760 
/**
 * Tailors the Pinyin tone-marked vowels with [before 1] so that they sort
 * ahead of their base letters, then hands the expected order to
 * genericRulesStarter().
 */
static void TestBefore(void) {
  /* Expected order: the four tone-marked forms of each vowel, then the
     upper-case base letter; the u-diaeresis forms come last. */
  static const char *data[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char rules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, data, UPRV_LENGTHOF(data));
}
779 
#if 0
/* superseded by TestBeforePinyin */
/* Compiled out. Passed the tone-marked vowel order below to genericLocaleStarter() for the "zh" locale. */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
#endif
795 
#if 0
/* superseded by the changes to the lv locale */
/* Compiled out. Passed the order I, i, Y, y to genericLocaleStarter() for the "lv" locale. */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, UPRV_LENGTHOF(data));
}
#endif
808 
/**
 * Passes one list of strings containing "ae"/"Ae" and the ligatures
 * U+00E6/U+00C6 to two checks: genericLocaleStarter() for the "fr" locale,
 * and genericRulesStarter() with a [backwards 2] tailoring that places the
 * ligatures relative to A with an "e"/"E" expansion.
 */
static void TestJ815(void) {
  static const char *data[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  static const char rules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
  genericRulesStarter(rules, data, UPRV_LENGTHOF(data));
}
829 
830 
TestCase(void)831 static void TestCase(void)
832 {
833     const static UChar gRules[MAX_TOKEN_LEN] =
834     /*" & 0 < 1,\u2461<a,A"*/
835     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
836 
837     const static UChar testCase[][MAX_TOKEN_LEN] =
838     {
839         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
840         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
841         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
842         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
843     };
844 
845     const static UCollationResult caseTestResults[][9] =
846     {
847         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
848         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
849         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
850         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
851     };
852 
853     const static UColAttributeValue caseTestAttributes[][2] =
854     {
855         { UCOL_LOWER_FIRST, UCOL_OFF},
856         { UCOL_UPPER_FIRST, UCOL_OFF},
857         { UCOL_LOWER_FIRST, UCOL_ON},
858         { UCOL_UPPER_FIRST, UCOL_ON}
859     };
860     int32_t i,j,k;
861     UErrorCode status = U_ZERO_ERROR;
862     UCollationElements *iter;
863     UCollator  *myCollation;
864     myCollation = ucol_open("en_US", &status);
865 
866     if(U_FAILURE(status)){
867         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
868         return;
869     }
870     log_verbose("Testing different case settings\n");
871     ucol_setStrength(myCollation, UCOL_TERTIARY);
872 
873     for(k = 0; k<4; k++) {
874       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
875       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
876       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
877       for (i = 0; i < 3 ; i++) {
878         for(j = i+1; j<4; j++) {
879           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
880         }
881       }
882     }
883     ucol_close(myCollation);
884 
885     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
886     if(U_FAILURE(status)){
887         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
888         return;
889     }
890     log_verbose("Testing different case settings with custom rules\n");
891     ucol_setStrength(myCollation, UCOL_TERTIARY);
892 
893     for(k = 0; k<4; k++) {
894       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
895       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
896       for (i = 0; i < 3 ; i++) {
897         for(j = i+1; j<4; j++) {
898           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
899           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
900           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
901           backAndForth(iter);
902           ucol_closeElements(iter);
903           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
904           backAndForth(iter);
905           ucol_closeElements(iter);
906         }
907       }
908     }
909     ucol_close(myCollation);
910     {
911       const static char *lowerFirst[] = {
912         "h",
913         "H",
914         "ch",
915         "Ch",
916         "CH",
917         "cha",
918         "chA",
919         "Cha",
920         "ChA",
921         "CHa",
922         "CHA",
923         "i",
924         "I"
925       };
926 
927       const static char *upperFirst[] = {
928         "H",
929         "h",
930         "CH",
931         "Ch",
932         "ch",
933         "CHA",
934         "CHa",
935         "ChA",
936         "Cha",
937         "chA",
938         "cha",
939         "I",
940         "i"
941       };
942       log_verbose("mixed case test\n");
943       log_verbose("lower first, case level off\n");
944       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
945       log_verbose("upper first, case level off\n");
946       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
947       log_verbose("lower first, case level on\n");
948       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
949       log_verbose("upper first, case level on\n");
950       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
951     }
952 
953 }
954 
TestIncrementalNormalize(void)955 static void TestIncrementalNormalize(void) {
956 
957     /*UChar baseA     =0x61;*/
958     UChar baseA     =0x41;
959 /*    UChar baseB     = 0x42;*/
960     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
961     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
962     /*
963         0x316 is combining grave accent below, cc=220
964         0x321 is combining palatalized hook below, cc=202
965         0x300 is combining grave accent, cc=230
966     */
967 
968 #define MAXSLEN 2000
969     /*int          maxSLen   = 64000;*/
970     int          sLen;
971     int          i;
972 
973     UCollator        *coll;
974     UErrorCode       status = U_ZERO_ERROR;
975     UCollationResult result;
976 
977     int32_t myQ = getTestOption(QUICK_OPTION);
978 
979     if(getTestOption(QUICK_OPTION) < 0) {
980         setTestOption(QUICK_OPTION, 1);
981     }
982 
983     {
984         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
985         /*          most buffers along the way.*/
986         UChar            strA[MAXSLEN+1];
987         UChar            strB[MAXSLEN+1];
988 
989         coll = ucol_open("en_US", &status);
990         if(status == U_FILE_ACCESS_ERROR) {
991           log_data_err("Is your data around?\n");
992           return;
993         } else if(U_FAILURE(status)) {
994           log_err("Error opening collator\n");
995           return;
996         }
997         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
998 
999         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1000         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1001         /*for (sLen = 1000; sLen<1001; sLen++) {*/
1002         for (sLen = 500; sLen<501; sLen++) {
1003         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1004             strA[0] = baseA;
1005             strB[0] = baseA;
1006             for (i=1; i<=sLen-1; i++) {
1007                 strA[i] = ccMix[i % 3];
1008                 strB[sLen-i] = ccMix[i % 3];
1009             }
1010             strA[sLen]   = 0;
1011             strB[sLen]   = 0;
1012 
1013             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1014             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1015             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1016             doTest(coll, strA, strB, UCOL_EQUAL);
1017         }
1018     }
1019 
1020     setTestOption(QUICK_OPTION, myQ);
1021 
1022 
1023     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1024     /*         of the string.  Checks a couple of edge cases.*/
1025 
1026     {
1027         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1028         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1029         ucol_setStrength(coll, UCOL_TERTIARY);
1030         doTest(coll, strA, strB, UCOL_EQUAL);
1031     }
1032 
1033     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1034 
1035     {
1036       /* New UCA  3.1.1.
1037        * test below used a code point from Desseret, which sorts differently
1038        * than d800 dc00
1039        */
1040         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1041         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1042         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1043         ucol_setStrength(coll, UCOL_TERTIARY);
1044         doTest(coll, strA, strB, UCOL_GREATER);
1045     }
1046 
1047     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1048 
1049     {
1050         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1051         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1052         char  sortKeyA[50];
1053         char  sortKeyAz[50];
1054         char  sortKeyB[50];
1055         char  sortKeyBz[50];
1056         int   r;
1057 
1058         /* there used to be -3 here. Hmmmm.... */
1059         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1060         result = ucol_strcoll(coll, strA, 3, strB, 3);
1061         if (result != UCOL_GREATER) {
1062             log_err("ERROR 1 in test 4\n");
1063         }
1064         result = ucol_strcoll(coll, strA, -1, strB, -1);
1065         if (result != UCOL_EQUAL) {
1066             log_err("ERROR 2 in test 4\n");
1067         }
1068 
1069         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1070         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1071         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1072         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1073 
1074         r = strcmp(sortKeyA, sortKeyAz);
1075         if (r <= 0) {
1076             log_err("Error 3 in test 4\n");
1077         }
1078         r = strcmp(sortKeyA, sortKeyB);
1079         if (r <= 0) {
1080             log_err("Error 4 in test 4\n");
1081         }
1082         r = strcmp(sortKeyAz, sortKeyBz);
1083         if (r != 0) {
1084             log_err("Error 5 in test 4\n");
1085         }
1086 
1087         ucol_setStrength(coll, UCOL_IDENTICAL);
1088         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1089         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1090         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1091         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1092 
1093         r = strcmp(sortKeyA, sortKeyAz);
1094         if (r <= 0) {
1095             log_err("Error 6 in test 4\n");
1096         }
1097         r = strcmp(sortKeyA, sortKeyB);
1098         if (r <= 0) {
1099             log_err("Error 7 in test 4\n");
1100         }
1101         r = strcmp(sortKeyAz, sortKeyBz);
1102         if (r != 0) {
1103             log_err("Error 8 in test 4\n");
1104         }
1105         ucol_setStrength(coll, UCOL_TERTIARY);
1106     }
1107 
1108 
1109     /*  Test 5:  Null characters in non-normal source strings.*/
1110 
1111     {
1112         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1113         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1114         char  sortKeyA[50];
1115         char  sortKeyAz[50];
1116         char  sortKeyB[50];
1117         char  sortKeyBz[50];
1118         int   r;
1119 
1120         result = ucol_strcoll(coll, strA, 6, strB, 6);
1121         if (result != UCOL_GREATER) {
1122             log_err("ERROR 1 in test 5\n");
1123         }
1124         result = ucol_strcoll(coll, strA, -1, strB, -1);
1125         if (result != UCOL_EQUAL) {
1126             log_err("ERROR 2 in test 5\n");
1127         }
1128 
1129         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1130         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1131         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1132         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1133 
1134         r = strcmp(sortKeyA, sortKeyAz);
1135         if (r <= 0) {
1136             log_err("Error 3 in test 5\n");
1137         }
1138         r = strcmp(sortKeyA, sortKeyB);
1139         if (r <= 0) {
1140             log_err("Error 4 in test 5\n");
1141         }
1142         r = strcmp(sortKeyAz, sortKeyBz);
1143         if (r != 0) {
1144             log_err("Error 5 in test 5\n");
1145         }
1146 
1147         ucol_setStrength(coll, UCOL_IDENTICAL);
1148         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1149         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1150         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1151         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1152 
1153         r = strcmp(sortKeyA, sortKeyAz);
1154         if (r <= 0) {
1155             log_err("Error 6 in test 5\n");
1156         }
1157         r = strcmp(sortKeyA, sortKeyB);
1158         if (r <= 0) {
1159             log_err("Error 7 in test 5\n");
1160         }
1161         r = strcmp(sortKeyAz, sortKeyBz);
1162         if (r != 0) {
1163             log_err("Error 8 in test 5\n");
1164         }
1165         ucol_setStrength(coll, UCOL_TERTIARY);
1166     }
1167 
1168 
1169     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1170 
1171     {
1172         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1173         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1174 
1175         result = ucol_strcoll(coll, strA, 5, strB, 5);
1176         if (result != UCOL_LESS) {
1177             log_err("Error 1 in test 6\n");
1178         }
1179         result = ucol_strcoll(coll, strA, -1, strB, -1);
1180         if (result != UCOL_EQUAL) {
1181             log_err("Error 2 in test 6\n");
1182         }
1183     }
1184 
1185     ucol_close(coll);
1186 }
1187 
1188 
1189 
#if 0
/*
 * Compiled out (#if 0).
 *
 * For each escaped string in caseBitData, asks ucol_uprv_getCaseBits() for
 * the case bits of the unescaped text under the root collator and compares
 * them with the matching entry of results.
 *
 * Compared with the previous version the collator is now checked after
 * opening and closed before returning.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case bits, one entry per element of caseBitData. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, UPRV_LENGTHOF(b));
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  ucol_close(UCA);
}
#endif
1217 
TestHangulTailoring(void)1218 static void TestHangulTailoring(void) {
1219     static const char *koreanData[] = {
1220         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1221             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1222             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1223             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1224             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1225             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1226     };
1227 
1228     const char *rules =
1229         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1230         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1231         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1232         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1233         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1234         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1235 
1236 
1237   UErrorCode status = U_ZERO_ERROR;
1238   UChar rlz[2048] = { 0 };
1239   uint32_t rlen = u_unescape(rules, rlz, 2048);
1240 
1241   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1242   if(status == U_FILE_ACCESS_ERROR) {
1243     log_data_err("Is your data around?\n");
1244     return;
1245   } else if(U_FAILURE(status)) {
1246     log_err("Error opening collator\n");
1247     return;
1248   }
1249 
1250   log_verbose("Using start of korean rules\n");
1251 
1252   if(U_SUCCESS(status)) {
1253     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1254   } else {
1255     log_err("Unable to open collator with rules %s\n", rules);
1256   }
1257 
1258   ucol_close(coll);
1259 
1260   log_verbose("Using ko__LOTUS locale\n");
1261   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1262 }
1263 
/*
 * Middle bytes of the secondary and tertiary sort key compression ranges,
 * as used by the current implementation (see class CollationKeys).
 * These values are subject to change whenever the sort key compression
 * changes.
 *
 * Each constant is written as the midpoint of its byte range:
 *   secondary:      05..45  ->  0x25
 *   tertiary-only:  05..C5  ->  0x65
 */
enum {
    SEC_COMMON_MIDDLE      = (0x05 + 0x45) / 2,
    TER_ONLY_COMMON_MIDDLE = (0x05 + 0xC5) / 2
};
1274 
TestCompressOverlap(void)1275 static void TestCompressOverlap(void) {
1276     UChar       secstr[150];
1277     UChar       tertstr[150];
1278     UErrorCode  status = U_ZERO_ERROR;
1279     UCollator  *coll;
1280     uint8_t     result[500];
1281     uint32_t    resultlen;
1282     int         count = 0;
1283     uint8_t    *tempptr;
1284 
1285     coll = ucol_open("", &status);
1286 
1287     if (U_FAILURE(status)) {
1288         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1289         return;
1290     }
1291     while (count < 149) {
1292         secstr[count] = 0x0020; /* [06, 05, 05] */
1293         tertstr[count] = 0x0020;
1294         count ++;
1295     }
1296 
1297     /* top down compression ----------------------------------- */
1298     secstr[count] = 0x0332; /* [, 87, 05] */
1299     tertstr[count] = 0x3000; /* [06, 05, 07] */
1300 
1301     /* no compression secstr should have 150 secondary bytes, tertstr should
1302     have 150 tertiary bytes.
1303     with correct compression, secstr should have 6 secondary
1304     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1305     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1306     (void)resultlen;    /* Suppress set but not used warning. */
1307     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1308     while (*(tempptr + 1) != 1) {
1309         /* the last secondary collation element is not checked since it is not
1310         part of the compression */
1311         if (*tempptr < SEC_COMMON_MIDDLE) {
1312             log_err("Secondary top down compression overlapped\n");
1313         }
1314         tempptr ++;
1315     }
1316 
1317     /* tertiary top/bottom/common for en_US is similar to the secondary
1318     top/bottom/common */
1319     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1320     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1321     while (*(tempptr + 1) != 0) {
1322         /* the last secondary collation element is not checked since it is not
1323         part of the compression */
1324         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1325             log_err("Tertiary top down compression overlapped\n");
1326         }
1327         tempptr ++;
1328     }
1329 
1330     /* bottom up compression ------------------------------------- */
1331     secstr[count] = 0;
1332     tertstr[count] = 0;
1333     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1334     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1335     while (*(tempptr + 1) != 1) {
1336         /* the last secondary collation element is not checked since it is not
1337         part of the compression */
1338         if (*tempptr > SEC_COMMON_MIDDLE) {
1339             log_err("Secondary bottom up compression overlapped\n");
1340         }
1341         tempptr ++;
1342     }
1343 
1344     /* tertiary top/bottom/common for en_US is similar to the secondary
1345     top/bottom/common */
1346     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1347     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1348     while (*(tempptr + 1) != 0) {
1349         /* the last secondary collation element is not checked since it is not
1350         part of the compression */
1351         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1352             log_err("Tertiary bottom up compression overlapped\n");
1353         }
1354         tempptr ++;
1355     }
1356 
1357     ucol_close(coll);
1358 }
1359 
/*
 * Runs the three Cyrillic test strings through genericRulesStarter() once
 * for every tailoring that is still applicable.
 *
 * History kept from the original test:
 *  - genericLocaleStarter("ru", ...) was dropped because Russian overrides
 *    contractions, which made that check invalid.
 *  - UCA 8.0 removed most Cyrillic contractions from the default order
 *    (CLDR ticket #7246 "root collation: remove Cyrillic contractions"),
 *    so these variants are no longer run:
 *        locale "root"
 *        "&\\u0410 = \\u0410"
 *        "&Z < \\u0410"
 *        "&\\u0410 = \\u0410 < \\u0410\\u0301"
 *        "&Z < \\u0410 < \\u0410\\u0301"
 */
static void TestCyrillicTailoring(void) {
    /* Strings handed to genericRulesStarter(); presumably in expected sort order. */
    static const char *expectedOrder[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings that are still exercised. */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    const uint32_t stringCount = (uint32_t)(sizeof(expectedOrder) / sizeof(expectedOrder[0]));
    size_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < sizeof(activeRules) / sizeof(activeRules[0]); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], expectedOrder, stringCount);
    }
}
1382 
/*
 * Runs two three-string data sets through genericRulesStarter() with a
 * tailoring that consists only of
 *     [suppressContractions [\u0400-\u047f]]
 * The set that starts with Latin letters is run first, then the set that
 * starts with U+0410.
 */
static void TestSuppressContractions(void) {
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *latinFirst[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *cyrillicFirst[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    const uint32_t latinCount    = (uint32_t)(sizeof(latinFirst) / sizeof(latinFirst[0]));
    const uint32_t cyrillicCount = (uint32_t)(sizeof(cyrillicFirst) / sizeof(cyrillicFirst[0]));

    genericRulesStarter(suppressRule, latinFirst, latinCount);
    genericRulesStarter(suppressRule, cyrillicFirst, cyrillicCount);
}
1399 
TestContraction(void)1400 static void TestContraction(void) {
1401     const static char *testrules[] = {
1402         "&A = AB / B",
1403         "&A = A\\u0306/\\u0306",
1404         "&c = ch / h"
1405     };
1406     const static UChar testdata[][2] = {
1407         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1408         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1409         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1410     };
1411     const static UChar testdata2[][2] = {
1412         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1413         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1414         {0x0063 /* 'c' */, 0x006C /* 'l' */}
1415     };
1416 #if 0
1417     /*
1418      * These pairs of rule strings are not guaranteed to yield the very same mappings.
1419      * In fact, LDML 24 recommends an improved way of creating mappings
1420      * which always yields different mappings for such pairs. See
1421      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1422      */
1423     const static char *testrules3[] = {
1424         "&z < xyz &xyzw << B",
1425         "&z < xyz &xyz << B / w",
1426         "&z < ch &achm << B",
1427         "&z < ch &a << B / chm",
1428         "&\\ud800\\udc00w << B",
1429         "&\\ud800\\udc00 << B / w",
1430         "&a\\ud800\\udc00m << B",
1431         "&a << B / \\ud800\\udc00m",
1432     };
1433 #endif
1434 
1435     UErrorCode  status   = U_ZERO_ERROR;
1436     UCollator  *coll;
1437     UChar       rule[256] = {0};
1438     uint32_t    rlen     = 0;
1439     int         i;
1440 
1441     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1442         UCollationElements *iter1;
1443         int j = 0;
1444         log_verbose("Rule %s for testing\n", testrules[i]);
1445         rlen = u_unescape(testrules[i], rule, 32);
1446         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1447         if (U_FAILURE(status)) {
1448             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1449             return;
1450         }
1451         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1452         if (U_FAILURE(status)) {
1453             log_err("Collation iterator creation failed\n");
1454             return;
1455         }
1456         while (j < 2) {
1457             UCollationElements *iter2 = ucol_openElements(coll,
1458                                                          &(testdata[i][j]),
1459                                                          1, &status);
1460             uint32_t ce;
1461             if (U_FAILURE(status)) {
1462                 log_err("Collation iterator creation failed\n");
1463                 return;
1464             }
1465             ce = ucol_next(iter2, &status);
1466             while (ce != UCOL_NULLORDER) {
1467                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1468                     log_err("Collation elements in contraction split does not match\n");
1469                     return;
1470                 }
1471                 ce = ucol_next(iter2, &status);
1472             }
1473             j ++;
1474             ucol_closeElements(iter2);
1475         }
1476         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1477             log_err("Collation elements not exhausted\n");
1478             return;
1479         }
1480         ucol_closeElements(iter1);
1481         ucol_close(coll);
1482     }
1483 
1484     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1485     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1486     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1487         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1488                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1489                 testdata2[1][1]);
1490         return;
1491     }
1492     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1493         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1494                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1495                 testdata2[2][1]);
1496         return;
1497     }
1498     ucol_close(coll);
1499 #if 0  /* see above */
1500     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1501         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1502         UCollator          *coll1,
1503                            *coll2;
1504         UCollationElements *iter1,
1505                            *iter2;
1506         UChar               ch = 0x0042 /* 'B' */;
1507         uint32_t            ce;
1508         rlen = u_unescape(testrules3[i], rule, 32);
1509         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1510         rlen = u_unescape(testrules3[i + 1], rule, 32);
1511         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1512         if (U_FAILURE(status)) {
1513             log_err("Collator creation failed %s\n", testrules[i]);
1514             return;
1515         }
1516         iter1 = ucol_openElements(coll1, &ch, 1, &status);
1517         iter2 = ucol_openElements(coll2, &ch, 1, &status);
1518         if (U_FAILURE(status)) {
1519             log_err("Collation iterator creation failed\n");
1520             return;
1521         }
1522         ce = ucol_next(iter1, &status);
1523         if (U_FAILURE(status)) {
1524             log_err("Retrieving ces failed\n");
1525             return;
1526         }
1527         while (ce != UCOL_NULLORDER) {
1528             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1529             if (ce == ce2) {
1530                 log_verbose("CEs match: %08x\n", ce);
1531             } else {
1532                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1533                 return;
1534             }
1535             ce = ucol_next(iter1, &status);
1536             if (U_FAILURE(status)) {
1537                 log_err("Retrieving ces failed\n");
1538                 return;
1539             }
1540         }
1541         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1542             log_err("CEs not exhausted\n");
1543             return;
1544         }
1545         ucol_closeElements(iter1);
1546         ucol_closeElements(iter2);
1547         ucol_close(coll1);
1548         ucol_close(coll2);
1549     }
1550 #endif
1551 }
1552 
TestExpansion(void)1553 static void TestExpansion(void) {
1554     const static char *testrules[] = {
1555 #if 0
1556         /*
1557          * This seems to have tested that M was not mapped to an expansion.
1558          * I believe the old builder just did that because it computed the extension CEs
1559          * at the very end, which was a bug.
1560          * Among other problems, it violated the core tailoring principle
1561          * by making an earlier rule depend on a later one.
1562          * And, of course, if M did not get an expansion, then it was primary different from K,
1563          * unlike what the rule &K<<M says.
1564          */
1565         "&J << K / B & K << M",
1566 #endif
1567         "&J << K / B << M"
1568     };
1569     const static UChar testdata[][3] = {
1570         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1571         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1572         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1573         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1574         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1575         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1576     };
1577 
1578     UErrorCode  status   = U_ZERO_ERROR;
1579     UCollator  *coll;
1580     UChar       rule[256] = {0};
1581     uint32_t    rlen     = 0;
1582     int         i;
1583 
1584     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1585         int j = 0;
1586         log_verbose("Rule %s for testing\n", testrules[i]);
1587         rlen = u_unescape(testrules[i], rule, 32);
1588         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1589         if (U_FAILURE(status)) {
1590             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1591             return;
1592         }
1593 
1594         for (j = 0; j < 5; j ++) {
1595             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1596         }
1597         ucol_close(coll);
1598     }
1599 }
1600 
#if 0
/*
 * This test exercises the current limitations of the collation engine.
 * It always fails, so it is compiled out by default.
 *
 * Each section below pairs one rule string with data sets and hands them to
 * genericRulesStarter() or genericRulesStarterWithOptions(); the options
 * variant additionally receives parallel arrays of attributes and values
 * plus the number of attributes to apply.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    /* Both orderings are tried, with decomposition on and then off. */
    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the previous section; only
   * the log message differs.  Possibly a copy-paste that was never
   * specialized -- confirm before re-enabling. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[k] is paired with valOn[k] / valOff[k]. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    /* tlimit03 is only run with the "on" values; tlimit01/02 with both. */
    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    /* Only the upper-first setting is run; the "off" variants stay commented out. */
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
  }

}
#endif
1692 
TestBocsuCoverage(void)1693 static void TestBocsuCoverage(void) {
1694   UErrorCode status = U_ZERO_ERROR;
1695   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1696   UChar       test[256] = {0};
1697   uint32_t    tlen     = u_unescape(testString, test, 32);
1698   uint8_t key[256]     = {0};
1699   uint32_t klen         = 0;
1700 
1701   UCollator *coll = ucol_open("", &status);
1702   if(U_SUCCESS(status)) {
1703   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1704 
1705   klen = ucol_getSortKey(coll, test, tlen, key, 256);
1706   (void)klen;    /* Suppress set but not used warning. */
1707 
1708   ucol_close(coll);
1709   } else {
1710     log_data_err("Couldn't open UCA\n");
1711   }
1712 }
1713 
TestVariableTopSetting(void)1714 static void TestVariableTopSetting(void) {
1715   UErrorCode status = U_ZERO_ERROR;
1716   uint32_t varTopOriginal = 0, varTop1, varTop2;
1717   UCollator *coll = ucol_open("", &status);
1718   if(U_SUCCESS(status)) {
1719 
1720   static const UChar nul = 0;
1721   static const UChar space = 0x20;
1722   static const UChar dot = 0x2e;  /* punctuation */
1723   static const UChar degree = 0xb0;  /* symbol */
1724   static const UChar dollar = 0x24;  /* currency symbol */
1725   static const UChar zero = 0x30;  /* digit */
1726 
1727   varTopOriginal = ucol_getVariableTop(coll, &status);
1728   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1729   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1730 
1731   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1732   varTop2 = ucol_getVariableTop(coll, &status);
1733   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1734   if(U_FAILURE(status) || varTop1 != varTop2 ||
1735       !ucol_equal(coll, &nul, 0, &space, 1) ||
1736       ucol_equal(coll, &nul, 0, &dot, 1) ||
1737       ucol_equal(coll, &nul, 0, &degree, 1) ||
1738       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1739       ucol_equal(coll, &nul, 0, &zero, 1) ||
1740       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1741     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1742   }
1743 
1744   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1745   varTop2 = ucol_getVariableTop(coll, &status);
1746   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1747   if(U_FAILURE(status) || varTop1 != varTop2 ||
1748       !ucol_equal(coll, &nul, 0, &space, 1) ||
1749       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1750       ucol_equal(coll, &nul, 0, &degree, 1) ||
1751       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1752       ucol_equal(coll, &nul, 0, &zero, 1) ||
1753       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1754     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1755   }
1756 
1757   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1758   varTop2 = ucol_getVariableTop(coll, &status);
1759   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1760   if(U_FAILURE(status) || varTop1 != varTop2 ||
1761       !ucol_equal(coll, &nul, 0, &space, 1) ||
1762       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1763       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1764       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1765       ucol_equal(coll, &nul, 0, &zero, 1) ||
1766       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1767     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1768   }
1769 
1770   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1771   varTop2 = ucol_getVariableTop(coll, &status);
1772   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1773   if(U_FAILURE(status) || varTop1 != varTop2 ||
1774       !ucol_equal(coll, &nul, 0, &space, 1) ||
1775       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1776       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1777       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1778       ucol_equal(coll, &nul, 0, &zero, 1) ||
1779       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1780     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1781   }
1782 
1783   log_verbose("Testing setting variable top to contractions\n");
1784   {
1785     UChar first[4] = { 0 };
1786     first[0] = 0x0040;
1787     first[1] = 0x0050;
1788     first[2] = 0x0000;
1789 
1790     status = U_ZERO_ERROR;
1791     ucol_setVariableTop(coll, first, -1, &status);
1792 
1793     if(U_SUCCESS(status)) {
1794       log_err("Invalid contraction succeded in setting variable top!\n");
1795     }
1796 
1797   }
1798 
1799   log_verbose("Test restoring variable top\n");
1800 
1801   status = U_ZERO_ERROR;
1802   ucol_restoreVariableTop(coll, varTopOriginal, &status);
1803   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1804     log_err("Couldn't restore old variable top\n");
1805   }
1806 
1807   log_verbose("Testing calling with error set\n");
1808 
1809   status = U_INTERNAL_PROGRAM_ERROR;
1810   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1811   varTop2 = ucol_getVariableTop(coll, &status);
1812   ucol_restoreVariableTop(coll, varTop2, &status);
1813   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1814   varTop2 = ucol_getVariableTop(NULL, &status);
1815   ucol_restoreVariableTop(NULL, varTop2, &status);
1816   if(status != U_INTERNAL_PROGRAM_ERROR) {
1817     log_err("Bad reaction to passed error!\n");
1818   }
1819   ucol_close(coll);
1820   } else {
1821     log_data_err("Couldn't open UCA collator\n");
1822   }
1823 }
1824 
TestMaxVariable()1825 static void TestMaxVariable() {
1826   UErrorCode status = U_ZERO_ERROR;
1827   UColReorderCode oldMax, max;
1828   UCollator *coll;
1829 
1830   static const UChar nul = 0;
1831   static const UChar space = 0x20;
1832   static const UChar dot = 0x2e;  /* punctuation */
1833   static const UChar degree = 0xb0;  /* symbol */
1834   static const UChar dollar = 0x24;  /* currency symbol */
1835   static const UChar zero = 0x30;  /* digit */
1836 
1837   coll = ucol_open("", &status);
1838   if(U_FAILURE(status)) {
1839     log_data_err("Couldn't open root collator\n");
1840     return;
1841   }
1842 
1843   oldMax = ucol_getMaxVariable(coll);
1844   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1845   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1846 
1847   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1848   max = ucol_getMaxVariable(coll);
1849   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1850   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1851       !ucol_equal(coll, &nul, 0, &space, 1) ||
1852       ucol_equal(coll, &nul, 0, &dot, 1) ||
1853       ucol_equal(coll, &nul, 0, &degree, 1) ||
1854       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1855       ucol_equal(coll, &nul, 0, &zero, 1) ||
1856       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1857     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1858   }
1859 
1860   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1861   max = ucol_getMaxVariable(coll);
1862   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1863   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1864       !ucol_equal(coll, &nul, 0, &space, 1) ||
1865       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1866       ucol_equal(coll, &nul, 0, &degree, 1) ||
1867       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1868       ucol_equal(coll, &nul, 0, &zero, 1) ||
1869       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1870     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1871   }
1872 
1873   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1874   max = ucol_getMaxVariable(coll);
1875   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1876   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1877       !ucol_equal(coll, &nul, 0, &space, 1) ||
1878       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1879       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1880       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1881       ucol_equal(coll, &nul, 0, &zero, 1) ||
1882       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1883     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1884   }
1885 
1886   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1887   max = ucol_getMaxVariable(coll);
1888   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1889   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1890       !ucol_equal(coll, &nul, 0, &space, 1) ||
1891       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1892       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1893       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1894       ucol_equal(coll, &nul, 0, &zero, 1) ||
1895       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1896     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1897   }
1898 
1899   log_verbose("Test restoring maxVariable\n");
1900   status = U_ZERO_ERROR;
1901   ucol_setMaxVariable(coll, oldMax, &status);
1902   if(oldMax != ucol_getMaxVariable(coll)) {
1903     log_err("Couldn't restore old maxVariable\n");
1904   }
1905 
1906   log_verbose("Testing calling with error set\n");
1907   status = U_INTERNAL_PROGRAM_ERROR;
1908   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1909   max = ucol_getMaxVariable(coll);
1910   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1911     log_err("Bad reaction to passed error!\n");
1912   }
1913   ucol_close(coll);
1914 }
1915 
TestNonChars(void)1916 static void TestNonChars(void) {
1917   static const char *test[] = {
1918       "\\u0000",  /* ignorable */
1919       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1920       "\\uFDD0", "\\uFDEF",
1921       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1922       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1923       "\\U0003FFFE", "\\U0003FFFF",
1924       "\\U0004FFFE", "\\U0004FFFF",
1925       "\\U0005FFFE", "\\U0005FFFF",
1926       "\\U0006FFFE", "\\U0006FFFF",
1927       "\\U0007FFFE", "\\U0007FFFF",
1928       "\\U0008FFFE", "\\U0008FFFF",
1929       "\\U0009FFFE", "\\U0009FFFF",
1930       "\\U000AFFFE", "\\U000AFFFF",
1931       "\\U000BFFFE", "\\U000BFFFF",
1932       "\\U000CFFFE", "\\U000CFFFF",
1933       "\\U000DFFFE", "\\U000DFFFF",
1934       "\\U000EFFFE", "\\U000EFFFF",
1935       "\\U000FFFFE", "\\U000FFFFF",
1936       "\\U0010FFFE", "\\U0010FFFF",
1937       "\\uFFFF"  /* special character with maximum primary weight */
1938   };
1939   UErrorCode status = U_ZERO_ERROR;
1940   UCollator *coll = ucol_open("en_US", &status);
1941 
1942   log_verbose("Test non characters\n");
1943 
1944   if(U_SUCCESS(status)) {
1945     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1946   } else {
1947     log_err_status(status, "Unable to open collator\n");
1948   }
1949 
1950   ucol_close(coll);
1951 }
1952 
/*
 * Runs genericLocaleStarter() for "en_US" on four strings that share a long
 * run of 'a' and differ only in their last character ('a', 'b', 'c', 'd').
 * The total string length grows from 20 to 499 characters.
 *
 * The working buffers are heap-allocated.  If an allocation fails the test
 * reports an error and returns instead of writing through a NULL pointer.
 */
static void TestExtremeCompression(void) {
  enum {
    STRING_COUNT = 4,     /* number of strings compared per round */
    BUFFER_SIZE = 2048,   /* bytes per string buffer */
    MIN_LENGTH = 20,      /* first string length tested */
    MAX_LENGTH = 500      /* one past the last string length tested */
  };
  char *test[STRING_COUNT] = { NULL, NULL, NULL, NULL };
  int32_t j = 0, i = 0;

  for(i = 0; i < STRING_COUNT; i++) {
    test[i] = (char *)malloc(BUFFER_SIZE*sizeof(char));
    if(test[i] == NULL) {
      log_err("TestExtremeCompression: could not allocate test buffers\n");
      /* Release the buffers obtained so far before giving up. */
      while(i > 0) {
        free(test[--i]);
      }
      return;
    }
  }

  for(j = MIN_LENGTH; j < MAX_LENGTH; j++) {
    for(i = 0; i < STRING_COUNT; i++) {
      /* j-1 copies of 'a', one distinguishing character, then the terminator. */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, STRING_COUNT);
  }

  for(i = 0; i < STRING_COUNT; i++) {
    free(test[i]);
  }
}
1975 
1976 #if 0
1977 static void TestExtremeCompression(void) {
1978   static char *test[4];
1979   int32_t j = 0, i = 0;
1980   UErrorCode status = U_ZERO_ERROR;
1981   UCollator *coll = ucol_open("en_US", status);
1982   for(i = 0; i<4; i++) {
1983     test[i] = (char *)malloc(2048*sizeof(char));
1984   }
1985   for(j = 10; j < 2048; j++) {
1986     for(i = 0; i<4; i++) {
1987       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1988       test[i][j-1] = (char)('a'+i);
1989       test[i][j] = 0;
1990     }
1991   }
1992   genericLocaleStarter("en_US", (const char **)test, 4);
1993 
1994   for(j = 10; j < 2048; j++) {
1995     for(i = 0; i<1; i++) {
1996       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1997       test[i][j] = 0;
1998     }
1999   }
2000   for(i = 0; i<4; i++) {
2001     free(test[i]);
2002   }
2003 }
2004 #endif
2005 
/*
 * Builds a collator from a tailoring whose targets are supplementary code
 * points (written as surrogate-pair escapes), some followed by ASCII letters,
 * and hands the expected sequence to genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *expectedOrder[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* The table holds 14 strings; all of them are passed on. */
  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2026 
/*
 * Test for the prefix ("context before") rule syntax, which is used by the
 * JIS X 4061 collation rules.  Each case pairs a tailoring with the strings
 * handed to genericRulesStarter().
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;     /* tailoring in escaped form */
    const char *data[50];  /* strings passed to the generic tester */
    const uint32_t len;    /* number of entries of data that are used */
  } cases[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    /* Same rule with white space after the prefix bar. */
    { "&z <<< z|   a",
      {"zz", "za"}, 2 },

    /*
     * NOTE(review): data lists five strings but len is 4, so "zz" is not
     * passed to the tester - confirm whether that is intentional.
     */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  uint32_t idx = 0;

  while(idx < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    idx++;
  }
}
2052 
2053 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2054 /* JIS X 4061 collation order implementation                                   */
TestNewJapanese(void)2055 static void TestNewJapanese(void) {
2056 
2057   static const char * const test1[] = {
2058       "\\u30b7\\u30e3\\u30fc\\u30ec",
2059       "\\u30b7\\u30e3\\u30a4",
2060       "\\u30b7\\u30e4\\u30a3",
2061       "\\u30b7\\u30e3\\u30ec",
2062       "\\u3061\\u3087\\u3053",
2063       "\\u3061\\u3088\\u3053",
2064       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2065       "\\u3066\\u30fc\\u305f",
2066       "\\u30c6\\u30fc\\u30bf",
2067       "\\u30c6\\u30a7\\u30bf",
2068       "\\u3066\\u3048\\u305f",
2069       "\\u3067\\u30fc\\u305f",
2070       "\\u30c7\\u30fc\\u30bf",
2071       "\\u30c7\\u30a7\\u30bf",
2072       "\\u3067\\u3048\\u305f",
2073       "\\u3066\\u30fc\\u305f\\u30fc",
2074       "\\u30c6\\u30fc\\u30bf\\u30a1",
2075       "\\u30c6\\u30a7\\u30bf\\u30fc",
2076       "\\u3066\\u3047\\u305f\\u3041",
2077       "\\u3066\\u3048\\u305f\\u30fc",
2078       "\\u3067\\u30fc\\u305f\\u30fc",
2079       "\\u30c7\\u30fc\\u30bf\\u30a1",
2080       "\\u3067\\u30a7\\u305f\\u30a1",
2081       "\\u30c7\\u3047\\u30bf\\u3041",
2082       "\\u30c7\\u30a8\\u30bf\\u30a2",
2083       "\\u3072\\u3086",
2084       "\\u3073\\u3085\\u3042",
2085       "\\u3074\\u3085\\u3042",
2086       "\\u3073\\u3085\\u3042\\u30fc",
2087       "\\u30d3\\u30e5\\u30a2\\u30fc",
2088       "\\u3074\\u3085\\u3042\\u30fc",
2089       "\\u30d4\\u30e5\\u30a2\\u30fc",
2090       "\\u30d2\\u30e5\\u30a6",
2091       "\\u30d2\\u30e6\\u30a6",
2092       "\\u30d4\\u30e5\\u30a6\\u30a2",
2093       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2094       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2095       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2096       "\\u3072\\u3085\\u3093",
2097       "\\u3074\\u3085\\u3093",
2098       "\\u3075\\u30fc\\u308a",
2099       "\\u30d5\\u30fc\\u30ea",
2100       "\\u3075\\u3045\\u308a",
2101       "\\u3075\\u30a5\\u308a",
2102       "\\u3075\\u30a5\\u30ea",
2103       "\\u30d5\\u30a6\\u30ea",
2104       "\\u3076\\u30fc\\u308a",
2105       "\\u30d6\\u30fc\\u30ea",
2106       "\\u3076\\u3045\\u308a",
2107       "\\u30d6\\u30a5\\u308a",
2108       "\\u3077\\u3046\\u308a",
2109       "\\u30d7\\u30a6\\u30ea",
2110       "\\u3075\\u30fc\\u308a\\u30fc",
2111       "\\u30d5\\u30a5\\u30ea\\u30fc",
2112       "\\u3075\\u30a5\\u308a\\u30a3",
2113       "\\u30d5\\u3045\\u308a\\u3043",
2114       "\\u30d5\\u30a6\\u30ea\\u30fc",
2115       "\\u3075\\u3046\\u308a\\u3043",
2116       "\\u30d6\\u30a6\\u30ea\\u30a4",
2117       "\\u3077\\u30fc\\u308a\\u30fc",
2118       "\\u3077\\u30a5\\u308a\\u30a4",
2119       "\\u3077\\u3046\\u308a\\u30fc",
2120       "\\u30d7\\u30a6\\u30ea\\u30a4",
2121       "\\u30d5\\u30fd",
2122       "\\u3075\\u309e",
2123       "\\u3076\\u309d",
2124       "\\u3076\\u3075",
2125       "\\u3076\\u30d5",
2126       "\\u30d6\\u3075",
2127       "\\u30d6\\u30d5",
2128       "\\u3076\\u309e",
2129       "\\u3076\\u3077",
2130       "\\u30d6\\u3077",
2131       "\\u3077\\u309d",
2132       "\\u30d7\\u30fd",
2133       "\\u3077\\u3075",
2134 };
2135 
2136   static const char *test2[] = {
2137     "\\u306f\\u309d", /* H\\u309d */
2138     "\\u30cf\\u30fd", /* K\\u30fd */
2139     "\\u306f\\u306f", /* HH */
2140     "\\u306f\\u30cf", /* HK */
2141     "\\u30cf\\u30cf", /* KK */
2142     "\\u306f\\u309e", /* H\\u309e */
2143     "\\u30cf\\u30fe", /* K\\u30fe */
2144     "\\u306f\\u3070", /* HH\\u309b */
2145     "\\u30cf\\u30d0", /* KK\\u309b */
2146     "\\u306f\\u3071", /* HH\\u309c */
2147     "\\u30cf\\u3071", /* KH\\u309c */
2148     "\\u30cf\\u30d1", /* KK\\u309c */
2149     "\\u3070\\u309d", /* H\\u309b\\u309d */
2150     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2151     "\\u3070\\u306f", /* H\\u309bH */
2152     "\\u30d0\\u30cf", /* K\\u309bK */
2153     "\\u3070\\u309e", /* H\\u309b\\u309e */
2154     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2155     "\\u3070\\u3070", /* H\\u309bH\\u309b */
2156     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2157     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2158     "\\u3070\\u3071", /* H\\u309bH\\u309c */
2159     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2160     "\\u3071\\u309d", /* H\\u309c\\u309d */
2161     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2162     "\\u3071\\u306f", /* H\\u309cH */
2163     "\\u30d1\\u30cf", /* K\\u309cK */
2164     "\\u3071\\u3070", /* H\\u309cH\\u309b */
2165     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2166     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2167     "\\u3071\\u3071", /* H\\u309cH\\u309c */
2168     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2169   };
2170   /*
2171   static const char *test3[] = {
2172     "\\u221er\\u221e",
2173     "\\u221eR#",
2174     "\\u221et\\u221e",
2175     "#r\\u221e",
2176     "#R#",
2177     "#t%",
2178     "#T%",
2179     "8t\\u221e",
2180     "8T\\u221e",
2181     "8t#",
2182     "8T#",
2183     "8t%",
2184     "8T%",
2185     "8t8",
2186     "8T8",
2187     "\\u03c9r\\u221e",
2188     "\\u03a9R%",
2189     "rr\\u221e",
2190     "rR\\u221e",
2191     "Rr\\u221e",
2192     "RR\\u221e",
2193     "RT%",
2194     "rt8",
2195     "tr\\u221e",
2196     "tr8",
2197     "TR8",
2198     "tt8",
2199     "\\u30b7\\u30e3\\u30fc\\u30ec",
2200   };
2201   */
2202   static const UColAttribute att[] = { UCOL_STRENGTH };
2203   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2204 
2205   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2206   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2207 
2208   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2209   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2210   /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2211   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2212   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2213 }
2214 
TestStrCollIdenticalPrefix(void)2215 static void TestStrCollIdenticalPrefix(void) {
2216   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2217   const char* test[] = {
2218     "ab\\ud9b0\\udc70",
2219     "ab\\ud9b0\\udc71"
2220   };
2221   genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2222 }
2223 /* Contractions should have all their canonically equivalent */
2224 /* strings included */
TestContractionClosure(void)2225 static void TestContractionClosure(void) {
2226   static const struct {
2227     const char *rules;
2228     const char *data[10];
2229     const uint32_t len;
2230   } tests[] = {
2231     {   "&b=\\u00e4\\u00e4",
2232       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2233     {   "&b=\\u00C5",
2234       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2235   };
2236   uint32_t i;
2237 
2238 
2239   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2240     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2241   }
2242 }
2243 
/*
 * Tailorings that combine [before 3] with other rules; the original note on
 * this test says that it "also fails".  Each active case is handed to
 * genericRulesStarter().  A further pair of prefix rules, together with a
 * collation-element dump for diagnosing them, is kept below under #if 0.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;     /* tailoring in escaped form */
    const char *data[10];  /* strings passed to the generic tester */
    const uint32_t len;    /* number of entries of data that are used */
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    /* The same three rules in two different orders. */
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  uint32_t idx = 0;

  while(idx < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    idx++;
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
  genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
  /*
   * This piece of code should be in some sort of verbose mode.
   * It gets the collation elements for the test strings and prints them,
   * which is useful when investigating a failure.
   */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
2321 
TestPrefixCompose(void)2322 static void TestPrefixCompose(void) {
2323   const char* rule1 =
2324         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2325   /*
2326   const char* test[] = {
2327       "\\u30c6\\u30fc\\u30bf",
2328       "\\u30c6\\u30a7\\u30bf",
2329   };
2330   */
2331   {
2332     UErrorCode status = U_ZERO_ERROR;
2333     /*uint32_t i = 0;*/
2334     /*UCollationElements *it = NULL;*/
2335 /*    uint32_t CE;*/
2336     UChar string[256];
2337     uint32_t uStringLen;
2338     UCollator *coll = NULL;
2339 
2340     uStringLen = u_unescape(rule1, string, 256);
2341 
2342     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2343     ucol_close(coll);
2344   }
2345 
2346 
2347 }
2348 
/*
 * Special reset positions used in the rules below:
 *   [last variable]             last variable value
 *   [last primary ignorable]    largest CE for primary ignorable
 *   [last secondary ignorable]  largest CE for secondary ignorable
 *   [last tertiary ignorable]   largest CE for tertiary ignorable
 *   [top]                       guaranteed to be above all implicit CEs,
 *                               for now and in the future (in 1.8)
 */

/*
 * Tailors relative to the special reset positions, with and without
 * [before n], and passes each tailoring together with its expected string
 * sequence to genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /*
   * The values here are hard-coded and correct for the current UCA.
   * When the UCA changes, these values may have to change as well.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight,
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt, which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;     /* tailoring in escaped form */
    const char *data[10];  /* strings passed to the generic tester */
    const uint32_t len;    /* number of entries of data that are used */
  } cases[] = {
#if 0
    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */
#endif
    /*
     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
     * and it *is* possible to "go before" that.
     */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* 'normal' befores */

    /*
     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
     * because there is no tailoring space before that boundary.
     * The tests were made to work by tailoring to a space instead.
     */
    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* There is no code point that corresponds to the last primary ignorable. */
    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },
#endif
    /*
     * NOTE(review): data lists eight strings but len is 7, so the final "u"
     * (tailored after [top]) is not passed to the tester - confirm whether
     * that is intentional.
     */
    { "&[last variable]<z"
      "&' '<x"  /* was &[last primary ignorable]<x, see above */
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  uint32_t idx = 0;

  while(idx < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    idx++;
  }
}
2483 
2484 
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * fails when a tailoring contains an [optimize ...] option.  The ordering
 * itself cannot be expected to change, since the functionality remains the
 * same, so only a trivial sequence is checked.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;     /* tailoring in escaped form */
    const char *data[10];  /* strings passed to the generic tester */
    const uint32_t len;    /* number of entries of data that are used */
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  uint32_t idx = 0;

  while(idx < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    idx++;
  }
}
2506 
2507 /*
2508 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2509 weiv    ucol_strcollIter?
2510 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2511 weiv    these are the input strings?
2512 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2513 weiv    will check - could be a problem with utf-8 iterator
2514 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2515 weiv    hmmm
2516 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2517 weiv    that doesn't sound right
2518 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2519 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2520 cycheng@ca.ibm.c... yes
2521 weiv    and then do the comparison
2522 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2523 weiv    utf-16 strings look like a little endian ones in the example you sent me
2524 weiv    It could be a bug - let me try to test it out
2525 cycheng@ca.ibm.c... ok
2526 cycheng@ca.ibm.c... we can wait till the conf. call
2527 cycheng@ca.ibm.c... next weke
2528 weiv    that would be great
2529 weiv    hmmm
2530 weiv    I might be wrong
2531 weiv    let me play with it some more
2532 cycheng@ca.ibm.c... ok
2533 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2534 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2535 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2536 weiv    ok
2537 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2538 weiv    thanks
2539 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2540 */
#if 0
/*
 * Disabled test: compares the same two strings through ucol_strcollIter(),
 * once fed as UTF-16BE bytes and once fed as UTF-8 bytes, with normalization
 * turned on, and reports an error when the two results disagree.
 */
static void Alexis(void) {
  /* Two 4-byte strings, big-endian UTF-16. */
  const char bytesUTF16BE[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* The same two strings as 4-byte UTF-8 sequences. */
  const char bytesUTF8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UErrorCode errorCode = U_ZERO_ERROR;
  UCollator *collator = ucol_open("", &errorCode);

  UCharIterator left16, right16;
  UCharIterator left8, right8;
  UCollationResult order16, order8;

  /* Lengths are given in bytes for both iterator flavours. */
  uiter_setUTF16BE(&left16, bytesUTF16BE[0], 4);
  uiter_setUTF16BE(&right16, bytesUTF16BE[1], 4);
  uiter_setUTF8(&left8, bytesUTF8[0], 4);
  uiter_setUTF8(&right8, bytesUTF8[1], 4);

  ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &errorCode);

  order16 = ucol_strcollIter(collator, &left16, &right16, &errorCode);
  order8 = ucol_strcollIter(collator, &left8, &right8, &errorCode);

  if(order16 != order8) {
    log_err("different results\n");
  }

  ucol_close(collator);
}
#endif
2581 
2582 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2583 static void Alexis2(void) {
2584   UErrorCode status = U_ZERO_ERROR;
2585   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2589 
2590   UConverter *conv = NULL;
2591 
2592   UCharIterator U16BEItS, U16BEItT;
2593   UCharIterator U8ItS, U8ItT;
2594 
2595   UCollationResult resU16, resU16BE, resU8;
2596 
2597   static const char* const pairs[][2] = {
2598     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2599     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2600     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2601     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2602     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2603     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2604     { "\\u0020", "\\u0020\\u0000"}
2605 /*
2606 5F20 (my result here)
2607 5F204E008E3F
2608 5F20 (your result here)
2609 */
2610   };
2611 
2612   int32_t i = 0;
2613 
2614   UCollator *coll = ucol_open("", &status);
2615   if(status == U_FILE_ACCESS_ERROR) {
2616     log_data_err("Is your data around?\n");
2617     return;
2618   } else if(U_FAILURE(status)) {
2619     log_err("Error opening collator\n");
2620     return;
2621   }
2622   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2623   conv = ucnv_open("UTF16BE", &status);
2624   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2625     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627 
2628     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2629 
2630     log_verbose("Result of strcoll is %i\n", resU16);
2631 
2632     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2633     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2634     (void)U16BELenS;    /* Suppress set but not used warnings. */
2635     (void)U16BELenT;
2636 
2637     /* use the original sizes, as the result from converter is in bytes */
2638     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2639     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2640 
2641     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2642 
2643     log_verbose("Result of U16BE is %i\n", resU16BE);
2644 
2645     if(resU16 != resU16BE) {
2646       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2647     }
2648 
2649     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2650     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2651 
2652     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2653     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2654 
2655     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2656 
2657     if(resU16 != resU8) {
2658       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2659     }
2660 
2661   }
2662 
2663   ucol_close(coll);
2664   ucnv_close(conv);
2665 }
2666 
TestHebrewUCA(void)2667 static void TestHebrewUCA(void) {
2668   UErrorCode status = U_ZERO_ERROR;
2669   static const char *first[] = {
2670     "d790d6b8d79cd795d6bcd7a9",
2671     "d790d79cd79ed7a7d799d799d7a1",
2672     "d790d6b4d79ed795d6bcd7a9",
2673   };
2674 
2675   char utf8String[3][256];
2676   UChar utf16String[3][256];
2677 
2678   int32_t i = 0, j = 0;
2679   int32_t sizeUTF8[3];
2680   int32_t sizeUTF16[3];
2681 
2682   UCollator *coll = ucol_open("", &status);
2683   if (U_FAILURE(status)) {
2684       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2685       return;
2686   }
2687   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2688 
2689   for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2690     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2691     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2692     log_verbose("%i: ");
2693     for(j = 0; j < sizeUTF16[i]; j++) {
2694       /*log_verbose("\\u%04X", utf16String[i][j]);*/
2695       log_verbose("%04X", utf16String[i][j]);
2696     }
2697     log_verbose("\n");
2698   }
2699   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2700     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2701       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2702     }
2703   }
2704 
2705   ucol_close(coll);
2706 
2707 }
2708 
TestPartialSortKeyTermination(void)2709 static void TestPartialSortKeyTermination(void) {
2710   static const char* cases[] = {
2711     "\\u1234\\u1234\\udc00",
2712     "\\udc00\\ud800\\ud800"
2713   };
2714 
2715   int32_t i;
2716 
2717   UErrorCode status = U_ZERO_ERROR;
2718 
2719   UCollator *coll = ucol_open("", &status);
2720 
2721   UCharIterator iter;
2722 
2723   UChar currCase[256];
2724   int32_t length = 0;
2725   int32_t pKeyLen = 0;
2726 
2727   uint8_t key[256];
2728 
2729   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2730     uint32_t state[2] = {0, 0};
2731     length = u_unescape(cases[i], currCase, 256);
2732     uiter_setString(&iter, currCase, length);
2733     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2734     (void)pKeyLen;   /* Suppress set but not used warning. */
2735 
2736     log_verbose("Done\n");
2737 
2738   }
2739   ucol_close(coll);
2740 }
2741 
TestSettings(void)2742 static void TestSettings(void) {
2743   static const char* cases[] = {
2744     "apple",
2745       "Apple"
2746   };
2747 
2748   static const char* locales[] = {
2749     "",
2750       "en"
2751   };
2752 
2753   UErrorCode status = U_ZERO_ERROR;
2754 
2755   int32_t i = 0, j = 0;
2756 
2757   UChar source[256], target[256];
2758   int32_t sLen = 0, tLen = 0;
2759 
2760   UCollator *collateObject = NULL;
2761   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2762     collateObject = ucol_open(locales[i], &status);
2763     ucol_setStrength(collateObject, UCOL_PRIMARY);
2764     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2765     for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2766       sLen = u_unescape(cases[j-1], source, 256);
2767       source[sLen] = 0;
2768       tLen = u_unescape(cases[j], target, 256);
2769       source[tLen] = 0;
2770       doTest(collateObject, source, target, UCOL_EQUAL);
2771     }
2772     ucol_close(collateObject);
2773   }
2774 }
2775 
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)2776 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2777     UErrorCode status = U_ZERO_ERROR;
2778     int32_t errorNo = 0;
2779     const UChar *sourceRules = NULL;
2780     int32_t sourceRulesLen = 0;
2781     UParseError parseError;
2782     UColAttributeValue french = UCOL_OFF;
2783 
2784     if(!ucol_equals(source, target)) {
2785         log_err("Same collators, different address not equal\n");
2786         errorNo++;
2787     }
2788     ucol_close(target);
2789     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2790         target = ucol_safeClone(source, NULL, NULL, &status);
2791         if(U_FAILURE(status)) {
2792             log_err("Error creating clone\n");
2793             errorNo++;
2794             return errorNo;
2795         }
2796         if(!ucol_equals(source, target)) {
2797             log_err("Collator different from it's clone\n");
2798             errorNo++;
2799         }
2800         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2801         if(french == UCOL_ON) {
2802             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2803         } else {
2804             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2805         }
2806         if(U_FAILURE(status)) {
2807             log_err("Error setting attributes\n");
2808             errorNo++;
2809             return errorNo;
2810         }
2811         if(ucol_equals(source, target)) {
2812             log_err("Collators same even when options changed\n");
2813             errorNo++;
2814         }
2815         ucol_close(target);
2816 
2817         sourceRules = ucol_getRules(source, &sourceRulesLen);
2818         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2819         if(U_FAILURE(status)) {
2820             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2821             errorNo++;
2822             return errorNo;
2823         }
2824         /* Note: The tailoring rule string is an optional data item. */
2825         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2826             log_err("Collator different from collator that was created from the same rules\n");
2827             errorNo++;
2828         }
2829         ucol_close(target);
2830     }
2831     return errorNo;
2832 }
2833 
2834 
TestEquals(void)2835 static void TestEquals(void) {
2836     /* ucol_equals is not currently a public API. There is a chance that it will become
2837     * something like this.
2838     */
2839     /* test whether the two collators instantiated from the same locale are equal */
2840     UErrorCode status = U_ZERO_ERROR;
2841     UParseError parseError;
2842     int32_t noOfLoc = uloc_countAvailable();
2843     const char *locName = NULL;
2844     UCollator *source = NULL, *target = NULL;
2845     int32_t i = 0;
2846 
2847     const char* rules[] = {
2848         "&l < lj <<< Lj <<< LJ",
2849         "&n < nj <<< Nj <<< NJ",
2850         "&ae <<< \\u00e4",
2851         "&AE <<< \\u00c4"
2852     };
2853     /*
2854     const char* badRules[] = {
2855     "&l <<< Lj",
2856     "&n < nj <<< nJ <<< NJ",
2857     "&a <<< \\u00e4",
2858     "&AE <<< \\u00c4 <<< x"
2859     };
2860     */
2861 
2862     UChar sourceRules[1024], targetRules[1024];
2863     int32_t sourceRulesSize = 0, targetRulesSize = 0;
2864     int32_t rulesSize = UPRV_LENGTHOF(rules);
2865 
2866     for(i = 0; i < rulesSize; i++) {
2867         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2868         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2869     }
2870 
2871     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2872     if(status == U_FILE_ACCESS_ERROR) {
2873         log_data_err("Is your data around?\n");
2874         return;
2875     } else if(U_FAILURE(status)) {
2876         log_err("Error opening collator\n");
2877         return;
2878     }
2879     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2880     if(!ucol_equals(source, target)) {
2881         log_err("Equivalent collators not equal!\n");
2882     }
2883     ucol_close(source);
2884     ucol_close(target);
2885 
2886     source = ucol_open("root", &status);
2887     target = ucol_open("root", &status);
2888     log_verbose("Testing root\n");
2889     if(!ucol_equals(source, source)) {
2890         log_err("Same collator not equal\n");
2891     }
2892     if(TestEqualsForCollator("root", source, target)) {
2893         log_err("Errors for root\n");
2894     }
2895     ucol_close(source);
2896 
2897     for(i = 0; i<noOfLoc; i++) {
2898         status = U_ZERO_ERROR;
2899         locName = uloc_getAvailable(i);
2900         /*if(hasCollationElements(locName)) {*/
2901         log_verbose("Testing equality for locale %s\n", locName);
2902         source = ucol_open(locName, &status);
2903         target = ucol_open(locName, &status);
2904         if (U_FAILURE(status)) {
2905             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2906             continue;
2907         }
2908         if(TestEqualsForCollator(locName, source, target)) {
2909             log_err("Errors for locale %s\n", locName);
2910         }
2911         ucol_close(source);
2912         /*}*/
2913     }
2914 }
2915 
TestJ2726(void)2916 static void TestJ2726(void) {
2917     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2918     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2919     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2920     UErrorCode status = U_ZERO_ERROR;
2921     UCollator *coll = ucol_open("en", &status);
2922     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2923     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2924     doTest(coll, a, aSpace, UCOL_EQUAL);
2925     doTest(coll, aSpace, a, UCOL_EQUAL);
2926     doTest(coll, a, spaceA, UCOL_EQUAL);
2927     doTest(coll, spaceA, a, UCOL_EQUAL);
2928     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2929     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2930     ucol_close(coll);
2931 }
2932 
NullRule(void)2933 static void NullRule(void) {
2934     UChar r[3] = {0};
2935     UErrorCode status = U_ZERO_ERROR;
2936     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2937     if(U_SUCCESS(status)) {
2938         log_err("This should have been an error!\n");
2939         ucol_close(coll);
2940     } else {
2941         status = U_ZERO_ERROR;
2942     }
2943     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2944     if(U_FAILURE(status)) {
2945         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2946     } else {
2947         ucol_close(coll);
2948     }
2949 }
2950 
2951 /**
2952  * Test for CollationElementIterator previous and next for the whole set of
2953  * unicode characters with normalization on.
2954  */
TestNumericCollation(void)2955 static void TestNumericCollation(void)
2956 {
2957     UErrorCode status = U_ZERO_ERROR;
2958 
2959     const static char *basicTestStrings[]={
2960     "hello1",
2961     "hello2",
2962     "hello2002",
2963     "hello2003",
2964     "hello123456",
2965     "hello1234567",
2966     "hello10000000",
2967     "hello100000000",
2968     "hello1000000000",
2969     "hello10000000000",
2970     };
2971 
2972     const static char *preZeroTestStrings[]={
2973     "avery10000",
2974     "avery010000",
2975     "avery0010000",
2976     "avery00010000",
2977     "avery000010000",
2978     "avery0000010000",
2979     "avery00000010000",
2980     "avery000000010000",
2981     };
2982 
2983     const static char *thirtyTwoBitNumericStrings[]={
2984     "avery42949672960",
2985     "avery42949672961",
2986     "avery42949672962",
2987     "avery429496729610"
2988     };
2989 
2990      const static char *longNumericStrings[]={
2991      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2992         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2993         are treated as multiple collation elements. */
2994     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2995     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2996     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2997     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2998     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2999     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3000     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3001     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3002     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3003     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3004     };
3005 
3006     const static char *supplementaryDigits[] = {
3007       "\\uD835\\uDFCE", /* 0 */
3008       "\\uD835\\uDFCF", /* 1 */
3009       "\\uD835\\uDFD0", /* 2 */
3010       "\\uD835\\uDFD1", /* 3 */
3011       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3012       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3013       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3014       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3015       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3016       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3017     };
3018 
3019     const static char *foreignDigits[] = {
3020       "\\u0661",
3021         "\\u0662",
3022         "\\u0663",
3023       "\\u0661\\u0660",
3024       "\\u0661\\u0662",
3025       "\\u0661\\u0663",
3026       "\\u0662\\u0660",
3027       "\\u0662\\u0662",
3028       "\\u0662\\u0663",
3029       "\\u0663\\u0660",
3030       "\\u0663\\u0662",
3031       "\\u0663\\u0663"
3032     };
3033 
3034     const static char *evenZeroes[] = {
3035       "2000",
3036       "2001",
3037         "2002",
3038         "2003"
3039     };
3040 
3041     UColAttribute att = UCOL_NUMERIC_COLLATION;
3042     UColAttributeValue val = UCOL_ON;
3043 
3044     /* Open our collator. */
3045     UCollator* coll = ucol_open("root", &status);
3046     if (U_FAILURE(status)){
3047         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3048               myErrorName(status));
3049         return;
3050     }
3051     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3052     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3053     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3054     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3055     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3056     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3057 
3058     /* Setting up our collator to do digits. */
3059     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3060     if (U_FAILURE(status)){
3061         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3062               myErrorName(status));
3063         return;
3064     }
3065 
3066     /*
3067        Testing that prepended zeroes still yield the correct collation behavior.
3068        We expect that every element in our strings array will be equal.
3069     */
3070     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3071 
3072     ucol_close(coll);
3073 }
3074 
TestTibetanConformance(void)3075 static void TestTibetanConformance(void)
3076 {
3077     const char* test[] = {
3078         "\\u0FB2\\u0591\\u0F71\\u0061",
3079         "\\u0FB2\\u0F71\\u0061"
3080     };
3081 
3082     UErrorCode status = U_ZERO_ERROR;
3083     UCollator *coll = ucol_open("", &status);
3084     UChar source[100];
3085     UChar target[100];
3086     int result;
3087     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3088     if (U_SUCCESS(status)) {
3089         u_unescape(test[0], source, 100);
3090         u_unescape(test[1], target, 100);
3091         doTest(coll, source, target, UCOL_EQUAL);
3092         result = ucol_strcoll(coll, source, -1,   target, -1);
3093         log_verbose("result %d\n", result);
3094         if (UCOL_EQUAL != result) {
3095             log_err("Tibetan comparison error\n");
3096         }
3097     }
3098     ucol_close(coll);
3099 
3100     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3101 }
3102 
/** Runs the generic ordering check on two Han strings with the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *hanStrings[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanStrings, UPRV_LENGTHOF(hanStrings));
}
3107 
3108 /**
3109  * Iterate through the given iterator, checking to see that all the strings
3110  * in the expected array are present.
3111  * @param expected array of strings we expect to see, or NULL
3112  * @param expectedCount number of elements of expected, or 0
3113  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3114 static int32_t checkUEnumeration(const char* msg,
3115                                  UEnumeration* iter,
3116                                  const char** expected,
3117                                  int32_t expectedCount) {
3118     UErrorCode ec = U_ZERO_ERROR;
3119     int32_t i = 0, n, j, bit;
3120     int32_t seenMask = 0;
3121 
3122     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3123     n = uenum_count(iter, &ec);
3124     if (!assertSuccess("count", &ec)) return -1;
3125     log_verbose("%s = [", msg);
3126     for (;; ++i) {
3127         const char* s = uenum_next(iter, NULL, &ec);
3128         if (!assertSuccess("snext", &ec) || s == NULL) break;
3129         if (i != 0) log_verbose(",");
3130         log_verbose("%s", s);
3131         /* check expected list */
3132         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3133             if ((seenMask&bit) == 0 &&
3134                 uprv_strcmp(s, expected[j]) == 0) {
3135                 seenMask |= bit;
3136                 break;
3137             }
3138         }
3139     }
3140     log_verbose("] (%d)\n", i);
3141     assertTrue("count verified", i==n);
3142     /* did we see all expected strings? */
3143     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3144         if ((seenMask&bit)!=0) {
3145             log_verbose("Ok: \"%s\" seen\n", expected[j]);
3146         } else {
3147             log_err("FAIL: \"%s\" not seen\n", expected[j]);
3148         }
3149     }
3150     return n;
3151 }
3152 
3153 /**
3154  * Test new API added for separate collation tree.
3155  */
TestSeparateTrees(void)3156 static void TestSeparateTrees(void) {
3157     UErrorCode ec = U_ZERO_ERROR;
3158     UEnumeration *e = NULL;
3159     int32_t n = -1;
3160     UBool isAvailable;
3161     char loc[256];
3162 
3163     static const char* AVAIL[] = { "en", "de" };
3164 
3165     static const char* KW[] = { "collation" };
3166 
3167     static const char* KWVAL[] = { "phonebook", "stroke" };
3168 
3169 #if !UCONFIG_NO_SERVICE
3170     e = ucol_openAvailableLocales(&ec);
3171     if (e != NULL) {
3172         assertSuccess("ucol_openAvailableLocales", &ec);
3173         assertTrue("ucol_openAvailableLocales!=0", e!=0);
3174         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3175         (void)n;    /* Suppress set but not used warnings. */
3176         /* Don't need to check n because we check list */
3177         uenum_close(e);
3178     } else {
3179         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3180     }
3181 #endif
3182 
3183     e = ucol_getKeywords(&ec);
3184     if (e != NULL) {
3185         assertSuccess("ucol_getKeywords", &ec);
3186         assertTrue("ucol_getKeywords!=0", e!=0);
3187         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3188         /* Don't need to check n because we check list */
3189         uenum_close(e);
3190     } else {
3191         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3192     }
3193 
3194     e = ucol_getKeywordValues(KW[0], &ec);
3195     if (e != NULL) {
3196         assertSuccess("ucol_getKeywordValues", &ec);
3197         assertTrue("ucol_getKeywordValues!=0", e!=0);
3198         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3199         /* Don't need to check n because we check list */
3200         uenum_close(e);
3201     } else {
3202         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3203     }
3204 
3205     /* Try setting a warning before calling ucol_getKeywordValues */
3206     ec = U_USING_FALLBACK_WARNING;
3207     e = ucol_getKeywordValues(KW[0], &ec);
3208     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3209         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3210         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3211         /* Don't need to check n because we check list */
3212         uenum_close(e);
3213     }
3214 
3215     /*
3216 U_DRAFT int32_t U_EXPORT2
3217 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3218                              const char* locale, UBool* isAvailable,
3219                              UErrorCode* status);
3220 }
3221 */
3222     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3223                                      &isAvailable, &ec);
3224     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3225         assertEquals("getFunctionalEquivalent(de)", "root", loc);
3226         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3227                    isAvailable == TRUE);
3228     }
3229 
3230     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3231                                      &isAvailable, &ec);
3232     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3233         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3234         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3235                    isAvailable == FALSE);
3236     }
3237 }
3238 
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through a set of "[before 2]" tailoring
 * rules for accented vowels, and through the "zh" locale.
 */
static void TestBeforePinyin(void) {
    /* One rule string, split across lines per base vowel. */
    static const char toneRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    static const char *accentedForms[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* Each list is checked against the explicit rules first, then against "zh". */
    genericRulesStarter(toneRules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));
    genericRulesStarter(toneRules, accentedForms, UPRV_LENGTHOF(accentedForms));
    genericLocaleStarter("zh", accentedForms, UPRV_LENGTHOF(accentedForms));
}
3299 
TestBeforeTightening(void)3300 static void TestBeforeTightening(void) {
3301     static const struct {
3302         const char *rules;
3303         UErrorCode expectedStatus;
3304     } tests[] = {
3305         { "&[before 1]a<x", U_ZERO_ERROR },
3306         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3307         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3308         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3309         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3310         { "&[before 2]a<<x",U_ZERO_ERROR },
3311         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3312         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3313         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3314         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3315         { "&[before 3]a<<<x",U_ZERO_ERROR },
3316         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3317         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3318     };
3319 
3320     int32_t i = 0;
3321 
3322     UErrorCode status = U_ZERO_ERROR;
3323     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3324     uint32_t rlen = 0;
3325 
3326     UCollator *coll = NULL;
3327 
3328 
3329     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3330         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3331         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3332         if(status != tests[i].expectedStatus) {
3333             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3334                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3335         }
3336         ucol_close(coll);
3337         status = U_ZERO_ERROR;
3338     }
3339 
3340 }
3341 
3342 /*
3343 &m < a
3344 &[before 1] a < x <<< X << q <<< Q < z
3345 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3346 
3347 &m < a
3348 &[before 2] a << x <<< X << q <<< Q < z
3349 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3350 
3351 &m < a
3352 &[before 3] a <<< x <<< X << q <<< Q < z
3353 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3354 
3355 
3356 &m << a
3357 &[before 1] a < x <<< X << q <<< Q < z
3358 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3359 
3360 &m << a
3361 &[before 2] a << x <<< X << q <<< Q < z
3362 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3363 
3364 &m << a
3365 &[before 3] a <<< x <<< X << q <<< Q < z
3366 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3367 
3368 
3369 &m <<< a
3370 &[before 1] a < x <<< X << q <<< Q < z
3371 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3372 
3373 &m <<< a
3374 &[before 2] a << x <<< X << q <<< Q < z
3375 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3376 
3377 &m <<< a
3378 &[before 3] a <<< x <<< X << q <<< Q < z
3379 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3380 
3381 
3382 &[before 1] s < x <<< X << q <<< Q < z
3383 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3384 
3385 &[before 2] s << x <<< X << q <<< Q < z
3386 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3387 
3388 &[before 3] s <<< x <<< X << q <<< Q < z
3389 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3390 
3391 
3392 &[before 1] \u24DC < x <<< X << q <<< Q < z
3393 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3394 
3395 &[before 2] \u24DC << x <<< X << q <<< Q < z
3396 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3397 
3398 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3399 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3400 */
3401 
3402 
#if 0
/* requires features not yet supported */
/*
 * Disabled test: for every tailoring in the table, passes the rule string
 * and the expected ordering of the listed strings to genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        /* reset anchored on a letter tailored primary-after m */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset anchored on a letter tailored secondary-after m */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset anchored on a letter tailored tertiary-after m */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* reset anchored on an untailored letter */
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* reset anchored on U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t caseIdx;

    for (caseIdx = 0; caseIdx < UPRV_LENGTHOF(tests); ++caseIdx) {
        genericRulesStarter(tests[caseIdx].rules, tests[caseIdx].order, tests[caseIdx].size);
    }
}
#endif
3450 
TestTailorNULL(void)3451 static void TestTailorNULL( void ) {
3452     const static char* rule = "&a <<< '\\u0000'";
3453     UErrorCode status = U_ZERO_ERROR;
3454     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3455     uint32_t rlen = 0;
3456     UChar a = 1, null = 0;
3457     UCollationResult res = UCOL_EQUAL;
3458 
3459     UCollator *coll = NULL;
3460 
3461 
3462     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3463     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3464 
3465     if(U_FAILURE(status)) {
3466         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3467     } else {
3468         res = ucol_strcoll(coll, &a, 1, &null, 1);
3469 
3470         if(res != UCOL_LESS) {
3471             log_err("NULL was not tailored properly!\n");
3472         }
3473     }
3474 
3475     ucol_close(coll);
3476 }
3477 
3478 static void
TestUpperFirstQuaternary(void)3479 TestUpperFirstQuaternary(void)
3480 {
3481   const char* tests[] = { "B", "b", "Bb", "bB" };
3482   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3483   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3484   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3485 }
3486 
3487 static void
TestJ4960(void)3488 TestJ4960(void)
3489 {
3490   const char* tests[] = { "\\u00e2T", "aT" };
3491   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3492   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3493   const char* tests2[] = { "a", "A" };
3494   const char* rule = "&[first tertiary ignorable]=A=a";
3495   UColAttribute att2[] = { UCOL_CASE_LEVEL };
3496   UColAttributeValue attVals2[] = { UCOL_ON };
3497   /* Test whether we correctly ignore primary ignorables on case level when */
3498   /* we have only primary & case level */
3499   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3500   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3501   /* and case level */
3502   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3503   /* Test whether completely ignorable letters have case level info (they shouldn't) */
3504   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3505 }
3506 
3507 static void
TestJ5223(void)3508 TestJ5223(void)
3509 {
3510   static const char *test = "this is a test string";
3511   UChar ustr[256];
3512   int32_t ustr_length = u_unescape(test, ustr, 256);
3513   unsigned char sortkey[256];
3514   int32_t sortkey_length;
3515   UErrorCode status = U_ZERO_ERROR;
3516   static UCollator *coll = NULL;
3517   coll = ucol_open("root", &status);
3518   if(U_FAILURE(status)) {
3519     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3520     return;
3521   }
3522   ucol_setStrength(coll, UCOL_PRIMARY);
3523   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3524   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3525   if (U_FAILURE(status)) {
3526     log_err("Failed setting atributes\n");
3527     return;
3528   }
3529   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3530   if (sortkey_length > 256) return;
3531 
3532   /* we mark the position where the null byte should be written in advance */
3533   sortkey[sortkey_length-1] = 0xAA;
3534 
3535   /* we set the buffer size one byte higher than needed */
3536   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3537     sortkey_length+1);
3538 
3539   /* no error occurs (for me) */
3540   if (sortkey[sortkey_length-1] == 0xAA) {
3541     log_err("Hit bug at first try\n");
3542   }
3543 
3544   /* we mark the position where the null byte should be written again */
3545   sortkey[sortkey_length-1] = 0xAA;
3546 
3547   /* this time we set the buffer size to the exact amount needed */
3548   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3549     sortkey_length);
3550 
3551   /* now the trailing null byte is not written */
3552   if (sortkey[sortkey_length-1] == 0xAA) {
3553     log_err("Hit bug at second try\n");
3554   }
3555 
3556   ucol_close(coll);
3557 }
3558 
/*
 * Regression test for the Thai partial sort key problem: feeds two Thai
 * strings that differ only in their second-to-last code unit
 * (U+0E47 vs. U+0E48) to the generic ordering check for locale "th".
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, UPRV_LENGTHOF(thaiPair));
}
3570 
/**
 * Runs the generic rule-based ordering check for "a" before "y" with a
 * tailoring that places 'a' after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *orderedPair[] = { "a", "y" };
    const char *tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, orderedPair, UPRV_LENGTHOF(orderedPair));
}
3578 
3579 static void
TestVI5913(void)3580 TestVI5913(void)
3581 {
3582     UErrorCode status = U_ZERO_ERROR;
3583     int32_t i, j;
3584     UCollator *coll =NULL;
3585     uint8_t  resColl[100], expColl[100];
3586     int32_t  rLen, tLen, ruleLen, sLen, kLen;
3587     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3588     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3589     /*
3590      * Note: Just tailoring &z<ae^ does not work as expected:
3591      * The UCA spec requires for discontiguous contractions that they
3592      * extend an *existing match* by one combining mark at a time.
3593      * Therefore, ae must be a contraction so that the builder finds
3594      * discontiguous contractions for ae^, for example with an intervening underdot.
3595      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3596      */
3597     UChar rule3[256]={
3598         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3599         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3600         0};
3601     static const UChar tData[][20]={
3602         {0x1EAC, 0},
3603         {0x0041, 0x0323, 0x0302, 0},
3604         {0x1EA0, 0x0302, 0},
3605         {0x00C2, 0x0323, 0},
3606         {0x1ED8, 0},  /* O with dot and circumflex */
3607         {0x1ECC, 0x0302, 0},
3608         {0x1EB7, 0},
3609         {0x1EA1, 0x0306, 0},
3610     };
3611     static const UChar tailorData[][20]={
3612         {0x1FA2, 0},  /* Omega with 3 combining marks */
3613         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3614         {0x1FF3, 0x0313, 0x0300, 0},
3615         {0x1F60, 0x0300, 0x0345, 0},
3616         {0x1F62, 0x0345, 0},
3617         {0x1FA0, 0x0300, 0},
3618     };
3619     static const UChar tailorData2[][20]={
3620         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3621         {0x0073, 0x0323, 0x030C, 0},
3622         {0x0073, 0x030C, 0x0323, 0},
3623     };
3624     static const UChar tailorData3[][20]={
3625         {0x007a, 0},  /*  z */
3626         {0x0061, 0x0065, 0},  /*  a + e */
3627         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3628         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3629         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3630         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3631         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3632         {0x00EA, 0},  /* e with circumflex  */
3633     };
3634 
3635     /* Test Vietnamese sort. */
3636     coll = ucol_open("vi", &status);
3637     if(U_FAILURE(status)) {
3638         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3639         return;
3640     }
3641     log_verbose("\n\nVI collation:");
3642     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3643         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3644     }
3645     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3646         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3647     }
3648     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3649         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3650     }
3651     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3652         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3653     }
3654 
3655     for (j=0; j<8; j++) {
3656         tLen = u_strlen(tData[j]);
3657         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3658         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3659         for(i = 0; i<rLen; i++) {
3660             log_verbose(" %02X", resColl[i]);
3661         }
3662     }
3663 
3664     ucol_close(coll);
3665 
3666     /* Test Romanian sort. */
3667     coll = ucol_open("ro", &status);
3668     log_verbose("\n\nRO collation:");
3669     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3670         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3671     }
3672     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3673         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3674     }
3675     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3676         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3677     }
3678 
3679     for (j=4; j<8; j++) {
3680         tLen = u_strlen(tData[j]);
3681         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3682         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3683         for(i = 0; i<rLen; i++) {
3684             log_verbose(" %02X", resColl[i]);
3685         }
3686     }
3687     ucol_close(coll);
3688 
3689     /* Test the precomposed Greek character with 3 combining marks. */
3690     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3691     ruleLen = u_strlen(rule);
3692     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3693     if (U_FAILURE(status)) {
3694         log_err("ucol_openRules failed with %s\n", u_errorName(status));
3695         return;
3696     }
3697     sLen = u_strlen(tailorData[0]);
3698     for (j=1; j<6; j++) {
3699         tLen = u_strlen(tailorData[j]);
3700         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3701             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3702         }
3703     }
3704     /* Test getSortKey. */
3705     tLen = u_strlen(tailorData[0]);
3706     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3707     for (j=0; j<6; j++) {
3708         tLen = u_strlen(tailorData[j]);
3709         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3710         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3711             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3712             for(i = 0; i<rLen; i++) {
3713                 log_err(" %02X", resColl[i]);
3714             }
3715         }
3716     }
3717     ucol_close(coll);
3718 
3719     log_verbose("\n\nTailoring test for s with caron:");
3720     ruleLen = u_strlen(rule2);
3721     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3722     tLen = u_strlen(tailorData2[0]);
3723     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3724     for (j=1; j<3; j++) {
3725         tLen = u_strlen(tailorData2[j]);
3726         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3727         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3728             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3729             for(i = 0; i<rLen; i++) {
3730                 log_err(" %02X", resColl[i]);
3731             }
3732         }
3733     }
3734     ucol_close(coll);
3735 
3736     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3737     ruleLen = u_strlen(rule3);
3738     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3739     tLen = u_strlen(tailorData3[3]);
3740     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3741     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3742     for(i = 0; i<kLen; i++) {
3743         log_verbose(" %02X", expColl[i]);
3744     }
3745     for (j=4; j<6; j++) {
3746         tLen = u_strlen(tailorData3[j]);
3747         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3748 
3749         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3750             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3751             for(i = 0; i<rLen; i++) {
3752                 log_err(" %02X", resColl[i]);
3753             }
3754         }
3755 
3756         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3757          for(i = 0; i<rLen; i++) {
3758              log_verbose(" %02X", resColl[i]);
3759          }
3760     }
3761     ucol_close(coll);
3762 }
3763 
3764 static void
TestTailor6179(void)3765 TestTailor6179(void)
3766 {
3767     UErrorCode status = U_ZERO_ERROR;
3768     int32_t i;
3769     UCollator *coll =NULL;
3770     uint8_t  resColl[100];
3771     int32_t  rLen, tLen, ruleLen;
3772     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3773     static const UChar rule1[]={
3774             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3775             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3776             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3777             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3778     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3779     static const UChar rule2[]={
3780             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3781             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3782             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3783             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3784             0x3C,0x3C,0x20,0x62,0};
3785 
3786     static const UChar tData1[][4]={
3787         {0x61, 0},
3788         {0x62, 0},
3789         { 0xFDD0,0x009E, 0}
3790     };
3791     static const UChar tData2[][4]={
3792         {0x61, 0},
3793         {0x62, 0},
3794         { 0xFDD0,0x009E, 0}
3795      };
3796 
3797     /*
3798      * These values from FractionalUCA.txt will change,
3799      * and need to be updated here.
3800      * TODO: Make this not check for particular sort keys.
3801      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3802      */
3803     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3804     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3805     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3806     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3807 
3808     UParseError parseError;
3809 
3810     /* Test [Last Primary ignorable] */
3811 
3812     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3813     ruleLen = u_strlen(rule1);
3814     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3815     if (U_FAILURE(status)) {
3816         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3817         return;
3818     }
3819     tLen = u_strlen(tData1[0]);
3820     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3821     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3822         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3823         for(i = 0; i<rLen; i++) {
3824             log_err(" %02X", resColl[i]);
3825         }
3826         log_err("\n");
3827     }
3828     tLen = u_strlen(tData1[1]);
3829     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3830     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3831         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3832         for(i = 0; i<rLen; i++) {
3833             log_err(" %02X", resColl[i]);
3834         }
3835         log_err("\n");
3836     }
3837     ucol_close(coll);
3838 
3839 
3840     /* Test [Last Secondary ignorable] */
3841     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3842     ruleLen = u_strlen(rule2);
3843     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3844     if (U_FAILURE(status)) {
3845         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3846         log_info("  offset=%d  \"%s\" | \"%s\"\n",
3847                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3848         return;
3849     }
3850     tLen = u_strlen(tData2[0]);
3851     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3852     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3853         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3854         for(i = 0; i<rLen; i++) {
3855             log_err(" %02X", resColl[i]);
3856         }
3857         log_err("\n");
3858     }
3859     tLen = u_strlen(tData2[1]);
3860     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3861     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3862       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3863       for(i = 0; i<rLen; i++) {
3864         log_err(" %02X", resColl[i]);
3865       }
3866       log_err("\n");
3867     }
3868     ucol_close(coll);
3869 }
3870 
3871 static void
TestUCAPrecontext(void)3872 TestUCAPrecontext(void)
3873 {
3874     UErrorCode status = U_ZERO_ERROR;
3875     int32_t i, j;
3876     UCollator *coll =NULL;
3877     uint8_t  resColl[100], prevColl[100];
3878     int32_t  rLen, tLen, ruleLen;
3879     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3880     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3881     /* & l middle-dot << a  a is an expansion. */
3882 
3883     UChar tData1[][20]={
3884             { 0xb7, 0},  /* standalone middle dot(0xb7) */
3885             { 0x387, 0}, /* standalone middle dot(0x387) */
3886             { 0x61, 0},  /* a */
3887             { 0x6C, 0},  /* l */
3888             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3889             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3890             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3891             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3892             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3893             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3894             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3895      };
3896 
3897     log_verbose("\n\nEN collation:");
3898     coll = ucol_open("en", &status);
3899     if (U_FAILURE(status)) {
3900         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3901         return;
3902     }
3903     for (j=0; j<11; j++) {
3904         tLen = u_strlen(tData1[j]);
3905         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3906         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3907             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3908                     j, tData1[j]);
3909         }
3910         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3911         for(i = 0; i<rLen; i++) {
3912             log_verbose(" %02X", resColl[i]);
3913         }
3914         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3915      }
3916      ucol_close(coll);
3917 
3918 
3919      log_verbose("\n\nJA collation:");
3920      coll = ucol_open("ja", &status);
3921      if (U_FAILURE(status)) {
3922          log_err("Tailoring test: &z <<a|- failed!");
3923          return;
3924      }
3925      for (j=0; j<11; j++) {
3926          tLen = u_strlen(tData1[j]);
3927          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3928          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3929              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3930                      j, tData1[j]);
3931          }
3932          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3933          for(i = 0; i<rLen; i++) {
3934              log_verbose(" %02X", resColl[i]);
3935          }
3936          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3937       }
3938       ucol_close(coll);
3939 
3940 
3941       log_verbose("\n\nTailoring test: & middle dot < a ");
3942       ruleLen = u_strlen(rule1);
3943       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3944       if (U_FAILURE(status)) {
3945           log_err("Tailoring test: & middle dot < a failed!");
3946           return;
3947       }
3948       for (j=0; j<11; j++) {
3949           tLen = u_strlen(tData1[j]);
3950           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3951           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3952               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3953                       j, tData1[j]);
3954           }
3955           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3956           for(i = 0; i<rLen; i++) {
3957               log_verbose(" %02X", resColl[i]);
3958           }
3959           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3960        }
3961        ucol_close(coll);
3962 
3963 
3964        log_verbose("\n\nTailoring test: & l middle-dot << a ");
3965        ruleLen = u_strlen(rule2);
3966        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3967        if (U_FAILURE(status)) {
3968            log_err("Tailoring test: & l middle-dot << a failed!");
3969            return;
3970        }
3971        for (j=0; j<11; j++) {
3972            tLen = u_strlen(tData1[j]);
3973            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3974            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3975                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3976                        j, tData1[j]);
3977            }
3978            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3979                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3980                        j, tData1[j]);
3981            }
3982            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3983            for(i = 0; i<rLen; i++) {
3984                log_verbose(" %02X", resColl[i]);
3985            }
3986            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3987         }
3988         ucol_close(coll);
3989 }
3990 
3991 static void
TestOutOfBuffer5468(void)3992 TestOutOfBuffer5468(void)
3993 {
3994     static const char *test = "\\u4e00";
3995     UChar ustr[256];
3996     int32_t ustr_length = u_unescape(test, ustr, 256);
3997     unsigned char shortKeyBuf[1];
3998     int32_t sortkey_length;
3999     UErrorCode status = U_ZERO_ERROR;
4000     static UCollator *coll = NULL;
4001 
4002     coll = ucol_open("root", &status);
4003     if(U_FAILURE(status)) {
4004       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4005       return;
4006     }
4007     ucol_setStrength(coll, UCOL_PRIMARY);
4008     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4009     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4010     if (U_FAILURE(status)) {
4011       log_err("Failed setting atributes\n");
4012       return;
4013     }
4014 
4015     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4016     if (sortkey_length != 4) {
4017         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4018     }
4019     log_verbose("length of sortKey is %d", sortkey_length);
4020     ucol_close(coll);
4021 }
4022 
4023 #define TSKC_DATA_SIZE 5
4024 #define TSKC_BUF_SIZE  50
4025 static void
TestSortKeyConsistency(void)4026 TestSortKeyConsistency(void)
4027 {
4028     UErrorCode icuRC = U_ZERO_ERROR;
4029     UCollator* ucol;
4030     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4031 
4032     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034     int32_t i, j, i2;
4035 
4036     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4037     if (U_FAILURE(icuRC))
4038     {
4039         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4040         return;
4041     }
4042 
4043     for (i = 0; i < TSKC_DATA_SIZE; i++)
4044     {
4045         UCharIterator uiter;
4046         uint32_t state[2] = { 0, 0 };
4047         int32_t dataLen = i+1;
4048         for (j=0; j<TSKC_BUF_SIZE; j++)
4049             bufFull[i][j] = bufPart[i][j] = 0;
4050 
4051         /* Full sort key */
4052         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4053 
4054         /* Partial sort key */
4055         uiter_setString(&uiter, data, dataLen);
4056         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4057         if (U_FAILURE(icuRC))
4058         {
4059             log_err("ucol_nextSortKeyPart failed\n");
4060             ucol_close(ucol);
4061             return;
4062         }
4063 
4064         for (i2=0; i2<i; i2++)
4065         {
4066             UBool fullMatch = TRUE;
4067             UBool partMatch = TRUE;
4068             for (j=0; j<TSKC_BUF_SIZE; j++)
4069             {
4070                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4071                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4072             }
4073             if (fullMatch != partMatch) {
4074                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4075                                   : "partial key was consistent, but full key changed\n");
4076                 ucol_close(ucol);
4077                 return;
4078             }
4079         }
4080     }
4081 
4082     /*=============================================*/
4083    ucol_close(ucol);
4084 }
4085 
4086 /* ticket: 6101 */
TestCroatianSortKey(void)4087 static void TestCroatianSortKey(void) {
4088     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4089     UErrorCode status = U_ZERO_ERROR;
4090     UCollator *ucol;
4091     UCharIterator iter;
4092 
4093     static const UChar text[] = { 0x0044, 0xD81A };
4094 
4095     size_t length = UPRV_LENGTHOF(text);
4096 
4097     uint8_t textSortKey[32];
4098     size_t lenSortKey = 32;
4099     size_t actualSortKeyLen;
4100     uint32_t uStateInfo[2] = { 0, 0 };
4101 
4102     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4103     if (U_FAILURE(status)) {
4104         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4105         return;
4106     }
4107 
4108     uiter_setString(&iter, text, length);
4109 
4110     actualSortKeyLen = ucol_nextSortKeyPart(
4111         ucol, &iter, (uint32_t*)uStateInfo,
4112         textSortKey, lenSortKey, &status
4113         );
4114 
4115     if (actualSortKeyLen == lenSortKey) {
4116         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4117     }
4118 
4119     ucol_close(ucol);
4120 }
4121 
4122 /* ticket: 6140 */
4123 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4124  * they are both Hiragana and Katakana
4125  */
4126 #define SORTKEYLEN 50
TestHiragana(void)4127 static void TestHiragana(void) {
4128     UErrorCode status = U_ZERO_ERROR;
4129     UCollator* ucol;
4130     UCollationResult strcollresult;
4131     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4132     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4133     int32_t data1Len = UPRV_LENGTHOF(data1);
4134     int32_t data2Len = UPRV_LENGTHOF(data2);
4135     int32_t i, j;
4136     uint8_t sortKey1[SORTKEYLEN];
4137     uint8_t sortKey2[SORTKEYLEN];
4138 
4139     UCharIterator uiter1;
4140     UCharIterator uiter2;
4141     uint32_t state1[2] = { 0, 0 };
4142     uint32_t state2[2] = { 0, 0 };
4143     int32_t keySize1;
4144     int32_t keySize2;
4145 
4146     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4147             &status);
4148     if (U_FAILURE(status)) {
4149         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4150         return;
4151     }
4152 
4153     /* Start of full sort keys */
4154     /* Full sort key1 */
4155     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4156     /* Full sort key2 */
4157     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4158     if (keySize1 == keySize2) {
4159         for (i = 0; i < keySize1; i++) {
4160             if (sortKey1[i] != sortKey2[i]) {
4161                 log_err("Full sort keys are different. Should be equal.");
4162             }
4163         }
4164     } else {
4165         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4166     }
4167     /* End of full sort keys */
4168 
4169     /* Start of partial sort keys */
4170     /* Partial sort key1 */
4171     uiter_setString(&uiter1, data1, data1Len);
4172     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4173     /* Partial sort key2 */
4174     uiter_setString(&uiter2, data2, data2Len);
4175     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4176     if (U_SUCCESS(status) && keySize1 == keySize2) {
4177         for (j = 0; j < keySize1; j++) {
4178             if (sortKey1[j] != sortKey2[j]) {
4179                 log_err("Partial sort keys are different. Should be equal");
4180             }
4181         }
4182     } else {
4183         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4184     }
4185     /* End of partial sort keys */
4186 
4187     /* Start of strcoll */
4188     /* Use ucol_strcoll() to determine ordering */
4189     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4190     if (strcollresult != UCOL_EQUAL) {
4191         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4192     }
4193 
4194     ucol_close(ucol);
4195 }
4196 
4197 /* Convenient struct for running collation tests */
4198 typedef struct {
4199   const UChar source[MAX_TOKEN_LEN];  /* String on left */
4200   const UChar target[MAX_TOKEN_LEN];  /* String on right */
4201   UCollationResult result;            /* -1, 0 or +1, depending on collation */
4202 } OneTestCase;
4203 
4204 /*
4205  * Utility function to test one collation test case.
4206  * @param testcases Array of test cases.
4207  * @param n_testcases Size of the array testcases.
4208  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4209  * @param n_rules Size of the array str_rules.
4210  */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4211 static void doTestOneTestCase(const OneTestCase testcases[],
4212                               int n_testcases,
4213                               const char* str_rules[],
4214                               int n_rules)
4215 {
4216   int rule_no, testcase_no;
4217   UChar rule[500];
4218   int32_t length = 0;
4219   UErrorCode status = U_ZERO_ERROR;
4220   UParseError parse_error;
4221   UCollator  *myCollation;
4222 
4223   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4224 
4225     length = u_unescape(str_rules[rule_no], rule, 500);
4226     if (length == 0) {
4227         log_err("ERROR: The rule cannot be unescaped: %s\n");
4228         return;
4229     }
4230     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4231     if(U_FAILURE(status)){
4232         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4233         log_info("  offset=%d  \"%s\" | \"%s\"\n",
4234                  parse_error.offset,
4235                  aescstrdup(parse_error.preContext, -1),
4236                  aescstrdup(parse_error.postContext, -1));
4237         return;
4238     }
4239     log_verbose("Testing the <<* syntax\n");
4240     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4241     ucol_setStrength(myCollation, UCOL_TERTIARY);
4242     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4243       doTest(myCollation,
4244              testcases[testcase_no].source,
4245              testcases[testcase_no].target,
4246              testcases[testcase_no].result
4247              );
4248     }
4249     ucol_close(myCollation);
4250   }
4251 }
4252 
4253 const static OneTestCase rangeTestcases[] = {
4254   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
4255   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
4256   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
4257 
4258   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
4259   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
4260   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
4261   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
4262   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
4263 
4264   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
4265   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
4266   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
4267   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
4268 
4269   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
4270   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
4271   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
4272   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
4273   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
4274   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
4275   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
4276   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
4277 };
4278 
4279 static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4280 
4281 const static OneTestCase rangeTestcasesSupplemental[] = {
4282   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
4283   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
4284   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
4285   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4286   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4287   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4288   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4289 };
4290 
4291 static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4292 
4293 const static OneTestCase rangeTestcasesQwerty[] = {
4294   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
4295   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
4296 
4297   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
4298   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
4299 
4300   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
4301   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
4302 
4303   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
4304   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
4305 
4306   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4307     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
4308   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4309     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
4310 };
4311 
4312 static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4313 
TestSameStrengthList(void)4314 static void TestSameStrengthList(void)
4315 {
4316   const char* strRules[] = {
4317     /* Normal */
4318     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4319 
4320     /* Lists */
4321     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4322   };
4323   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4324 }
4325 
TestSameStrengthListQuoted(void)4326 static void TestSameStrengthListQuoted(void)
4327 {
4328   const char* strRules[] = {
4329     /* Lists with quoted characters */
4330     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4331     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4332 
4333     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4334     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4335 
4336     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4337     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4338   };
4339   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4340 }
4341 
TestSameStrengthListSupplemental(void)4342 static void TestSameStrengthListSupplemental(void)
4343 {
4344   const char* strRules[] = {
4345     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4346     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4347     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4348     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4349   };
4350   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4351 }
4352 
TestSameStrengthListQwerty(void)4353 static void TestSameStrengthListQwerty(void)
4354 {
4355   const char* strRules[] = {
4356     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4357     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4358     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4359     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4360     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4361 
4362     /* Quoted characters also will work if two quoted characters are not consecutive.  */
4363     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4364 
4365     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4366     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4367 
4368  };
4369   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4370 }
4371 
TestSameStrengthListQuotedQwerty(void)4372 static void TestSameStrengthListQuotedQwerty(void)
4373 {
4374   const char* strRules[] = {
4375     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4376     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4377     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4378 
4379     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4380     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4381    };
4382   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4383 }
4384 
TestSameStrengthListRanges(void)4385 static void TestSameStrengthListRanges(void)
4386 {
4387   const char* strRules[] = {
4388     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4389   };
4390   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4391 }
4392 
TestSameStrengthListSupplementalRanges(void)4393 static void TestSameStrengthListSupplementalRanges(void)
4394 {
4395   const char* strRules[] = {
4396     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4397     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4398   };
4399   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4400 }
4401 
TestSpecialCharacters(void)4402 static void TestSpecialCharacters(void)
4403 {
4404   const char* strRules[] = {
4405     /* Normal */
4406     "&';'<'+'<','<'-'<'&'<'*'",
4407 
4408     /* List */
4409     "&';'<*'+,-&*'",
4410 
4411     /* Range */
4412     "&';'<*'+'-'-&*'",
4413   };
4414 
4415   const static OneTestCase specialCharacterStrings[] = {
4416     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4417     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4418     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4419     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4420   };
4421   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4422 }
4423 
TestPrivateUseCharacters(void)4424 static void TestPrivateUseCharacters(void)
4425 {
4426   const char* strRules[] = {
4427     /* Normal */
4428     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4429     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4430   };
4431 
4432   const static OneTestCase privateUseCharacterStrings[] = {
4433     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4434     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4435     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4436     { {0xe2da}, {0xe2db}, UCOL_LESS },
4437     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4438     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4439   };
4440   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4441 }
4442 
TestPrivateUseCharactersInList(void)4443 static void TestPrivateUseCharactersInList(void)
4444 {
4445   const char* strRules[] = {
4446     /* List */
4447     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4448     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4449     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4450   };
4451 
4452   const static OneTestCase privateUseCharacterStrings[] = {
4453     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4454     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4455     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4456     { {0xe2da}, {0xe2db}, UCOL_LESS },
4457     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4458     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4459   };
4460   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4461 }
4462 
TestPrivateUseCharactersInRange(void)4463 static void TestPrivateUseCharactersInRange(void)
4464 {
4465   const char* strRules[] = {
4466     /* Range */
4467     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4468     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4469     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4470   };
4471 
4472   const static OneTestCase privateUseCharacterStrings[] = {
4473     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4474     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4475     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4476     { {0xe2da}, {0xe2db}, UCOL_LESS },
4477     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4478     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4479   };
4480   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4481 }
4482 
TestInvalidListsAndRanges(void)4483 static void TestInvalidListsAndRanges(void)
4484 {
4485   const char* invalidRules[] = {
4486     /* Range not in starred expression */
4487     "&\\ufffe<\\uffff-\\U00010002",
4488 
4489     /* Range without start */
4490     "&a<*-c",
4491 
4492     /* Range without end */
4493     "&a<*b-",
4494 
4495     /* More than one hyphen */
4496     "&a<*b-g-l",
4497 
4498     /* Range in the wrong order */
4499     "&a<*k-b",
4500 
4501   };
4502 
4503   UChar rule[500];
4504   UErrorCode status = U_ZERO_ERROR;
4505   UParseError parse_error;
4506   int n_rules = UPRV_LENGTHOF(invalidRules);
4507   int rule_no;
4508   int length;
4509   UCollator  *myCollation;
4510 
4511   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4512 
4513     length = u_unescape(invalidRules[rule_no], rule, 500);
4514     if (length == 0) {
4515         log_err("ERROR: The rule cannot be unescaped: %s\n");
4516         return;
4517     }
4518     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4519     (void)myCollation;      /* Suppress set but not used warning. */
4520     if(!U_FAILURE(status)){
4521       log_err("ERROR: Could not cause a failure as expected: \n");
4522     }
4523     status = U_ZERO_ERROR;
4524   }
4525 }
4526 
4527 /*
4528  * This test ensures that characters placed before a character in a different script have the same lead byte
4529  * in their collation key before and after script reordering.
4530  */
TestBeforeRuleWithScriptReordering(void)4531 static void TestBeforeRuleWithScriptReordering(void)
4532 {
4533     UParseError error;
4534     UErrorCode status = U_ZERO_ERROR;
4535     UCollator  *myCollation;
4536     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4537     UChar rules[500];
4538     uint32_t rulesLength = 0;
4539     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4540     UCollationResult collResult;
4541 
4542     uint8_t baseKey[256];
4543     uint32_t baseKeyLength;
4544     uint8_t beforeKey[256];
4545     uint32_t beforeKeyLength;
4546 
4547     UChar base[] = { 0x03b1 }; /* base */
4548     int32_t baseLen = UPRV_LENGTHOF(base);
4549 
4550     UChar before[] = { 0x0e01 }; /* ko kai */
4551     int32_t beforeLen = UPRV_LENGTHOF(before);
4552 
4553     /*UChar *data[] = { before, base };
4554     genericRulesStarter(srules, data, 2);*/
4555 
4556     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4557 
4558     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4559     (void)baseKeyLength;
4560 
4561     /* build collator */
4562     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4563 
4564     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4565     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4566     if(U_FAILURE(status)) {
4567         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4568         return;
4569     }
4570 
4571     /* check collation results - before rule applied but not script reordering */
4572     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4573     if (collResult != UCOL_GREATER) {
4574         log_err("Collation result not correct before script reordering = %d\n", collResult);
4575     }
4576 
4577     /* check the lead byte of the collation keys before script reordering */
4578     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4579     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4580     if (baseKey[0] != beforeKey[0]) {
4581       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4582    }
4583 
4584     /* reorder the scripts */
4585     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4586     if(U_FAILURE(status)) {
4587         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4588         return;
4589     }
4590 
4591     /* check collation results - before rule applied and after script reordering */
4592     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4593     if (collResult != UCOL_GREATER) {
4594         log_err("Collation result not correct after script reordering = %d\n", collResult);
4595     }
4596 
4597     /* check the lead byte of the collation keys after script reordering */
4598     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4599     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4600     if (baseKey[0] != beforeKey[0]) {
4601         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4602     }
4603 
4604     ucol_close(myCollation);
4605 }
4606 
4607 /*
4608  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4609  */
TestNonLeadBytesDuringCollationReordering(void)4610 static void TestNonLeadBytesDuringCollationReordering(void)
4611 {
4612     UErrorCode status = U_ZERO_ERROR;
4613     UCollator  *myCollation;
4614     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4615 
4616     uint8_t baseKey[256];
4617     uint32_t baseKeyLength;
4618     uint8_t reorderKey[256];
4619     uint32_t reorderKeyLength;
4620 
4621     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4622 
4623     uint32_t i;
4624 
4625 
4626     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4627 
4628     /* build collator tertiary */
4629     myCollation = ucol_open("", &status);
4630     ucol_setStrength(myCollation, UCOL_TERTIARY);
4631     if(U_FAILURE(status)) {
4632         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4633         return;
4634     }
4635     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4636 
4637     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4638     if(U_FAILURE(status)) {
4639         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4640         return;
4641     }
4642     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4643 
4644     if (baseKeyLength != reorderKeyLength) {
4645         log_err("Key lengths not the same during reordering.\n");
4646         return;
4647     }
4648 
4649     for (i = 1; i < baseKeyLength; i++) {
4650         if (baseKey[i] != reorderKey[i]) {
4651             log_err("Collation key bytes not the same at position %d.\n", i);
4652             return;
4653         }
4654     }
4655     ucol_close(myCollation);
4656 
4657     /* build collator quaternary */
4658     myCollation = ucol_open("", &status);
4659     ucol_setStrength(myCollation, UCOL_QUATERNARY);
4660     if(U_FAILURE(status)) {
4661         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4662         return;
4663     }
4664     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4665 
4666     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4667     if(U_FAILURE(status)) {
4668         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4669         return;
4670     }
4671     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4672 
4673     if (baseKeyLength != reorderKeyLength) {
4674         log_err("Key lengths not the same during reordering.\n");
4675         return;
4676     }
4677 
4678     for (i = 1; i < baseKeyLength; i++) {
4679         if (baseKey[i] != reorderKey[i]) {
4680             log_err("Collation key bytes not the same at position %d.\n", i);
4681             return;
4682         }
4683     }
4684     ucol_close(myCollation);
4685 }
4686 
4687 /*
4688  * Test reordering API.
4689  */
TestReorderingAPI(void)4690 static void TestReorderingAPI(void)
4691 {
4692     UErrorCode status = U_ZERO_ERROR;
4693     UCollator  *myCollation;
4694     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4695     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4696     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4697     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4698     UCollationResult collResult;
4699     int32_t retrievedReorderCodesLength;
4700     int32_t retrievedReorderCodes[10];
4701     UChar greekString[] = { 0x03b1 };
4702     UChar punctuationString[] = { 0x203e };
4703     int loopIndex;
4704 
4705     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4706 
4707     /* build collator tertiary */
4708     myCollation = ucol_open("", &status);
4709     ucol_setStrength(myCollation, UCOL_TERTIARY);
4710     if(U_FAILURE(status)) {
4711         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4712         return;
4713     }
4714 
4715     /* set the reorderding */
4716     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4717     if (U_FAILURE(status)) {
4718         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4719         return;
4720     }
4721 
4722     /* get the reordering */
4723     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4724     if (status != U_BUFFER_OVERFLOW_ERROR) {
4725         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4726         return;
4727     }
4728     status = U_ZERO_ERROR;
4729     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4730         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4731         return;
4732     }
4733     /* now let's really get it */
4734     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4735     if (U_FAILURE(status)) {
4736         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4737         return;
4738     }
4739     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4740         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4741         return;
4742     }
4743     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4744         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4745             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4746             return;
4747         }
4748     }
4749     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4750     if (collResult != UCOL_LESS) {
4751         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4752         return;
4753     }
4754 
4755     /* clear the reordering */
4756     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4757     if (U_FAILURE(status)) {
4758         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4759         return;
4760     }
4761 
4762     /* get the reordering again */
4763     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4764     if (retrievedReorderCodesLength != 0) {
4765         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4766         return;
4767     }
4768 
4769     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4770     if (collResult != UCOL_GREATER) {
4771         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4772         return;
4773     }
4774 
4775     /* clear the reordering using [NONE] */
4776     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4777     if (U_FAILURE(status)) {
4778         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4779         return;
4780     }
4781 
4782     /* get the reordering again */
4783     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4784     if (retrievedReorderCodesLength != 0) {
4785         log_err_status(status,
4786                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4787                        retrievedReorderCodesLength);
4788         return;
4789     }
4790 
4791     /* test for error condition on duplicate reorder codes */
4792     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4793     if (!U_FAILURE(status)) {
4794         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4795         return;
4796     }
4797 
4798     status = U_ZERO_ERROR;
4799     /* test for reorder codes after a reset code */
4800     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4801     if (!U_FAILURE(status)) {
4802         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4803         return;
4804     }
4805 
4806     ucol_close(myCollation);
4807 }
4808 
4809 /*
4810  * Test reordering API.
4811  */
TestReorderingAPIWithRuleCreatedCollator(void)4812 static void TestReorderingAPIWithRuleCreatedCollator(void)
4813 {
4814     UErrorCode status = U_ZERO_ERROR;
4815     UCollator  *myCollation;
4816     UChar rules[90];
4817     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4818     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4819     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4820     UCollationResult collResult;
4821     int32_t retrievedReorderCodesLength;
4822     int32_t retrievedReorderCodes[10];
4823     static const UChar greekString[] = { 0x03b1 };
4824     static const UChar punctuationString[] = { 0x203e };
4825     static const UChar hanString[] = { 0x65E5, 0x672C };
4826     int loopIndex;
4827 
4828     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4829 
4830     /* build collator from rules */
4831     u_uastrcpy(rules, "[reorder Hani Grek]");
4832     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4833     if(U_FAILURE(status)) {
4834         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4835         return;
4836     }
4837 
4838     /* get the reordering */
4839     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4840     if (U_FAILURE(status)) {
4841         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4842         return;
4843     }
4844     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4845         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4846         return;
4847     }
4848     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4849         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4850             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4851             return;
4852         }
4853     }
4854     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4855     if (collResult != UCOL_GREATER) {
4856         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4857         return;
4858     }
4859 
4860     /* set the reordering */
4861     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4862     if (U_FAILURE(status)) {
4863         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4864         return;
4865     }
4866 
4867     /* get the reordering */
4868     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4869     if (status != U_BUFFER_OVERFLOW_ERROR) {
4870         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4871         return;
4872     }
4873     status = U_ZERO_ERROR;
4874     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4875         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4876         return;
4877     }
4878     /* now let's really get it */
4879     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4880     if (U_FAILURE(status)) {
4881         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4882         return;
4883     }
4884     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4885         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4886         return;
4887     }
4888     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4889         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4890             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4891             return;
4892         }
4893     }
4894     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4895     if (collResult != UCOL_LESS) {
4896         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4897         return;
4898     }
4899 
4900     /* clear the reordering */
4901     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4902     if (U_FAILURE(status)) {
4903         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4904         return;
4905     }
4906 
4907     /* get the reordering again */
4908     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4909     if (retrievedReorderCodesLength != 0) {
4910         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4911         return;
4912     }
4913 
4914     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4915     if (collResult != UCOL_GREATER) {
4916         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4917         return;
4918     }
4919 
4920     /* reset the reordering */
4921     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4922     if (U_FAILURE(status)) {
4923         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4924         return;
4925     }
4926     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4927     if (U_FAILURE(status)) {
4928         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4929         return;
4930     }
4931     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4932         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4933         return;
4934     }
4935     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4936         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4937             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4938             return;
4939         }
4940     }
4941 
4942     ucol_close(myCollation);
4943 }
4944 
/*
 * Linear search helper: returns TRUE when expectedScript is one of the first
 * `length` entries of scripts[], FALSE otherwise (including length <= 0).
 */
static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
    UBool found = FALSE;
    int32_t index = 0;
    while (!found && index < length) {
        if (scripts[index] == expectedScript) {
            found = TRUE;
        }
        ++index;
    }
    return found;
}
4952 
TestEquivalentReorderingScripts(void)4953 static void TestEquivalentReorderingScripts(void) {
4954     // Beginning with ICU 55, collation reordering moves single scripts
4955     // rather than groups of scripts,
4956     // except where scripts share a range and sort primary-equal.
4957     UErrorCode status = U_ZERO_ERROR;
4958     int32_t equivalentScripts[100];
4959     int32_t length;
4960     int i;
4961     int32_t prevScript;
4962     /* These scripts are expected to be equivalent. */
4963     static const int32_t expectedScripts[] = {
4964         USCRIPT_HIRAGANA,
4965         USCRIPT_KATAKANA,
4966         USCRIPT_KATAKANA_OR_HIRAGANA
4967     };
4968 
4969     equivalentScripts[0] = 0;
4970     length = ucol_getEquivalentReorderCodes(
4971             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4972     if (U_FAILURE(status)) {
4973         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4974         return;
4975     }
4976     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4977         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4978                 "length expected 1, was = %d; expected [%d] was [%d]\n",
4979                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4980     }
4981 
4982     length = ucol_getEquivalentReorderCodes(
4983             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4984     if (U_FAILURE(status)) {
4985         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4986         return;
4987     }
4988     if (length != UPRV_LENGTHOF(expectedScripts)) {
4989         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4990                 "expected %d, was = %d\n",
4991                 UPRV_LENGTHOF(expectedScripts), length);
4992     }
4993     prevScript = -1;
4994     for (i = 0; i < length; ++i) {
4995         int32_t script = equivalentScripts[i];
4996         if (script <= prevScript) {
4997             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4998         }
4999         prevScript = script;
5000     }
5001     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5002         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5003             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5004                     expectedScripts[i]);
5005         }
5006     }
5007 
5008     length = ucol_getEquivalentReorderCodes(
5009             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5010     if (U_FAILURE(status)) {
5011         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5012         return;
5013     }
5014     if (length != UPRV_LENGTHOF(expectedScripts)) {
5015         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5016                 "expected %d, was = %d\n",
5017                 UPRV_LENGTHOF(expectedScripts), length);
5018     }
5019     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5020         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5021             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5022                     expectedScripts[i]);
5023         }
5024     }
5025 
5026     length = ucol_getEquivalentReorderCodes(
5027             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5028     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5029         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5030                 "expected %d, was = %d\n",
5031                 UPRV_LENGTHOF(expectedScripts), length);
5032     }
5033 
5034     length = ucol_getEquivalentReorderCodes(
5035             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5036     if (U_FAILURE(status) || length != 3) {
5037         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5038                 "expected 3, was = %d\n", length);
5039     }
5040     length = ucol_getEquivalentReorderCodes(
5041             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5042     if (U_FAILURE(status) || length != 3) {
5043         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5044                 "expected 3, was = %d\n", length);
5045     }
5046     length = ucol_getEquivalentReorderCodes(
5047             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5048     if (U_FAILURE(status) || length != 3) {
5049         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5050                 "expected 3, was = %d\n", length);
5051     }
5052 
5053     length = ucol_getEquivalentReorderCodes(
5054             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5055     if (U_FAILURE(status) || length != 2) {
5056         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5057                 "expected 2, was = %d\n", length);
5058     }
5059     length = ucol_getEquivalentReorderCodes(
5060             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5061     if (U_FAILURE(status) || length != 2) {
5062         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5063                 "expected 2, was = %d\n", length);
5064     }
5065 }
5066 
TestReorderingAcrossCloning(void)5067 static void TestReorderingAcrossCloning(void)
5068 {
5069     UErrorCode status = U_ZERO_ERROR;
5070     UCollator  *myCollation;
5071     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5072     UCollator *clonedCollation;
5073     int32_t retrievedReorderCodesLength;
5074     int32_t retrievedReorderCodes[10];
5075     int loopIndex;
5076 
5077     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5078 
5079     /* build collator tertiary */
5080     myCollation = ucol_open("", &status);
5081     ucol_setStrength(myCollation, UCOL_TERTIARY);
5082     if(U_FAILURE(status)) {
5083         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5084         return;
5085     }
5086 
5087     /* set the reorderding */
5088     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5089     if (U_FAILURE(status)) {
5090         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5091         return;
5092     }
5093 
5094     /* clone the collator */
5095     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5096     if (U_FAILURE(status)) {
5097         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5098         return;
5099     }
5100 
5101     /* get the reordering */
5102     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5103     if (U_FAILURE(status)) {
5104         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5105         return;
5106     }
5107     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5108         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5109         return;
5110     }
5111     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5112         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5113             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5114             return;
5115         }
5116     }
5117 
5118     /*uprv_free(buffer);*/
5119     ucol_close(myCollation);
5120     ucol_close(clonedCollation);
5121 }
5122 
5123 /*
5124  * Utility function to test one collation reordering test case set.
5125  * @param testcases Array of test cases.
5126  * @param n_testcases Size of the array testcases.
5127  * @param reorderTokens Array of reordering codes.
5128  * @param reorderTokensLen Size of the array reorderTokens.
5129  */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5130 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5131 {
5132     uint32_t testCaseNum;
5133     UErrorCode status = U_ZERO_ERROR;
5134     UCollator  *myCollation;
5135 
5136     myCollation = ucol_open("", &status);
5137     if (U_FAILURE(status)) {
5138         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5139         return;
5140     }
5141     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5142     if(U_FAILURE(status)) {
5143         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5144         return;
5145     }
5146 
5147     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5148         doTest(myCollation,
5149             testCases[testCaseNum].source,
5150             testCases[testCaseNum].target,
5151             testCases[testCaseNum].result
5152         );
5153     }
5154     ucol_close(myCollation);
5155 }
5156 
TestGreekFirstReorder(void)5157 static void TestGreekFirstReorder(void)
5158 {
5159     const char* strRules[] = {
5160         "[reorder Grek]"
5161     };
5162 
5163     const int32_t apiRules[] = {
5164         USCRIPT_GREEK
5165     };
5166 
5167     const static OneTestCase privateUseCharacterStrings[] = {
5168         { {0x0391}, {0x0391}, UCOL_EQUAL },
5169         { {0x0041}, {0x0391}, UCOL_GREATER },
5170         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5171         { {0x0060}, {0x0391}, UCOL_LESS },
5172         { {0x0391}, {0xe2dc}, UCOL_LESS },
5173         { {0x0391}, {0x0060}, UCOL_GREATER },
5174     };
5175 
5176     /* Test rules creation */
5177     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5178 
5179     /* Test collation reordering API */
5180     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5181 }
5182 
TestGreekLastReorder(void)5183 static void TestGreekLastReorder(void)
5184 {
5185     const char* strRules[] = {
5186         "[reorder Zzzz Grek]"
5187     };
5188 
5189     const int32_t apiRules[] = {
5190         USCRIPT_UNKNOWN, USCRIPT_GREEK
5191     };
5192 
5193     const static OneTestCase privateUseCharacterStrings[] = {
5194         { {0x0391}, {0x0391}, UCOL_EQUAL },
5195         { {0x0041}, {0x0391}, UCOL_LESS },
5196         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5197         { {0x0060}, {0x0391}, UCOL_LESS },
5198         { {0x0391}, {0xe2dc}, UCOL_GREATER },
5199     };
5200 
5201     /* Test rules creation */
5202     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5203 
5204     /* Test collation reordering API */
5205     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5206 }
5207 
TestNonScriptReorder(void)5208 static void TestNonScriptReorder(void)
5209 {
5210     const char* strRules[] = {
5211         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5212     };
5213 
5214     const int32_t apiRules[] = {
5215         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5216         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5217         UCOL_REORDER_CODE_CURRENCY
5218     };
5219 
5220     const static OneTestCase privateUseCharacterStrings[] = {
5221         { {0x0391}, {0x0041}, UCOL_LESS },
5222         { {0x0041}, {0x0391}, UCOL_GREATER },
5223         { {0x0060}, {0x0041}, UCOL_LESS },
5224         { {0x0060}, {0x0391}, UCOL_GREATER },
5225         { {0x0024}, {0x0041}, UCOL_GREATER },
5226     };
5227 
5228     /* Test rules creation */
5229     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5230 
5231     /* Test collation reordering API */
5232     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5233 }
5234 
TestHaniReorder(void)5235 static void TestHaniReorder(void)
5236 {
5237     const char* strRules[] = {
5238         "[reorder Hani]"
5239     };
5240     const int32_t apiRules[] = {
5241         USCRIPT_HAN
5242     };
5243 
5244     const static OneTestCase privateUseCharacterStrings[] = {
5245         { {0x4e00}, {0x0041}, UCOL_LESS },
5246         { {0x4e00}, {0x0060}, UCOL_GREATER },
5247         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5248         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5249         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5250         { {0xfa27}, {0x0041}, UCOL_LESS },
5251         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5252     };
5253 
5254     /* Test rules creation */
5255     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5256 
5257     /* Test collation reordering API */
5258     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5259 }
5260 
TestHaniReorderWithOtherRules(void)5261 static void TestHaniReorderWithOtherRules(void)
5262 {
5263     const char* strRules[] = {
5264         "[reorder Hani] &b<a"
5265     };
5266     /*const int32_t apiRules[] = {
5267         USCRIPT_HAN
5268     };*/
5269 
5270     const static OneTestCase privateUseCharacterStrings[] = {
5271         { {0x4e00}, {0x0041}, UCOL_LESS },
5272         { {0x4e00}, {0x0060}, UCOL_GREATER },
5273         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5274         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5275         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5276         { {0xfa27}, {0x0041}, UCOL_LESS },
5277         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5278         { {0x0062}, {0x0061}, UCOL_LESS },
5279     };
5280 
5281     /* Test rules creation */
5282     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5283 }
5284 
TestMultipleReorder(void)5285 static void TestMultipleReorder(void)
5286 {
5287     const char* strRules[] = {
5288         "[reorder Grek Zzzz DIGIT Latn Hani]"
5289     };
5290 
5291     const int32_t apiRules[] = {
5292         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5293     };
5294 
5295     const static OneTestCase collationTestCases[] = {
5296         { {0x0391}, {0x0041}, UCOL_LESS},
5297         { {0x0031}, {0x0041}, UCOL_LESS},
5298         { {0x0041}, {0x4e00}, UCOL_LESS},
5299     };
5300 
5301     /* Test rules creation */
5302     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5303 
5304     /* Test collation reordering API */
5305     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5306 }
5307 
5308 /*
5309  * Test that covers issue reported in ticket 8814
5310  */
TestReorderWithNumericCollation(void)5311 static void TestReorderWithNumericCollation(void)
5312 {
5313     UErrorCode status = U_ZERO_ERROR;
5314     UCollator  *myCollation;
5315     UCollator  *myReorderCollation;
5316     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5317     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5318     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5319     UChar fortyS[] = { 0x0053 };
5320     UChar fortyThreeP[] = { 0x0050 };
5321     uint8_t fortyS_sortKey[128];
5322     int32_t fortyS_sortKey_Length;
5323     uint8_t fortyThreeP_sortKey[128];
5324     int32_t fortyThreeP_sortKey_Length;
5325     uint8_t fortyS_sortKey_reorder[128];
5326     int32_t fortyS_sortKey_reorder_Length;
5327     uint8_t fortyThreeP_sortKey_reorder[128];
5328     int32_t fortyThreeP_sortKey_reorder_Length;
5329     UCollationResult collResult;
5330     UCollationResult collResultReorder;
5331 
5332     log_verbose("Testing reordering with and without numeric collation\n");
5333 
5334     /* build collator tertiary with numeric */
5335     myCollation = ucol_open("", &status);
5336     /*
5337     ucol_setStrength(myCollation, UCOL_TERTIARY);
5338     */
5339     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5340     if(U_FAILURE(status)) {
5341         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5342         return;
5343     }
5344 
5345     /* build collator tertiary with numeric and reordering */
5346     myReorderCollation = ucol_open("", &status);
5347     /*
5348     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5349     */
5350     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5351     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5352     if(U_FAILURE(status)) {
5353         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5354         return;
5355     }
5356 
5357     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5358     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5359     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5360     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5361 
5362     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5363         log_err_status(status, "ERROR: couldn't generate sort keys\n");
5364         return;
5365     }
5366     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5368     /*
5369     fprintf(stderr, "\tcollResult = %x\n", collResult);
5370     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5371     fprintf(stderr, "\nfortyS\n");
5372     for (i = 0; i < fortyS_sortKey_Length; i++) {
5373         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5374     }
5375     fprintf(stderr, "\nfortyThreeP\n");
5376     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5377         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5378     }
5379     */
5380     if (collResult != collResultReorder) {
5381         log_err_status(status, "ERROR: collation results should have been the same.\n");
5382         return;
5383     }
5384 
5385     ucol_close(myCollation);
5386     ucol_close(myReorderCollation);
5387 }
5388 
/*
 * Compares two zero-terminated byte arrays byte by byte.
 * Returns 0 when both are equal up to and including the terminating 0,
 * -1 when the first differing byte of a is smaller, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  size_t pos = 0;

  while (a[pos] == b[pos]) {
    if (a[pos] == 0) {
      /* reached the common terminator without finding a difference */
      return 0;
    }
    ++pos;
  }
  if (a[pos] < b[pos]) {
    return -1;
  }
  return 1;
}
5398 
TestImportRulesDeWithPhonebook(void)5399 static void TestImportRulesDeWithPhonebook(void)
5400 {
5401   const char* normalRules[] = {
5402     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5403     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5404     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5405   };
5406   const OneTestCase normalTests[] = {
5407     { {0x00e6}, {0x00c6}, UCOL_LESS},
5408     { {0x00fc}, {0x00dc}, UCOL_GREATER},
5409   };
5410 
5411   const char* importRules[] = {
5412     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5413     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415   };
5416   const OneTestCase importTests[] = {
5417     { {0x00e6}, {0x00c6}, UCOL_LESS},
5418     { {0x00fc}, {0x00dc}, UCOL_LESS},
5419   };
5420 
5421   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5422   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5423 }
5424 
#if 0
/*
 * Disabled test. Checks three fixed comparisons (U+0110 vs U+00F0,
 * U+00A3 vs U+00A5, "a" vs "a" + U+00A3) under four rule sets: a dummy rule,
 * "[import root-u-co-eor]", "[import fi-u-co-standard]", and both imports
 * combined.
 */
static void TestImportRulesFiWithEor(void)
{
  /* --- Rule strings (one per set) --- */

  /* Dummy rule, i.e. the DUCET order. */
  const char* ducetRuleSet[] = { "&a<b" };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRuleSet[] = { "[import root-u-co-eor]" };

  const char* fiStandardRuleSet[] = { "[import fi-u-co-standard]" };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorPlusFiStandardRuleSet[] = {
    "[import root-u-co-eor][import fi-u-co-standard]"
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  const char* fiEorRuleSet[] = { "[import fi-u-co-eor]" };

  /* --- Expected comparison results for each rule set --- */

  const OneTestCase ducetExpected[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  const OneTestCase eorExpected[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  const OneTestCase fiStandardExpected[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  const OneTestCase fiEorExpected[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(ducetExpected, UPRV_LENGTHOF(ducetExpected),
                    ducetRuleSet, UPRV_LENGTHOF(ducetRuleSet));
  doTestOneTestCase(eorExpected, UPRV_LENGTHOF(eorExpected),
                    eorRuleSet, UPRV_LENGTHOF(eorRuleSet));
  doTestOneTestCase(fiStandardExpected, UPRV_LENGTHOF(fiStandardExpected),
                    fiStandardRuleSet, UPRV_LENGTHOF(fiStandardRuleSet));
  /* The combined imports are expected to behave like the fi "eor" tailoring. */
  doTestOneTestCase(fiEorExpected, UPRV_LENGTHOF(fiEorExpected),
                    eorPlusFiStandardRuleSet, UPRV_LENGTHOF(eorPlusFiStandardRuleSet));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by enabling the call below after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorExpected, UPRV_LENGTHOF(fiEorExpected), fiEorRuleSet, UPRV_LENGTHOF(fiEorRuleSet)); */

}
#endif
5494 
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Enable this function and make it work when unihan rules are built by default.
 *
 * It compares U+3402 with U+4E1E twice: once under a dummy rule (expected
 * UCOL_GREATER) and once under "[import ko-u-co-unihan]" (expected UCOL_LESS).
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* Dummy rule, i.e. the DUCET order. */
  const char* ducetRuleSet[] = { "&a<b" };
  const OneTestCase ducetExpected[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Rules imported from the Korean "unihan" collation type. */
  const char* unihanRuleSet[] = { "[import ko-u-co-unihan]" };
  const OneTestCase unihanExpected[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(ducetExpected, UPRV_LENGTHOF(ducetExpected),
                    ducetRuleSet, UPRV_LENGTHOF(ducetRuleSet));
  doTestOneTestCase(unihanExpected, UPRV_LENGTHOF(unihanExpected),
                    unihanRuleSet, UPRV_LENGTHOF(unihanRuleSet));

}
#endif
5526 
TestImport(void)5527 static void TestImport(void)
5528 {
5529     UCollator* vicoll;
5530     UCollator* escoll;
5531     UCollator* viescoll;
5532     UCollator* importviescoll;
5533     UParseError error;
5534     UErrorCode status = U_ZERO_ERROR;
5535     UChar* virules;
5536     int32_t viruleslength;
5537     UChar* esrules;
5538     int32_t esruleslength;
5539     UChar* viesrules;
5540     int32_t viesruleslength;
5541     char srules[500] = "[import vi][import es]";
5542     UChar rules[500];
5543     uint32_t length = 0;
5544     int32_t itemCount;
5545     int32_t i, k;
5546     UChar32 start;
5547     UChar32 end;
5548     UChar str[500];
5549     int32_t strLength;
5550 
5551     uint8_t sk1[500];
5552     uint8_t sk2[500];
5553 
5554     UBool b;
5555     USet* tailoredSet;
5556     USet* importTailoredSet;
5557 
5558 
5559     vicoll = ucol_open("vi", &status);
5560     if(U_FAILURE(status)){
5561         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5562         return;
5563     }
5564 
5565     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5566     if(viruleslength == 0) {
5567         log_data_err("missing vi tailoring rule string\n");
5568         ucol_close(vicoll);
5569         return;
5570     }
5571     escoll = ucol_open("es", &status);
5572     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5573     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5574     viesrules[0] = 0;
5575     u_strcat(viesrules, virules);
5576     u_strcat(viesrules, esrules);
5577     viesruleslength = viruleslength + esruleslength;
5578     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5579 
5580     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5581     length = u_unescape(srules, rules, 500);
5582     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5583     if(U_FAILURE(status)){
5584         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585         return;
5586     }
5587 
5588     tailoredSet = ucol_getTailoredSet(viescoll, &status);
5589     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5590 
5591     if(!uset_equals(tailoredSet, importTailoredSet)){
5592         log_err("Tailored sets not equal");
5593     }
5594 
5595     uset_close(importTailoredSet);
5596 
5597     itemCount = uset_getItemCount(tailoredSet);
5598 
5599     for( i = 0; i < itemCount; i++){
5600         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5601         if(strLength < 2){
5602             for (; start <= end; start++){
5603                 k = 0;
5604                 U16_APPEND(str, k, 500, start, b);
5605                 (void)b;    /* Suppress set but not used warning. */
5606                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5607                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5608                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5609                     log_err("Sort key for %s not equal\n", str);
5610                     break;
5611                 }
5612             }
5613         }else{
5614             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5615             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5616             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5617                 log_err("ZZSort key for %s not equal\n", str);
5618                 break;
5619             }
5620 
5621         }
5622     }
5623 
5624     uset_close(tailoredSet);
5625 
5626     uprv_free(viesrules);
5627 
5628     ucol_close(vicoll);
5629     ucol_close(escoll);
5630     ucol_close(viescoll);
5631     ucol_close(importviescoll);
5632 }
5633 
TestImportWithType(void)5634 static void TestImportWithType(void)
5635 {
5636     UCollator* vicoll;
5637     UCollator* decoll;
5638     UCollator* videcoll;
5639     UCollator* importvidecoll;
5640     UParseError error;
5641     UErrorCode status = U_ZERO_ERROR;
5642     const UChar* virules;
5643     int32_t viruleslength;
5644     const UChar* derules;
5645     int32_t deruleslength;
5646     UChar* viderules;
5647     int32_t videruleslength;
5648     const char srules[500] = "[import vi][import de-u-co-phonebk]";
5649     UChar rules[500];
5650     uint32_t length = 0;
5651     int32_t itemCount;
5652     int32_t i, k;
5653     UChar32 start;
5654     UChar32 end;
5655     UChar str[500];
5656     int32_t strLength;
5657 
5658     uint8_t sk1[500];
5659     uint8_t sk2[500];
5660 
5661     USet* tailoredSet;
5662     USet* importTailoredSet;
5663 
5664     vicoll = ucol_open("vi", &status);
5665     if(U_FAILURE(status)){
5666         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5667         return;
5668     }
5669     virules = ucol_getRules(vicoll, &viruleslength);
5670     if(viruleslength == 0) {
5671         log_data_err("missing vi tailoring rule string\n");
5672         ucol_close(vicoll);
5673         return;
5674     }
5675     /* decoll = ucol_open("de@collation=phonebook", &status); */
5676     decoll = ucol_open("de-u-co-phonebk", &status);
5677     if(U_FAILURE(status)){
5678         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5679         return;
5680     }
5681 
5682 
5683     derules = ucol_getRules(decoll, &deruleslength);
5684     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5685     viderules[0] = 0;
5686     u_strcat(viderules, virules);
5687     u_strcat(viderules, derules);
5688     videruleslength = viruleslength + deruleslength;
5689     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5690 
5691     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5692     length = u_unescape(srules, rules, 500);
5693     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5694     if(U_FAILURE(status)){
5695         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5696         return;
5697     }
5698 
5699     tailoredSet = ucol_getTailoredSet(videcoll, &status);
5700     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5701 
5702     if(!uset_equals(tailoredSet, importTailoredSet)){
5703         log_err("Tailored sets not equal");
5704     }
5705 
5706     uset_close(importTailoredSet);
5707 
5708     itemCount = uset_getItemCount(tailoredSet);
5709 
5710     for( i = 0; i < itemCount; i++){
5711         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5712         if(strLength < 2){
5713             for (; start <= end; start++){
5714                 k = 0;
5715                 U16_APPEND_UNSAFE(str, k, start);
5716                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5717                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5718                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5719                     log_err("Sort key for %s not equal\n", str);
5720                     break;
5721                 }
5722             }
5723         }else{
5724             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5725             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5726             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5727                 log_err("Sort key for %s not equal\n", str);
5728                 break;
5729             }
5730 
5731         }
5732     }
5733 
5734     uset_close(tailoredSet);
5735 
5736     uprv_free(viderules);
5737 
5738     ucol_close(videcoll);
5739     ucol_close(importvidecoll);
5740     ucol_close(vicoll);
5741     ucol_close(decoll);
5742 }
5743 
5744 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5745 static const UChar longUpperStr1[]= { /* 155 chars */
5746     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5747     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5748     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5749     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5750     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5751     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5752     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5753     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5754     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5755     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5756 };
5757 
5758 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5759 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5760     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5763     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5764     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5765 };
5766 
5767 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5768 static const UChar longUpperStr3[]= { /* 324 chars */
5769     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5781 };
5782 
5783 typedef struct {
5784     const UChar * longUpperStrPtr;
5785     int32_t       longUpperStrLen;
5786 } LongUpperStrItem;
5787 
5788 /* String pointers must be in reverse collation order of the corresponding strings */
5789 static const LongUpperStrItem longUpperStrItems[] = {
5790     { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5791     { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5792     { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
5793     { NULL,          0                           }
5794 };
5795 
5796 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5797 
5798 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5799 static void TestCaseLevelBufferOverflow(void)
5800 {
5801     UErrorCode status = U_ZERO_ERROR;
5802     UCollator * ucol = ucol_open("root", &status);
5803     if ( U_SUCCESS(status) ) {
5804         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5805         if ( U_SUCCESS(status) ) {
5806             const LongUpperStrItem * itemPtr;
5807             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5808             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5809                 int32_t sortKeyLen;
5810                 if (itemPtr > longUpperStrItems) {
5811                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5812                 }
5813                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5814                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5815                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5816                     break;
5817                 }
5818                 if ( itemPtr > longUpperStrItems ) {
5819                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5820                     if (compareResult >= 0) {
5821                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5822                     }
5823                 }
5824             }
5825         } else {
5826             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5827         }
5828         ucol_close(ucol);
5829     } else {
5830         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5831     }
5832 }
5833 
5834 /* Test for #10595 */
5835 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5836 #define KEY_PART_SIZE 16
5837 
TestNextSortKeyPartJaIdentical(void)5838 static void TestNextSortKeyPartJaIdentical(void)
5839 {
5840     UErrorCode status = U_ZERO_ERROR;
5841     UCollator *coll;
5842     uint8_t keyPart[KEY_PART_SIZE];
5843     UCharIterator iter;
5844     uint32_t state[2] = {0, 0};
5845     int32_t keyPartLen;
5846 
5847     coll = ucol_open("ja", &status);
5848     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5849     if (U_FAILURE(status)) {
5850         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5851         return;
5852     }
5853 
5854     uiter_setString(&iter, testJapaneseName, 5);
5855     keyPartLen = KEY_PART_SIZE;
5856     while (keyPartLen == KEY_PART_SIZE) {
5857         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5858         if (U_FAILURE(status)) {
5859             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5860             break;
5861         }
5862     }
5863 
5864     ucol_close(coll);
5865 }
5866 
/* Registers test function x under the path "tscoll/cmsccoll/<name of x>". */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the collation tests of this file to the test tree rooted at *root.
 * Each TEST() line registers one function; entries that are commented out
 * are deliberately not registered (see the note next to each).
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5973 
5974 #endif /* #if !UCONFIG_NO_COLLATION */
5975