• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2001-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File cmsccoll.C
11 *
12 *******************************************************************************/
13 /**
14  * These are the tests specific to ICU 1.8 and above, that I didn't know where
15  * to fit.
16  */
17 
18 #include <stdio.h>
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION
23 
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "cintltst.h"
28 #include "ccolltst.h"
29 #include "callcoll.h"
30 #include "unicode/ustring.h"
31 #include "string.h"
32 #include "ucol_imp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43 
44 
45 #define MAX_TOKEN_LEN 16
46 
47 typedef UCollationResult tst_strcoll(void *collator, const int object,
48                         const UChar *source, const int sLen,
49                         const UChar *target, const int tLen);
50 
51 
52 
/*
 * Strings that IncompleteCntTest expects in strictly ascending order under
 * the rule " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ",
  "AB", "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};
67 
/*
 * Strings that IncompleteCntTest expects in strictly ascending order under
 * the rule " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ",
  "MAR",
  "Z",
  "DAVIS", "MARK", "DAV",
  "DAVI"
};
79 
IncompleteCntTest(void)80 static void IncompleteCntTest(void)
81 {
82   UErrorCode status = U_ZERO_ERROR;
83   UChar temp[90];
84   UChar t1[90];
85   UChar t2[90];
86 
87   UCollator *coll =  NULL;
88   uint32_t i = 0, j = 0;
89   uint32_t size = 0;
90 
91   u_uastrcpy(temp, " & Z < ABC < Q < B");
92 
93   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94 
95   if(U_SUCCESS(status)) {
96     size = UPRV_LENGTHOF(cnt1);
97     for(i = 0; i < size-1; i++) {
98       for(j = i+1; j < size; j++) {
99         UCollationElements *iter;
100         u_uastrcpy(t1, cnt1[i]);
101         u_uastrcpy(t2, cnt1[j]);
102         doTest(coll, t1, t2, UCOL_LESS);
103         /* synwee : added collation element iterator test */
104         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105         if (U_FAILURE(status)) {
106           log_err("Creation of iterator failed\n");
107           break;
108         }
109         backAndForth(iter);
110         ucol_closeElements(iter);
111       }
112     }
113   }
114 
115   ucol_close(coll);
116 
117 
118   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120 
121   if(U_SUCCESS(status)) {
122     size = UPRV_LENGTHOF(cnt2);
123     for(i = 0; i < size-1; i++) {
124       for(j = i+1; j < size; j++) {
125         UCollationElements *iter;
126         u_uastrcpy(t1, cnt2[i]);
127         u_uastrcpy(t2, cnt2[j]);
128         doTest(coll, t1, t2, UCOL_LESS);
129 
130         /* synwee : added collation element iterator test */
131         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132         if (U_FAILURE(status)) {
133           log_err("Creation of iterator failed\n");
134           break;
135         }
136         backAndForth(iter);
137         ucol_closeElements(iter);
138       }
139     }
140   }
141 
142   ucol_close(coll);
143 
144 
145 }
146 
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_SHIFTED and strength is UCOL_QUATERNARY.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
158 
159 const static UCollationResult shiftedTert[] = {
160   UCOL_EQUAL,
161   UCOL_EQUAL,
162   UCOL_EQUAL,
163   UCOL_LESS,
164   UCOL_EQUAL,
165   UCOL_EQUAL,
166   UCOL_LESS,
167   UCOL_EQUAL,
168   UCOL_EQUAL
169 };
170 
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_NON_IGNORABLE.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
182 
BlackBirdTest(void)183 static void BlackBirdTest(void) {
184   UErrorCode status = U_ZERO_ERROR;
185   UChar t1[90];
186   UChar t2[90];
187 
188   uint32_t i = 0, j = 0;
189   uint32_t size = 0;
190   UCollator *coll = ucol_open("en_US", &status);
191 
192   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194 
195   if(U_SUCCESS(status)) {
196     size = UPRV_LENGTHOF(nonignorable);
197     for(i = 0; i < size-1; i++) {
198       for(j = i+1; j < size; j++) {
199         u_uastrcpy(t1, nonignorable[i]);
200         u_uastrcpy(t2, nonignorable[j]);
201         doTest(coll, t1, t2, UCOL_LESS);
202       }
203     }
204   }
205 
206   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208 
209   if(U_SUCCESS(status)) {
210     size = UPRV_LENGTHOF(shifted);
211     for(i = 0; i < size-1; i++) {
212       for(j = i+1; j < size; j++) {
213         u_uastrcpy(t1, shifted[i]);
214         u_uastrcpy(t2, shifted[j]);
215         doTest(coll, t1, t2, UCOL_LESS);
216       }
217     }
218   }
219 
220   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221   if(U_SUCCESS(status)) {
222     size = UPRV_LENGTHOF(shifted);
223     for(i = 1; i < size; i++) {
224       u_uastrcpy(t1, shifted[i-1]);
225       u_uastrcpy(t2, shifted[i]);
226       doTest(coll, t1, t2, shiftedTert[i]);
227     }
228   }
229 
230   ucol_close(coll);
231 }
232 
233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236     {0x0041/*'A'*/, 0x0300, 0x0000},
237     {0x00C0, 0x0301, 0x0000},
238     /* this would work with forced normalization */
239     {0x00C0, 0x0316, 0x0000}
240 };
241 
242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245     {0x00C0, 0},
246     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247     /* this would work with forced normalization */
248     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249 };
250 
251 const static UCollationResult results[] = {
252     UCOL_GREATER,
253     UCOL_EQUAL,
254     UCOL_EQUAL,
255     UCOL_GREATER,
256     UCOL_EQUAL
257 };
258 
FunkyATest(void)259 static void FunkyATest(void)
260 {
261 
262     int32_t i;
263     UErrorCode status = U_ZERO_ERROR;
264     UCollator  *myCollation;
265     myCollation = ucol_open("en_US", &status);
266     if(U_FAILURE(status)){
267         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268         return;
269     }
270     log_verbose("Testing some A letters, for some reason\n");
271     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272     ucol_setStrength(myCollation, UCOL_TERTIARY);
273     for (i = 0; i < 4 ; i++)
274     {
275         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276     }
277     ucol_close(myCollation);
278 }
279 
280 UColAttributeValue caseFirst[] = {
281     UCOL_OFF,
282     UCOL_LOWER_FIRST,
283     UCOL_UPPER_FIRST
284 };
285 
286 
287 UColAttributeValue alternateHandling[] = {
288     UCOL_NON_IGNORABLE,
289     UCOL_SHIFTED
290 };
291 
292 UColAttributeValue caseLevel[] = {
293     UCOL_OFF,
294     UCOL_ON
295 };
296 
297 UColAttributeValue strengths[] = {
298     UCOL_PRIMARY,
299     UCOL_SECONDARY,
300     UCOL_TERTIARY,
301     UCOL_QUATERNARY,
302     UCOL_IDENTICAL
303 };
304 
#if 0
/* Printable names, parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/* not used currently - does not test only prints */
/*
 * Prints to stderr the sort key of "M\u00E4rk Davis" for every combination of
 * case-first, alternate-handling, case-level and strength settings.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    ucol_close(coll);
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
    /* Index with h: i still holds sizem from the dump loop above. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  ucol_close(coll);
}
#endif
382 
BillFairmanTest(void)383 static void BillFairmanTest(void) {
384 /*
385 ** check for actual locale via ICU resource bundles
386 **
387 ** lp points to the original locale ("fr_FR_....")
388 */
389 
390     UResourceBundle *lr,*cr;
391     UErrorCode              lec = U_ZERO_ERROR;
392     const char *lp = "fr_FR_you_ll_never_find_this_locale";
393 
394     log_verbose("BillFairmanTest\n");
395 
396     lr = ures_open(NULL,lp,&lec);
397     if (lr) {
398         cr = ures_getByKey(lr,"collations",0,&lec);
399         if (cr) {
400             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401             if (lp) {
402                 if (U_SUCCESS(lec)) {
403                     if(strcmp(lp, "fr") != 0) {
404                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405                     }
406                 }
407             }
408             ures_close(cr);
409         }
410         ures_close(lr);
411     }
412 }
413 
/*
 * Escaped strings (u_unescape syntax) that TestChMove expects in strictly
 * ascending order under the "cs" collator.
 */
static const char chTest[][20] = {
  /* c ... */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  /* h ... */
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  /* ch ... */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i ... */
  "i", "I", "iarly",
  /* r ... */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s ... */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z ... */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
433 
TestChMove(void)434 static void TestChMove(void) {
435     UChar t1[256] = {0};
436     UChar t2[256] = {0};
437 
438     uint32_t i = 0, j = 0;
439     uint32_t size = 0;
440     UErrorCode status = U_ZERO_ERROR;
441 
442     UCollator *coll = ucol_open("cs", &status);
443 
444     if(U_SUCCESS(status)) {
445         size = UPRV_LENGTHOF(chTest);
446         for(i = 0; i < size-1; i++) {
447             for(j = i+1; j < size; j++) {
448                 u_unescape(chTest[i], t1, 256);
449                 u_unescape(chTest[j], t2, 256);
450                 doTest(coll, t1, t2, UCOL_LESS);
451             }
452         }
453     }
454     else {
455         log_data_err("Can't open collator");
456     }
457     ucol_close(coll);
458 }
459 
460 
461 
462 
463 /*
464 const static char impTest[][20] = {
465   "\\u4e00",
466     "a",
467     "A",
468     "b",
469     "B",
470     "\\u4e01"
471 };
472 */
473 
474 
TestImplicitTailoring(void)475 static void TestImplicitTailoring(void) {
476   static const struct {
477     const char *rules;
478     const char *data[10];
479     const uint32_t len;
480   } tests[] = {
481       {
482         /* Tailor b and c before U+4E00. */
483         "&[before 1]\\u4e00 < b < c "
484         /* Now, before U+4E00 is c; put d and e after that. */
485         "&[before 1]\\u4e00 < d < e",
486         { "b", "c", "d", "e", "\\u4e00"}, 5 },
487       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490   };
491 
492   int32_t i = 0;
493 
494   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496   }
497 
498 /*
499   UChar t1[256] = {0};
500   UChar t2[256] = {0};
501 
502   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503 
504   uint32_t i = 0, j = 0;
505   uint32_t size = 0;
506   uint32_t ruleLen = 0;
507   UErrorCode status = U_ZERO_ERROR;
508   UCollator *coll = NULL;
509   ruleLen = u_unescape(rule, t1, 256);
510 
511   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512 
513   if(U_SUCCESS(status)) {
514     size = UPRV_LENGTHOF(impTest);
515     for(i = 0; i < size-1; i++) {
516       for(j = i+1; j < size; j++) {
517         u_unescape(impTest[i], t1, 256);
518         u_unescape(impTest[j], t2, 256);
519         doTest(coll, t1, t2, UCOL_LESS);
520       }
521     }
522   }
523   else {
524     log_err("Can't open collator");
525   }
526   ucol_close(coll);
527   */
528 }
529 
TestFCDProblem(void)530 static void TestFCDProblem(void) {
531   UChar t1[256] = {0};
532   UChar t2[256] = {0};
533 
534   const char *s1 = "\\u0430\\u0306\\u0325";
535   const char *s2 = "\\u04D1\\u0325";
536 
537   UErrorCode status = U_ZERO_ERROR;
538   UCollator *coll = ucol_open("", &status);
539   u_unescape(s1, t1, 256);
540   u_unescape(s2, t2, 256);
541 
542   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543   doTest(coll, t1, t2, UCOL_EQUAL);
544 
545   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546   doTest(coll, t1, t2, UCOL_EQUAL);
547 
548   ucol_close(coll);
549 }
550 
551 /*
552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553 We're only using NFC/NFD in this test.
554 */
555 #define NORM_BUFFER_TEST_LEN 18
556 typedef struct {
557   UChar32 u;
558   UChar NFC[NORM_BUFFER_TEST_LEN];
559   UChar NFD[NORM_BUFFER_TEST_LEN];
560 } tester;
561 
TestComposeDecompose(void)562 static void TestComposeDecompose(void) {
563     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564     static const UChar UNICODESET_STR[] = {
565         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568     };
569     int32_t noOfLoc;
570     int32_t i = 0, j = 0;
571 
572     UErrorCode status = U_ZERO_ERROR;
573     const char *locName = NULL;
574     uint32_t nfcSize;
575     uint32_t nfdSize;
576     tester **t;
577     uint32_t noCases = 0;
578     UCollator *coll = NULL;
579     UChar32 u = 0;
580     UChar comp[NORM_BUFFER_TEST_LEN];
581     uint32_t len = 0;
582     UCollationElements *iter;
583     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584     int32_t charsToTestSize;
585 
586     noOfLoc = uloc_countAvailable();
587 
588     coll = ucol_open("", &status);
589     if (U_FAILURE(status)) {
590         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591         uset_close(charsToTest);
592         return;
593     }
594     charsToTestSize = uset_size(charsToTest);
595     if (charsToTestSize <= 0) {
596         log_err("Set was zero. Missing data?\n");
597         uset_close(charsToTest);
598         return;
599     }
600     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
601     t[0] = (tester *)malloc(sizeof(tester));
602     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
603 
604     for(u = 0; u < charsToTestSize; u++) {
605         UChar32 ch = uset_charAt(charsToTest, u);
606         len = 0;
607         U16_APPEND_UNSAFE(comp, len, ch);
608         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
609         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
610 
611         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
612           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
613             t[noCases]->u = ch;
614             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
615                 u_strncpy(t[noCases]->NFC, comp, len);
616                 t[noCases]->NFC[len] = 0;
617             }
618             noCases++;
619             t[noCases] = (tester *)malloc(sizeof(tester));
620             uprv_memset(t[noCases], 0, sizeof(tester));
621         }
622     }
623     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
624     uset_close(charsToTest);
625     charsToTest = NULL;
626 
627     for(u=0; u<(UChar32)noCases; u++) {
628         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
629             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
630             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
631         }
632     }
633     /*
634     for(u = 0; u < charsToTestSize; u++) {
635       if(!(u&0xFFFF)) {
636         log_verbose("%08X ", u);
637       }
638       uprv_memset(t[noCases], 0, sizeof(tester));
639       t[noCases]->u = u;
640       len = 0;
641       U16_APPEND_UNSAFE(comp, len, u);
642       comp[len] = 0;
643       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
644       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
645       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
646       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
647     }
648     */
649 
650     ucol_close(coll);
651 
652     log_verbose("Testing locales, number of cases = %i\n", noCases);
653     for(i = 0; i<noOfLoc; i++) {
654         status = U_ZERO_ERROR;
655         locName = uloc_getAvailable(i);
656         if(hasCollationElements(locName)) {
657             char cName[256];
658             UChar name[256];
659             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
660 
661             for(j = 0; j<nameSize; j++) {
662                 cName[j] = (char)name[j];
663             }
664             cName[nameSize] = 0;
665             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
666 
667             coll = ucol_open(locName, &status);
668             ucol_setStrength(coll, UCOL_IDENTICAL);
669             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
670 
671             for(u=0; u<(UChar32)noCases; u++) {
672                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
673                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
674                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
675                     log_verbose("Testing NFC\n");
676                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
677                     backAndForth(iter);
678                     log_verbose("Testing NFD\n");
679                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
680                     backAndForth(iter);
681                 }
682             }
683             ucol_closeElements(iter);
684             ucol_close(coll);
685         }
686     }
687     for(u = 0; u <= (UChar32)noCases; u++) {
688         free(t[u]);
689     }
690     free(t);
691 }
692 
TestEmptyRule(void)693 static void TestEmptyRule(void) {
694   UErrorCode status = U_ZERO_ERROR;
695   UChar rulez[] = { 0 };
696   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
697 
698   ucol_close(coll);
699 }
700 
TestUCARules(void)701 static void TestUCARules(void) {
702   UErrorCode status = U_ZERO_ERROR;
703   UChar b[256];
704   UChar *rules = b;
705   uint32_t ruleLen = 0;
706   UCollator *UCAfromRules = NULL;
707   UCollator *coll = ucol_open("", &status);
708   if(status == U_FILE_ACCESS_ERROR) {
709     log_data_err("Is your data around?\n");
710     return;
711   } else if(U_FAILURE(status)) {
712     log_err("Error opening collator\n");
713     return;
714   }
715   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
716 
717   log_verbose("TestUCARules\n");
718   if(ruleLen > 256) {
719     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
720     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
721   }
722   log_verbose("Rules length is %d\n", ruleLen);
723   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
724   if(U_SUCCESS(status)) {
725     ucol_close(UCAfromRules);
726   } else {
727     log_verbose("Unable to create a collator from UCARules!\n");
728   }
729 /*
730   u_unescape(blah, b, 256);
731   ucol_getSortKey(coll, b, 1, res, 256);
732 */
733   ucol_close(coll);
734   if(rules != b) {
735     free(rules);
736   }
737 }
738 
739 
740 /* Pinyin tonal order */
741 /*
742     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
743           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
744     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
745     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
746     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
747     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
748       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
749 .. (\u00fc)
750 
751 However, in testing we got the following order:
752     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
753           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
754     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
755 .. (\u0113)
756     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
757     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
758     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
759 .. (\u01d8)
760       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
761 */
762 
/**
 * Tests [before 1] tailorings that place the four toned forms of each vowel
 * ahead of the plain vowel, plus the toned and plain forms of u-diaeresis
 * after "u". The table lists the strings in expected ascending order.
 */
static void TestBefore(void) {
  static const char *expectedOrder[] = {
      /* a */ "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      /* e */ "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      /* i */ "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      /* o */ "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      /* u */ "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      /* after u */ "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char rules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
781 
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: expected order of toned vowels under the "zh" locale. */
static void TestJ784(void) {
  static const char *expectedOrder[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
#endif
797 
/**
 * Runs genericLocaleStarter() for the "da" locale with strings listed
 * uppercase-before-lowercase ("I" < "i" < "Y" < "y").
 */
static void TestUpperCaseFirst(void) {
  static const char *expectedOrder[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("da", expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
807 
TestJ815(void)808 static void TestJ815(void) {
809   const static char *data[] = {
810     "aa",
811       "Aa",
812       "ab",
813       "Ab",
814       "ad",
815       "Ad",
816       "ae",
817       "Ae",
818       "\\u00e6",
819       "\\u00c6",
820       "af",
821       "Af",
822       "b",
823       "B"
824   };
825   genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
826   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
827 }
828 
829 
TestCase(void)830 static void TestCase(void)
831 {
832     const static UChar gRules[MAX_TOKEN_LEN] =
833     /*" & 0 < 1,\u2461<a,A"*/
834     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
835 
836     const static UChar testCase[][MAX_TOKEN_LEN] =
837     {
838         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
839         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
840         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
841         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
842     };
843 
844     const static UCollationResult caseTestResults[][9] =
845     {
846         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
847         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
848         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
850     };
851 
852     const static UColAttributeValue caseTestAttributes[][2] =
853     {
854         { UCOL_LOWER_FIRST, UCOL_OFF},
855         { UCOL_UPPER_FIRST, UCOL_OFF},
856         { UCOL_LOWER_FIRST, UCOL_ON},
857         { UCOL_UPPER_FIRST, UCOL_ON}
858     };
859     int32_t i,j,k;
860     UErrorCode status = U_ZERO_ERROR;
861     UCollationElements *iter;
862     UCollator  *myCollation;
863     myCollation = ucol_open("en_US", &status);
864 
865     if(U_FAILURE(status)){
866         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
867         return;
868     }
869     log_verbose("Testing different case settings\n");
870     ucol_setStrength(myCollation, UCOL_TERTIARY);
871 
872     for(k = 0; k<4; k++) {
873       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
874       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
875       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
876       for (i = 0; i < 3 ; i++) {
877         for(j = i+1; j<4; j++) {
878           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
879         }
880       }
881     }
882     ucol_close(myCollation);
883 
884     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
885     if(U_FAILURE(status)){
886         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
887         return;
888     }
889     log_verbose("Testing different case settings with custom rules\n");
890     ucol_setStrength(myCollation, UCOL_TERTIARY);
891 
892     for(k = 0; k<4; k++) {
893       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
894       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
895       for (i = 0; i < 3 ; i++) {
896         for(j = i+1; j<4; j++) {
897           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
898           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
899           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
900           backAndForth(iter);
901           ucol_closeElements(iter);
902           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
903           backAndForth(iter);
904           ucol_closeElements(iter);
905         }
906       }
907     }
908     ucol_close(myCollation);
909     {
910       const static char *lowerFirst[] = {
911         "h",
912         "H",
913         "ch",
914         "Ch",
915         "CH",
916         "cha",
917         "chA",
918         "Cha",
919         "ChA",
920         "CHa",
921         "CHA",
922         "i",
923         "I"
924       };
925 
926       const static char *upperFirst[] = {
927         "H",
928         "h",
929         "CH",
930         "Ch",
931         "ch",
932         "CHA",
933         "CHa",
934         "ChA",
935         "Cha",
936         "chA",
937         "cha",
938         "I",
939         "i"
940       };
941       log_verbose("mixed case test\n");
942       log_verbose("lower first, case level off\n");
943       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
944       log_verbose("upper first, case level off\n");
945       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
946       log_verbose("lower first, case level on\n");
947       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
948       log_verbose("upper first, case level on\n");
949       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
950     }
951 
952 }
953 
TestIncrementalNormalize(void)954 static void TestIncrementalNormalize(void) {
955 
956     /*UChar baseA     =0x61;*/
957     UChar baseA     =0x41;
958 /*    UChar baseB     = 0x42;*/
959     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
960     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
961     /*
962         0x316 is combining grave accent below, cc=220
963         0x321 is combining palatalized hook below, cc=202
964         0x300 is combining grave accent, cc=230
965     */
966 
967 #define MAXSLEN 2000
968     /*int          maxSLen   = 64000;*/
969     int          sLen;
970     int          i;
971 
972     UCollator        *coll;
973     UErrorCode       status = U_ZERO_ERROR;
974     UCollationResult result;
975 
976     int32_t myQ = getTestOption(QUICK_OPTION);
977 
978     if(getTestOption(QUICK_OPTION) < 0) {
979         setTestOption(QUICK_OPTION, 1);
980     }
981 
982     {
983         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
984         /*          most buffers along the way.*/
985         UChar            strA[MAXSLEN+1];
986         UChar            strB[MAXSLEN+1];
987 
988         coll = ucol_open("en_US", &status);
989         if(status == U_FILE_ACCESS_ERROR) {
990           log_data_err("Is your data around?\n");
991           return;
992         } else if(U_FAILURE(status)) {
993           log_err("Error opening collator\n");
994           return;
995         }
996         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
997 
998         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
999         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1000         /*for (sLen = 1000; sLen<1001; sLen++) {*/
1001         for (sLen = 500; sLen<501; sLen++) {
1002         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1003             strA[0] = baseA;
1004             strB[0] = baseA;
1005             for (i=1; i<=sLen-1; i++) {
1006                 strA[i] = ccMix[i % 3];
1007                 strB[sLen-i] = ccMix[i % 3];
1008             }
1009             strA[sLen]   = 0;
1010             strB[sLen]   = 0;
1011 
1012             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1013             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1014             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1015             doTest(coll, strA, strB, UCOL_EQUAL);
1016         }
1017     }
1018 
1019     setTestOption(QUICK_OPTION, myQ);
1020 
1021 
1022     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1023     /*         of the string.  Checks a couple of edge cases.*/
1024 
1025     {
1026         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1027         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1028         ucol_setStrength(coll, UCOL_TERTIARY);
1029         doTest(coll, strA, strB, UCOL_EQUAL);
1030     }
1031 
1032     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1033 
1034     {
1035       /* New UCA  3.1.1.
1036        * test below used a code point from Desseret, which sorts differently
1037        * than d800 dc00
1038        */
1039         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1040         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1041         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1042         ucol_setStrength(coll, UCOL_TERTIARY);
1043         doTest(coll, strA, strB, UCOL_GREATER);
1044     }
1045 
1046     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1047 
1048     {
1049         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1050         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1051         char  sortKeyA[50];
1052         char  sortKeyAz[50];
1053         char  sortKeyB[50];
1054         char  sortKeyBz[50];
1055         int   r;
1056 
1057         /* there used to be -3 here. Hmmmm.... */
1058         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1059         result = ucol_strcoll(coll, strA, 3, strB, 3);
1060         if (result != UCOL_GREATER) {
1061             log_err("ERROR 1 in test 4\n");
1062         }
1063         result = ucol_strcoll(coll, strA, -1, strB, -1);
1064         if (result != UCOL_EQUAL) {
1065             log_err("ERROR 2 in test 4\n");
1066         }
1067 
1068         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1069         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1070         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1071         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1072 
1073         r = strcmp(sortKeyA, sortKeyAz);
1074         if (r <= 0) {
1075             log_err("Error 3 in test 4\n");
1076         }
1077         r = strcmp(sortKeyA, sortKeyB);
1078         if (r <= 0) {
1079             log_err("Error 4 in test 4\n");
1080         }
1081         r = strcmp(sortKeyAz, sortKeyBz);
1082         if (r != 0) {
1083             log_err("Error 5 in test 4\n");
1084         }
1085 
1086         ucol_setStrength(coll, UCOL_IDENTICAL);
1087         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1088         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1089         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1090         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1091 
1092         r = strcmp(sortKeyA, sortKeyAz);
1093         if (r <= 0) {
1094             log_err("Error 6 in test 4\n");
1095         }
1096         r = strcmp(sortKeyA, sortKeyB);
1097         if (r <= 0) {
1098             log_err("Error 7 in test 4\n");
1099         }
1100         r = strcmp(sortKeyAz, sortKeyBz);
1101         if (r != 0) {
1102             log_err("Error 8 in test 4\n");
1103         }
1104         ucol_setStrength(coll, UCOL_TERTIARY);
1105     }
1106 
1107 
1108     /*  Test 5:  Null characters in non-normal source strings.*/
1109 
1110     {
1111         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1112         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1113         char  sortKeyA[50];
1114         char  sortKeyAz[50];
1115         char  sortKeyB[50];
1116         char  sortKeyBz[50];
1117         int   r;
1118 
1119         result = ucol_strcoll(coll, strA, 6, strB, 6);
1120         if (result != UCOL_GREATER) {
1121             log_err("ERROR 1 in test 5\n");
1122         }
1123         result = ucol_strcoll(coll, strA, -1, strB, -1);
1124         if (result != UCOL_EQUAL) {
1125             log_err("ERROR 2 in test 5\n");
1126         }
1127 
1128         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1129         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1130         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1131         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1132 
1133         r = strcmp(sortKeyA, sortKeyAz);
1134         if (r <= 0) {
1135             log_err("Error 3 in test 5\n");
1136         }
1137         r = strcmp(sortKeyA, sortKeyB);
1138         if (r <= 0) {
1139             log_err("Error 4 in test 5\n");
1140         }
1141         r = strcmp(sortKeyAz, sortKeyBz);
1142         if (r != 0) {
1143             log_err("Error 5 in test 5\n");
1144         }
1145 
1146         ucol_setStrength(coll, UCOL_IDENTICAL);
1147         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1148         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1149         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1150         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1151 
1152         r = strcmp(sortKeyA, sortKeyAz);
1153         if (r <= 0) {
1154             log_err("Error 6 in test 5\n");
1155         }
1156         r = strcmp(sortKeyA, sortKeyB);
1157         if (r <= 0) {
1158             log_err("Error 7 in test 5\n");
1159         }
1160         r = strcmp(sortKeyAz, sortKeyBz);
1161         if (r != 0) {
1162             log_err("Error 8 in test 5\n");
1163         }
1164         ucol_setStrength(coll, UCOL_TERTIARY);
1165     }
1166 
1167 
1168     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1169 
1170     {
1171         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1172         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1173 
1174         result = ucol_strcoll(coll, strA, 5, strB, 5);
1175         if (result != UCOL_LESS) {
1176             log_err("Error 1 in test 6\n");
1177         }
1178         result = ucol_strcoll(coll, strA, -1, strB, -1);
1179         if (result != UCOL_EQUAL) {
1180             log_err("Error 2 in test 6\n");
1181         }
1182     }
1183 
1184     ucol_close(coll);
1185 }
1186 
1187 
1188 
#if 0
/*
 * Disabled test: checks the case bits reported by ucol_uprv_getCaseBits() for
 * a handful of strings against a table of expected values.
 * caseBitData[i] is unescaped and its result compared with results[i].
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Without a collator there is nothing to test. */
  if(U_FAILURE(status)) {
    log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator; it was previously leaked. */
  ucol_close(UCA);
}
#endif
1216 
TestHangulTailoring(void)1217 static void TestHangulTailoring(void) {
1218     static const char *koreanData[] = {
1219         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1220             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1221             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1222             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1223             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1224             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1225     };
1226 
1227     const char *rules =
1228         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1229         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1230         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1231         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1232         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1233         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1234 
1235 
1236   UErrorCode status = U_ZERO_ERROR;
1237   UChar rlz[2048] = { 0 };
1238   uint32_t rlen = u_unescape(rules, rlz, 2048);
1239 
1240   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1241   if(status == U_FILE_ACCESS_ERROR) {
1242     log_data_err("Is your data around?\n");
1243     return;
1244   } else if(U_FAILURE(status)) {
1245     log_err("Error opening collator\n");
1246     return;
1247   }
1248 
1249   log_verbose("Using start of korean rules\n");
1250 
1251   if(U_SUCCESS(status)) {
1252     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1253   } else {
1254     log_err("Unable to open collator with rules %s\n", rules);
1255   }
1256 
1257   ucol_close(coll);
1258 
1259   log_verbose("Using ko__LOTUS locale\n");
1260   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1261 }
1262 
/*
 * Middle bytes used by the secondary and tertiary sort key compression in the
 * current implementation (see class CollationKeys).
 * These values are implementation details and have to be updated whenever the
 * sort key compression scheme changes.
 */
enum {
    /* secondary level: middle of the range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary level: middle of the range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1273 
TestCompressOverlap(void)1274 static void TestCompressOverlap(void) {
1275     UChar       secstr[150];
1276     UChar       tertstr[150];
1277     UErrorCode  status = U_ZERO_ERROR;
1278     UCollator  *coll;
1279     uint8_t     result[500];
1280     uint32_t    resultlen;
1281     int         count = 0;
1282     uint8_t    *tempptr;
1283 
1284     coll = ucol_open("", &status);
1285 
1286     if (U_FAILURE(status)) {
1287         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1288         return;
1289     }
1290     while (count < 149) {
1291         secstr[count] = 0x0020; /* [06, 05, 05] */
1292         tertstr[count] = 0x0020;
1293         count ++;
1294     }
1295 
1296     /* top down compression ----------------------------------- */
1297     secstr[count] = 0x0332; /* [, 87, 05] */
1298     tertstr[count] = 0x3000; /* [06, 05, 07] */
1299 
1300     /* no compression secstr should have 150 secondary bytes, tertstr should
1301     have 150 tertiary bytes.
1302     with correct compression, secstr should have 6 secondary
1303     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1304     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1305     (void)resultlen;    /* Suppress set but not used warning. */
1306     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1307     while (*(tempptr + 1) != 1) {
1308         /* the last secondary collation element is not checked since it is not
1309         part of the compression */
1310         if (*tempptr < SEC_COMMON_MIDDLE) {
1311             log_err("Secondary top down compression overlapped\n");
1312         }
1313         tempptr ++;
1314     }
1315 
1316     /* tertiary top/bottom/common for en_US is similar to the secondary
1317     top/bottom/common */
1318     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1319     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1320     while (*(tempptr + 1) != 0) {
1321         /* the last secondary collation element is not checked since it is not
1322         part of the compression */
1323         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1324             log_err("Tertiary top down compression overlapped\n");
1325         }
1326         tempptr ++;
1327     }
1328 
1329     /* bottom up compression ------------------------------------- */
1330     secstr[count] = 0;
1331     tertstr[count] = 0;
1332     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1333     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1334     while (*(tempptr + 1) != 1) {
1335         /* the last secondary collation element is not checked since it is not
1336         part of the compression */
1337         if (*tempptr > SEC_COMMON_MIDDLE) {
1338             log_err("Secondary bottom up compression overlapped\n");
1339         }
1340         tempptr ++;
1341     }
1342 
1343     /* tertiary top/bottom/common for en_US is similar to the secondary
1344     top/bottom/common */
1345     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1346     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1347     while (*(tempptr + 1) != 0) {
1348         /* the last secondary collation element is not checked since it is not
1349         part of the compression */
1350         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1351             log_err("Tertiary bottom up compression overlapped\n");
1352         }
1353         tempptr ++;
1354     }
1355 
1356     ucol_close(coll);
1357 }
1358 
/*
 * Runs the three-string Cyrillic ordering check against each tailoring that
 * is still applicable.
 *
 * Not run any more:
 *  - locale "ru": Russian overrides contractions, so the test is not valid.
 *  - locale "root" and the rules
 *        "&\\u0410 = \\u0410"
 *        "&Z < \\u0410"
 *        "&\\u0410 = \\u0410 < \\u0410\\u0301"
 *        "&Z < \\u0410 < \\u0410\\u0301"
 *    because UCA 8.0 drops most of the Cyrillic contractions from the default
 *    order. See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* tailorings that are still exercised, in execution order */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t ruleIdx;

    for (ruleIdx = 0; ruleIdx < UPRV_LENGTHOF(activeRules); ++ruleIdx) {
        genericRulesStarter(activeRules[ruleIdx], test, UPRV_LENGTHOF(test));
    }
}
1381 
/*
 * Runs two ordering checks with a tailoring that consists only of
 * [suppressContractions] for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* U+0410 appended to Latin letters */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* U+0410 (optionally with a combining mark) followed by Latin letters */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1398 
TestContraction(void)1399 static void TestContraction(void) {
1400     const static char *testrules[] = {
1401         "&A = AB / B",
1402         "&A = A\\u0306/\\u0306",
1403         "&c = ch / h"
1404     };
1405     const static UChar testdata[][2] = {
1406         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1407         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1408         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1409     };
1410     const static UChar testdata2[][2] = {
1411         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1412         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1413         {0x0063 /* 'c' */, 0x006C /* 'l' */}
1414     };
1415 #if 0
1416     /*
1417      * These pairs of rule strings are not guaranteed to yield the very same mappings.
1418      * In fact, LDML 24 recommends an improved way of creating mappings
1419      * which always yields different mappings for such pairs. See
1420      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1421      */
1422     const static char *testrules3[] = {
1423         "&z < xyz &xyzw << B",
1424         "&z < xyz &xyz << B / w",
1425         "&z < ch &achm << B",
1426         "&z < ch &a << B / chm",
1427         "&\\ud800\\udc00w << B",
1428         "&\\ud800\\udc00 << B / w",
1429         "&a\\ud800\\udc00m << B",
1430         "&a << B / \\ud800\\udc00m",
1431     };
1432 #endif
1433 
1434     UErrorCode  status   = U_ZERO_ERROR;
1435     UCollator  *coll;
1436     UChar       rule[256] = {0};
1437     uint32_t    rlen     = 0;
1438     int         i;
1439 
1440     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1441         UCollationElements *iter1;
1442         int j = 0;
1443         log_verbose("Rule %s for testing\n", testrules[i]);
1444         rlen = u_unescape(testrules[i], rule, 32);
1445         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1446         if (U_FAILURE(status)) {
1447             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1448             return;
1449         }
1450         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1451         if (U_FAILURE(status)) {
1452             log_err("Collation iterator creation failed\n");
1453             return;
1454         }
1455         while (j < 2) {
1456             UCollationElements *iter2 = ucol_openElements(coll,
1457                                                          &(testdata[i][j]),
1458                                                          1, &status);
1459             int32_t ce;
1460             if (U_FAILURE(status)) {
1461                 log_err("Collation iterator creation failed\n");
1462                 return;
1463             }
1464             ce = ucol_next(iter2, &status);
1465             while (ce != UCOL_NULLORDER) {
1466                 if (ucol_next(iter1, &status) != ce) {
1467                     log_err("Collation elements in contraction split does not match\n");
1468                     return;
1469                 }
1470                 ce = ucol_next(iter2, &status);
1471             }
1472             j ++;
1473             ucol_closeElements(iter2);
1474         }
1475         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1476             log_err("Collation elements not exhausted\n");
1477             return;
1478         }
1479         ucol_closeElements(iter1);
1480         ucol_close(coll);
1481     }
1482 
1483     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1484     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1485     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1486         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1487                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1488                 testdata2[1][1]);
1489         return;
1490     }
1491     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1492         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1493                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1494                 testdata2[2][1]);
1495         return;
1496     }
1497     ucol_close(coll);
1498 #if 0  /* see above */
1499     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1500         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1501         UCollator          *coll1,
1502                            *coll2;
1503         UCollationElements *iter1,
1504                            *iter2;
1505         UChar               ch = 0x0042 /* 'B' */;
1506         uint32_t            ce;
1507         rlen = u_unescape(testrules3[i], rule, 32);
1508         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509         rlen = u_unescape(testrules3[i + 1], rule, 32);
1510         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1511         if (U_FAILURE(status)) {
1512             log_err("Collator creation failed %s\n", testrules[i]);
1513             return;
1514         }
1515         iter1 = ucol_openElements(coll1, &ch, 1, &status);
1516         iter2 = ucol_openElements(coll2, &ch, 1, &status);
1517         if (U_FAILURE(status)) {
1518             log_err("Collation iterator creation failed\n");
1519             return;
1520         }
1521         ce = ucol_next(iter1, &status);
1522         if (U_FAILURE(status)) {
1523             log_err("Retrieving ces failed\n");
1524             return;
1525         }
1526         while (ce != UCOL_NULLORDER) {
1527             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1528             if (ce == ce2) {
1529                 log_verbose("CEs match: %08x\n", ce);
1530             } else {
1531                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1532                 return;
1533             }
1534             ce = ucol_next(iter1, &status);
1535             if (U_FAILURE(status)) {
1536                 log_err("Retrieving ces failed\n");
1537                 return;
1538             }
1539         }
1540         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1541             log_err("CEs not exhausted\n");
1542             return;
1543         }
1544         ucol_closeElements(iter1);
1545         ucol_closeElements(iter2);
1546         ucol_close(coll1);
1547         ucol_close(coll2);
1548     }
1549 #endif
1550 }
1551 
TestExpansion(void)1552 static void TestExpansion(void) {
1553     const static char *testrules[] = {
1554 #if 0
1555         /*
1556          * This seems to have tested that M was not mapped to an expansion.
1557          * I believe the old builder just did that because it computed the extension CEs
1558          * at the very end, which was a bug.
1559          * Among other problems, it violated the core tailoring principle
1560          * by making an earlier rule depend on a later one.
1561          * And, of course, if M did not get an expansion, then it was primary different from K,
1562          * unlike what the rule &K<<M says.
1563          */
1564         "&J << K / B & K << M",
1565 #endif
1566         "&J << K / B << M"
1567     };
1568     const static UChar testdata[][3] = {
1569         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1570         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1571         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1572         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1573         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1574         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1575     };
1576 
1577     UErrorCode  status   = U_ZERO_ERROR;
1578     UCollator  *coll;
1579     UChar       rule[256] = {0};
1580     uint32_t    rlen     = 0;
1581     int         i;
1582 
1583     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1584         int j = 0;
1585         log_verbose("Rule %s for testing\n", testrules[i]);
1586         rlen = u_unescape(testrules[i], rule, 32);
1587         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1588         if (U_FAILURE(status)) {
1589             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1590             return;
1591         }
1592 
1593         for (j = 0; j < 5; j ++) {
1594             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1595         }
1596         ucol_close(coll);
1597     }
1598 }
1599 
#if 0
/*
 * Documents known limitations of the collation engine.
 * The test always fails, which is why it is compiled out by default.
 * Each section pairs one rule string with alternative orderings and feeds
 * them to genericRulesStarter()/genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *expRule = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(expRule, orderA, UPRV_LENGTHOF(orderA));
    genericRulesStarter(expRule, orderB, UPRV_LENGTHOF(orderB));
  }

  /* contractions spanning expansions */
  {
    static const char *spanRule = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(spanRule, orderA, UPRV_LENGTHOF(orderA));
    genericRulesStarter(spanRule, orderB, UPRV_LENGTHOF(orderB));
  }

  /* normalization: nulls in contractions */
  {
    static const char *nulRule = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orderings, first with normalization on, then off */
    genericRulesStarterWithOptions(nulRule, orderA, UPRV_LENGTHOF(orderA), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(nulRule, orderB, UPRV_LENGTHOF(orderB), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(nulRule, orderA, UPRV_LENGTHOF(orderA), attrs, normOff, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(nulRule, orderB, UPRV_LENGTHOF(orderB), attrs, normOff, UPRV_LENGTHOF(attrs));
  }

  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the previous section - confirm this is intended. */
  {
    static const char *normRule = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(normRule, orderA, UPRV_LENGTHOF(orderA), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(normRule, orderB, UPRV_LENGTHOF(orderB), attrs, normOn, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(normRule, orderA, UPRV_LENGTHOF(orderA), attrs, normOff, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(normRule, orderB, UPRV_LENGTHOF(orderB), attrs, normOff, UPRV_LENGTHOF(attrs));
  }

  /* variable top */
  {
    /*
     * Alternative rules that were tried here and are not used:
     *   "&\\u2010<x=[variable top]<z"
     *   "&' '<x<[variable top]=z"
     */
    static const char *vtRule = "&\\u2010<x<[variable top]=z";
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(vtRule, orderC, UPRV_LENGTHOF(orderC), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(vtRule, orderA, UPRV_LENGTHOF(orderA), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(vtRule, orderB, UPRV_LENGTHOF(orderB), attrs, shifted, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(vtRule, orderA, UPRV_LENGTHOF(orderA), attrs, nonIgnorable, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(vtRule, orderB, UPRV_LENGTHOF(orderB), attrs, nonIgnorable, UPRV_LENGTHOF(attrs));
  }

  /* case level */
  {
    static const char *caseRule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};

    log_verbose("case level\n");
    /* Only upper-first is run; a variant with the attribute set to UCOL_OFF is not exercised. */
    genericRulesStarterWithOptions(caseRule, orderA, UPRV_LENGTHOF(orderA), attrs, upperFirst, UPRV_LENGTHOF(attrs));
    genericRulesStarterWithOptions(caseRule, orderB, UPRV_LENGTHOF(orderB), attrs, upperFirst, UPRV_LENGTHOF(attrs));
  }

}
#endif
1691 
TestBocsuCoverage(void)1692 static void TestBocsuCoverage(void) {
1693   UErrorCode status = U_ZERO_ERROR;
1694   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1695   UChar       test[256] = {0};
1696   uint32_t    tlen     = u_unescape(testString, test, 32);
1697   uint8_t key[256]     = {0};
1698   uint32_t klen         = 0;
1699 
1700   UCollator *coll = ucol_open("", &status);
1701   if(U_SUCCESS(status)) {
1702   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1703 
1704   klen = ucol_getSortKey(coll, test, tlen, key, 256);
1705   (void)klen;    /* Suppress set but not used warning. */
1706 
1707   ucol_close(coll);
1708   } else {
1709     log_data_err("Couldn't open UCA\n");
1710   }
1711 }
1712 
TestVariableTopSetting(void)1713 static void TestVariableTopSetting(void) {
1714   UErrorCode status = U_ZERO_ERROR;
1715   uint32_t varTopOriginal = 0, varTop1, varTop2;
1716   UCollator *coll = ucol_open("", &status);
1717   if(U_SUCCESS(status)) {
1718 
1719   static const UChar nul = 0;
1720   static const UChar space = 0x20;
1721   static const UChar dot = 0x2e;  /* punctuation */
1722   static const UChar degree = 0xb0;  /* symbol */
1723   static const UChar dollar = 0x24;  /* currency symbol */
1724   static const UChar zero = 0x30;  /* digit */
1725 
1726   varTopOriginal = ucol_getVariableTop(coll, &status);
1727   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1728   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1729 
1730   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1731   varTop2 = ucol_getVariableTop(coll, &status);
1732   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1733   if(U_FAILURE(status) || varTop1 != varTop2 ||
1734       !ucol_equal(coll, &nul, 0, &space, 1) ||
1735       ucol_equal(coll, &nul, 0, &dot, 1) ||
1736       ucol_equal(coll, &nul, 0, &degree, 1) ||
1737       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1738       ucol_equal(coll, &nul, 0, &zero, 1) ||
1739       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1740     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1741   }
1742 
1743   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1744   varTop2 = ucol_getVariableTop(coll, &status);
1745   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1746   if(U_FAILURE(status) || varTop1 != varTop2 ||
1747       !ucol_equal(coll, &nul, 0, &space, 1) ||
1748       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1749       ucol_equal(coll, &nul, 0, &degree, 1) ||
1750       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1751       ucol_equal(coll, &nul, 0, &zero, 1) ||
1752       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1753     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1754   }
1755 
1756   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1757   varTop2 = ucol_getVariableTop(coll, &status);
1758   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1759   if(U_FAILURE(status) || varTop1 != varTop2 ||
1760       !ucol_equal(coll, &nul, 0, &space, 1) ||
1761       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1762       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1763       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1764       ucol_equal(coll, &nul, 0, &zero, 1) ||
1765       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1766     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1767   }
1768 
1769   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1770   varTop2 = ucol_getVariableTop(coll, &status);
1771   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1772   if(U_FAILURE(status) || varTop1 != varTop2 ||
1773       !ucol_equal(coll, &nul, 0, &space, 1) ||
1774       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1775       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1776       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1777       ucol_equal(coll, &nul, 0, &zero, 1) ||
1778       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1779     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1780   }
1781 
1782   log_verbose("Testing setting variable top to contractions\n");
1783   {
1784     UChar first[4] = { 0 };
1785     first[0] = 0x0040;
1786     first[1] = 0x0050;
1787     first[2] = 0x0000;
1788 
1789     status = U_ZERO_ERROR;
1790     ucol_setVariableTop(coll, first, -1, &status);
1791 
1792     if(U_SUCCESS(status)) {
1793       log_err("Invalid contraction succeded in setting variable top!\n");
1794     }
1795 
1796   }
1797 
1798   log_verbose("Test restoring variable top\n");
1799 
1800   status = U_ZERO_ERROR;
1801   ucol_restoreVariableTop(coll, varTopOriginal, &status);
1802   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1803     log_err("Couldn't restore old variable top\n");
1804   }
1805 
1806   log_verbose("Testing calling with error set\n");
1807 
1808   status = U_INTERNAL_PROGRAM_ERROR;
1809   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1810   varTop2 = ucol_getVariableTop(coll, &status);
1811   ucol_restoreVariableTop(coll, varTop2, &status);
1812   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1813   varTop2 = ucol_getVariableTop(NULL, &status);
1814   ucol_restoreVariableTop(NULL, varTop2, &status);
1815   if(status != U_INTERNAL_PROGRAM_ERROR) {
1816     log_err("Bad reaction to passed error!\n");
1817   }
1818   ucol_close(coll);
1819   } else {
1820     log_data_err("Couldn't open UCA collator\n");
1821   }
1822 }
1823 
TestMaxVariable(void)1824 static void TestMaxVariable(void) {
1825   UErrorCode status = U_ZERO_ERROR;
1826   UColReorderCode oldMax, max;
1827   UCollator *coll;
1828 
1829   static const UChar nul = 0;
1830   static const UChar space = 0x20;
1831   static const UChar dot = 0x2e;  /* punctuation */
1832   static const UChar degree = 0xb0;  /* symbol */
1833   static const UChar dollar = 0x24;  /* currency symbol */
1834   static const UChar zero = 0x30;  /* digit */
1835 
1836   coll = ucol_open("", &status);
1837   if(U_FAILURE(status)) {
1838     log_data_err("Couldn't open root collator\n");
1839     return;
1840   }
1841 
1842   oldMax = ucol_getMaxVariable(coll);
1843   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1844   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1845 
1846   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1847   max = ucol_getMaxVariable(coll);
1848   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1849   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1850       !ucol_equal(coll, &nul, 0, &space, 1) ||
1851       ucol_equal(coll, &nul, 0, &dot, 1) ||
1852       ucol_equal(coll, &nul, 0, &degree, 1) ||
1853       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1854       ucol_equal(coll, &nul, 0, &zero, 1) ||
1855       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1856     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1857   }
1858 
1859   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1860   max = ucol_getMaxVariable(coll);
1861   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1862   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1863       !ucol_equal(coll, &nul, 0, &space, 1) ||
1864       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1865       ucol_equal(coll, &nul, 0, &degree, 1) ||
1866       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1867       ucol_equal(coll, &nul, 0, &zero, 1) ||
1868       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1869     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1870   }
1871 
1872   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1873   max = ucol_getMaxVariable(coll);
1874   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1875   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1876       !ucol_equal(coll, &nul, 0, &space, 1) ||
1877       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1878       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1879       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1880       ucol_equal(coll, &nul, 0, &zero, 1) ||
1881       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1882     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1883   }
1884 
1885   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1886   max = ucol_getMaxVariable(coll);
1887   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1888   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1889       !ucol_equal(coll, &nul, 0, &space, 1) ||
1890       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1891       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1892       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1893       ucol_equal(coll, &nul, 0, &zero, 1) ||
1894       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1895     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1896   }
1897 
1898   log_verbose("Test restoring maxVariable\n");
1899   status = U_ZERO_ERROR;
1900   ucol_setMaxVariable(coll, oldMax, &status);
1901   if(oldMax != ucol_getMaxVariable(coll)) {
1902     log_err("Couldn't restore old maxVariable\n");
1903   }
1904 
1905   log_verbose("Testing calling with error set\n");
1906   status = U_INTERNAL_PROGRAM_ERROR;
1907   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1908   max = ucol_getMaxVariable(coll);
1909   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1910     log_err("Bad reaction to passed error!\n");
1911   }
1912   ucol_close(coll);
1913 }
1914 
TestNonChars(void)1915 static void TestNonChars(void) {
1916   static const char *test[] = {
1917       "\\u0000",  /* ignorable */
1918       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1919       "\\uFDD0", "\\uFDEF",
1920       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1921       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1922       "\\U0003FFFE", "\\U0003FFFF",
1923       "\\U0004FFFE", "\\U0004FFFF",
1924       "\\U0005FFFE", "\\U0005FFFF",
1925       "\\U0006FFFE", "\\U0006FFFF",
1926       "\\U0007FFFE", "\\U0007FFFF",
1927       "\\U0008FFFE", "\\U0008FFFF",
1928       "\\U0009FFFE", "\\U0009FFFF",
1929       "\\U000AFFFE", "\\U000AFFFF",
1930       "\\U000BFFFE", "\\U000BFFFF",
1931       "\\U000CFFFE", "\\U000CFFFF",
1932       "\\U000DFFFE", "\\U000DFFFF",
1933       "\\U000EFFFE", "\\U000EFFFF",
1934       "\\U000FFFFE", "\\U000FFFFF",
1935       "\\U0010FFFE", "\\U0010FFFF",
1936       "\\uFFFF"  /* special character with maximum primary weight */
1937   };
1938   UErrorCode status = U_ZERO_ERROR;
1939   UCollator *coll = ucol_open("en_US", &status);
1940 
1941   log_verbose("Test non characters\n");
1942 
1943   if(U_SUCCESS(status)) {
1944     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1945   } else {
1946     log_err_status(status, "Unable to open collator\n");
1947   }
1948 
1949   ucol_close(coll);
1950 }
1951 
/*
 * Compares four long strings that share a run of 'a' characters and differ
 * only in their final character ('a', 'b', 'c', 'd'), for every total length
 * from 20 up to 499, using the "en_US" collator via genericLocaleStarter().
 *
 * The scratch buffers are heap-allocated; if an allocation fails the test
 * reports an error and releases whatever was already allocated instead of
 * writing through a NULL pointer.
 */
static void TestExtremeCompression(void) {
  enum {
    kStringCount    = 4,     /* number of strings compared per round */
    kBufferCapacity = 2048,  /* bytes per scratch buffer */
    kFirstLength    = 20,    /* shortest string length tested */
    kLengthLimit    = 500    /* one past the longest string length tested */
  };
  char *test[kStringCount];
  int32_t j = 0, i = 0;

  for(i = 0; i < kStringCount; i++) {
    test[i] = (char *)malloc(kBufferCapacity);
    if(test[i] == NULL) {
      log_err("TestExtremeCompression: unable to allocate test buffers\n");
      while(--i >= 0) {
        free(test[i]);
      }
      return;
    }
  }

  for(j = kFirstLength; j < kLengthLimit; j++) {
    for(i = 0; i < kStringCount; i++) {
      /* j-1 copies of 'a', then one distinguishing letter, then NUL */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, kStringCount);
  }

  for(i = 0; i < kStringCount; i++) {
    free(test[i]);
  }
}
1974 
/*
 * Disabled (#if 0) alternative version of TestExtremeCompression; it is not
 * compiled. Kept for reference only.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  /* NOTE(review): passes status by value, not &status; coll is also never used or closed below. */
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Fill the four buffers for every length; only the final contents survive the loop. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  /* Called once, after the loop, so only the last set of strings is compared. */
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Rewrites test[0] only (i < 1); nothing is compared afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2004 
/*
 * Tailors a handful of supplementary characters (written as surrogate
 * pairs), alone and followed by ASCII letters, after 'z' and verifies via
 * genericRulesStarter() that the strings in expectedOrder sort in the
 * listed order under that tailoring.
 */
static void TestSurrogates(void) {
  static const char tailoring[] =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *expectedOrder[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* All 14 entries of the table take part in the comparison. */
  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2025 
/*
 * Tests the prefix ("context before") rule syntax "prefix|char", which is
 * used by the JIS X 4061 collation rules. Each case supplies a tailoring
 * and a list of strings that must sort in the listed order under it.
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;       /* escaped tailoring rules */
    const char *data[50];    /* escaped strings in expected order */
    const uint32_t len;      /* number of entries of data[] to compare */
  } cases[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    /* same rule, with whitespace after the '|' */
    { "&z <<< z|   a",
      {"zz", "za"}, 2 },

    /* NOTE(review): five strings are listed but only the first four are
     * compared (len is 4); confirm whether "zz" is left out on purpose. */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  uint32_t caseIndex = 0;

  while(caseIndex < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[caseIndex].rules, cases[caseIndex].data, cases[caseIndex].len);
    ++caseIndex;
  }
}
2051 
2052 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2053 /* JIS X 4061 collation order implementation                                   */
TestNewJapanese(void)2054 static void TestNewJapanese(void) {
2055 
2056   static const char * const test1[] = {
2057       "\\u30b7\\u30e3\\u30fc\\u30ec",
2058       "\\u30b7\\u30e3\\u30a4",
2059       "\\u30b7\\u30e4\\u30a3",
2060       "\\u30b7\\u30e3\\u30ec",
2061       "\\u3061\\u3087\\u3053",
2062       "\\u3061\\u3088\\u3053",
2063       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2064       "\\u3066\\u30fc\\u305f",
2065       "\\u30c6\\u30fc\\u30bf",
2066       "\\u30c6\\u30a7\\u30bf",
2067       "\\u3066\\u3048\\u305f",
2068       "\\u3067\\u30fc\\u305f",
2069       "\\u30c7\\u30fc\\u30bf",
2070       "\\u30c7\\u30a7\\u30bf",
2071       "\\u3067\\u3048\\u305f",
2072       "\\u3066\\u30fc\\u305f\\u30fc",
2073       "\\u30c6\\u30fc\\u30bf\\u30a1",
2074       "\\u30c6\\u30a7\\u30bf\\u30fc",
2075       "\\u3066\\u3047\\u305f\\u3041",
2076       "\\u3066\\u3048\\u305f\\u30fc",
2077       "\\u3067\\u30fc\\u305f\\u30fc",
2078       "\\u30c7\\u30fc\\u30bf\\u30a1",
2079       "\\u3067\\u30a7\\u305f\\u30a1",
2080       "\\u30c7\\u3047\\u30bf\\u3041",
2081       "\\u30c7\\u30a8\\u30bf\\u30a2",
2082       "\\u3072\\u3086",
2083       "\\u3073\\u3085\\u3042",
2084       "\\u3074\\u3085\\u3042",
2085       "\\u3073\\u3085\\u3042\\u30fc",
2086       "\\u30d3\\u30e5\\u30a2\\u30fc",
2087       "\\u3074\\u3085\\u3042\\u30fc",
2088       "\\u30d4\\u30e5\\u30a2\\u30fc",
2089       "\\u30d2\\u30e5\\u30a6",
2090       "\\u30d2\\u30e6\\u30a6",
2091       "\\u30d4\\u30e5\\u30a6\\u30a2",
2092       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2093       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2094       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2095       "\\u3072\\u3085\\u3093",
2096       "\\u3074\\u3085\\u3093",
2097       "\\u3075\\u30fc\\u308a",
2098       "\\u30d5\\u30fc\\u30ea",
2099       "\\u3075\\u3045\\u308a",
2100       "\\u3075\\u30a5\\u308a",
2101       "\\u3075\\u30a5\\u30ea",
2102       "\\u30d5\\u30a6\\u30ea",
2103       "\\u3076\\u30fc\\u308a",
2104       "\\u30d6\\u30fc\\u30ea",
2105       "\\u3076\\u3045\\u308a",
2106       "\\u30d6\\u30a5\\u308a",
2107       "\\u3077\\u3046\\u308a",
2108       "\\u30d7\\u30a6\\u30ea",
2109       "\\u3075\\u30fc\\u308a\\u30fc",
2110       "\\u30d5\\u30a5\\u30ea\\u30fc",
2111       "\\u3075\\u30a5\\u308a\\u30a3",
2112       "\\u30d5\\u3045\\u308a\\u3043",
2113       "\\u30d5\\u30a6\\u30ea\\u30fc",
2114       "\\u3075\\u3046\\u308a\\u3043",
2115       "\\u30d6\\u30a6\\u30ea\\u30a4",
2116       "\\u3077\\u30fc\\u308a\\u30fc",
2117       "\\u3077\\u30a5\\u308a\\u30a4",
2118       "\\u3077\\u3046\\u308a\\u30fc",
2119       "\\u30d7\\u30a6\\u30ea\\u30a4",
2120       "\\u30d5\\u30fd",
2121       "\\u3075\\u309e",
2122       "\\u3076\\u309d",
2123       "\\u3076\\u3075",
2124       "\\u3076\\u30d5",
2125       "\\u30d6\\u3075",
2126       "\\u30d6\\u30d5",
2127       "\\u3076\\u309e",
2128       "\\u3076\\u3077",
2129       "\\u30d6\\u3077",
2130       "\\u3077\\u309d",
2131       "\\u30d7\\u30fd",
2132       "\\u3077\\u3075",
2133 };
2134 
2135   static const char *test2[] = {
2136     "\\u306f\\u309d", /* H\\u309d */
2137     "\\u30cf\\u30fd", /* K\\u30fd */
2138     "\\u306f\\u306f", /* HH */
2139     "\\u306f\\u30cf", /* HK */
2140     "\\u30cf\\u30cf", /* KK */
2141     "\\u306f\\u309e", /* H\\u309e */
2142     "\\u30cf\\u30fe", /* K\\u30fe */
2143     "\\u306f\\u3070", /* HH\\u309b */
2144     "\\u30cf\\u30d0", /* KK\\u309b */
2145     "\\u306f\\u3071", /* HH\\u309c */
2146     "\\u30cf\\u3071", /* KH\\u309c */
2147     "\\u30cf\\u30d1", /* KK\\u309c */
2148     "\\u3070\\u309d", /* H\\u309b\\u309d */
2149     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2150     "\\u3070\\u306f", /* H\\u309bH */
2151     "\\u30d0\\u30cf", /* K\\u309bK */
2152     "\\u3070\\u309e", /* H\\u309b\\u309e */
2153     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2154     "\\u3070\\u3070", /* H\\u309bH\\u309b */
2155     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2156     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2157     "\\u3070\\u3071", /* H\\u309bH\\u309c */
2158     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2159     "\\u3071\\u309d", /* H\\u309c\\u309d */
2160     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2161     "\\u3071\\u306f", /* H\\u309cH */
2162     "\\u30d1\\u30cf", /* K\\u309cK */
2163     "\\u3071\\u3070", /* H\\u309cH\\u309b */
2164     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2165     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2166     "\\u3071\\u3071", /* H\\u309cH\\u309c */
2167     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2168   };
2169   /*
2170   static const char *test3[] = {
2171     "\\u221er\\u221e",
2172     "\\u221eR#",
2173     "\\u221et\\u221e",
2174     "#r\\u221e",
2175     "#R#",
2176     "#t%",
2177     "#T%",
2178     "8t\\u221e",
2179     "8T\\u221e",
2180     "8t#",
2181     "8T#",
2182     "8t%",
2183     "8T%",
2184     "8t8",
2185     "8T8",
2186     "\\u03c9r\\u221e",
2187     "\\u03a9R%",
2188     "rr\\u221e",
2189     "rR\\u221e",
2190     "Rr\\u221e",
2191     "RR\\u221e",
2192     "RT%",
2193     "rt8",
2194     "tr\\u221e",
2195     "tr8",
2196     "TR8",
2197     "tt8",
2198     "\\u30b7\\u30e3\\u30fc\\u30ec",
2199   };
2200   */
2201   static const UColAttribute att[] = { UCOL_STRENGTH };
2202   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2203 
2204   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2205   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2206 
2207   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2208   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2209   /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2210   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2211   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2212 }
2213 
TestStrCollIdenticalPrefix(void)2214 static void TestStrCollIdenticalPrefix(void) {
2215   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2216   const char* test[] = {
2217     "ab\\ud9b0\\udc70",
2218     "ab\\ud9b0\\udc71"
2219   };
2220   genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2221 }
2222 /* Contractions should have all their canonically equivalent */
2223 /* strings included */
TestContractionClosure(void)2224 static void TestContractionClosure(void) {
2225   static const struct {
2226     const char *rules;
2227     const char *data[10];
2228     const uint32_t len;
2229   } tests[] = {
2230     {   "&b=\\u00e4\\u00e4",
2231       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2232     {   "&b=\\u00C5",
2233       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2234   };
2235   uint32_t i;
2236 
2237 
2238   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2239     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2240   }
2241 }
2242 
/*
 * Tests "[before 3]" tailorings whose reset point has itself been tailored
 * (or is tailored later in the same rule string). Each case lists two
 * strings that must sort in the given order.
 *
 * The original note on this function said that this test "also fails"; the
 * live cases below are nevertheless run unconditionally. A further prefix
 * based variant, together with a collation-element dump that was used for
 * debugging, is kept in the disabled section at the end.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;       /* escaped tailoring rules */
    const char *data[10];    /* escaped strings in expected order */
    const uint32_t len;      /* number of entries of data[] in use */
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    /* same rules as the previous case, with the [before 3] reset first */
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  uint32_t caseIndex = 0;

  while(caseIndex < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[caseIndex].rules, cases[caseIndex].data, cases[caseIndex].len);
    ++caseIndex;
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
  genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
2320 
TestPrefixCompose(void)2321 static void TestPrefixCompose(void) {
2322   const char* rule1 =
2323         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2324   /*
2325   const char* test[] = {
2326       "\\u30c6\\u30fc\\u30bf",
2327       "\\u30c6\\u30a7\\u30bf",
2328   };
2329   */
2330   {
2331     UErrorCode status = U_ZERO_ERROR;
2332     /*uint32_t i = 0;*/
2333     /*UCollationElements *it = NULL;*/
2334 /*    uint32_t CE;*/
2335     UChar string[256];
2336     uint32_t uStringLen;
2337     UCollator *coll = NULL;
2338 
2339     uStringLen = u_unescape(rule1, string, 256);
2340 
2341     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2342     ucol_close(coll);
2343   }
2344 
2345 
2346 }
2347 
/*
 * Special reset positions used by the rules in TestRuleOptions:
 *
 * [last variable]            last variable value
 * [last primary ignorable]   largest CE for primary ignorable
 * [last secondary ignorable] largest CE for secondary ignorable
 * [last tertiary ignorable]  largest CE for tertiary ignorable
 * [top]                      guaranteed to be above all implicit CEs,
 *                            for now and in the future (in 1.8)
 */

/*
 * Tests tailoring relative to the special reset positions ([first ...],
 * [last ...], [top]) and "[before N]" resets on them. Each case supplies a
 * rule string and the strings that must sort in the listed order.
 */
static void TestRuleOptions(void) {
  /* values here are hardcoded and are correct for the current UCA
   * when the UCA changes, one might be forced to change these
   * values.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;       /* escaped tailoring rules */
    const char *data[10];    /* escaped strings in expected order */
    const uint32_t len;      /* number of entries of data[] to compare */
  } cases[] = {
#if 0
    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */
#endif
    /*
     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
     * and it *is* possible to "go before" that.
     */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* 'normal' befores */

    /*
     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
     * because there is no tailoring space before that boundary.
     * Made the tests work by tailoring to a space instead.
     */
    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },
#endif
    /* NOTE(review): eight strings are listed but len is 7, so the final "u"
     * (tailored after [top]) is not compared; confirm this is intended. */
    { "&[last variable]<z"
      "&' '<x"  /* was &[last primary ignorable]<x, see above */
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  uint32_t caseIndex = 0;

  while(caseIndex < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[caseIndex].rules, cases[caseIndex].data, cases[caseIndex].len);
    ++caseIndex;
  }
}
2482 
2483 
/*
 * Smoke test for the "[optimize ...]" rule option.
 *
 * This is not really a test - it just tries out whether copying of UCA
 * contents will fail. The option does not change the sort order, so the
 * only check is that "a" still sorts before "b" with the option applied.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;       /* escaped tailoring rules */
    const char *data[10];    /* escaped strings in expected order */
    const uint32_t len;      /* number of entries of data[] in use */
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  uint32_t caseIndex = 0;

  while(caseIndex < UPRV_LENGTHOF(cases)) {
    genericRulesStarter(cases[caseIndex].rules, cases[caseIndex].data, cases[caseIndex].len);
    ++caseIndex;
  }
}
2505 
2506 /*
2507 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2508 weiv    ucol_strcollIter?
2509 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2510 weiv    these are the input strings?
2511 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2512 weiv    will check - could be a problem with utf-8 iterator
2513 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2514 weiv    hmmm
2515 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2516 weiv    that doesn't sound right
2517 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2518 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2519 cycheng@ca.ibm.c... yes
2520 weiv    and then do the comparison
2521 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2522 weiv    utf-16 strings look like a little endian ones in the example you sent me
2523 weiv    It could be a bug - let me try to test it out
2524 cycheng@ca.ibm.c... ok
2525 cycheng@ca.ibm.c... we can wait till the conf. call
2526 cycheng@ca.ibm.c... next weke
2527 weiv    that would be great
2528 weiv    hmmm
2529 weiv    I might be wrong
2530 weiv    let me play with it some more
2531 cycheng@ca.ibm.c... ok
2532 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2533 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2534 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2535 weiv    ok
2536 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2537 weiv    thanks
2538 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2539 */
/*
 * Disabled (#if 0) test; it is not compiled.
 *
 * Compares the same pair of strings twice with ucol_strcollIter() on the
 * root collator with normalization turned on: once through UTF-16BE
 * character iterators and once through UTF-8 character iterators, and
 * reports an error if the two results differ.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* The two strings as UTF-16BE bytes: D800 0021 and FFFC 0062. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* The byte sequences fed to the UTF-8 iterators for the same two strings. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  /* NOTE(review): status is never checked, including after ucol_open(). */
  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2580 
2581 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2582 static void Alexis2(void) {
2583   UErrorCode status = U_ZERO_ERROR;
2584   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2588 
2589   UConverter *conv = NULL;
2590 
2591   UCharIterator U16BEItS, U16BEItT;
2592   UCharIterator U8ItS, U8ItT;
2593 
2594   UCollationResult resU16, resU16BE, resU8;
2595 
2596   static const char* const pairs[][2] = {
2597     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2598     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2599     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2600     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2601     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2602     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2603     { "\\u0020", "\\u0020\\u0000"}
2604 /*
2605 5F20 (my result here)
2606 5F204E008E3F
2607 5F20 (your result here)
2608 */
2609   };
2610 
2611   int32_t i = 0;
2612 
2613   UCollator *coll = ucol_open("", &status);
2614   if(status == U_FILE_ACCESS_ERROR) {
2615     log_data_err("Is your data around?\n");
2616     return;
2617   } else if(U_FAILURE(status)) {
2618     log_err("Error opening collator\n");
2619     return;
2620   }
2621   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2622   conv = ucnv_open("UTF16BE", &status);
2623   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2624     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2625     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626 
2627     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2628 
2629     log_verbose("Result of strcoll is %i\n", resU16);
2630 
2631     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2632     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2633     (void)U16BELenS;    /* Suppress set but not used warnings. */
2634     (void)U16BELenT;
2635 
2636     /* use the original sizes, as the result from converter is in bytes */
2637     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2638     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2639 
2640     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2641 
2642     log_verbose("Result of U16BE is %i\n", resU16BE);
2643 
2644     if(resU16 != resU16BE) {
2645       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2646     }
2647 
2648     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2649     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2650 
2651     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2652     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2653 
2654     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2655 
2656     if(resU16 != resU8) {
2657       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2658     }
2659 
2660   }
2661 
2662   ucol_close(coll);
2663   ucnv_close(conv);
2664 }
2665 
TestHebrewUCA(void)2666 static void TestHebrewUCA(void) {
2667   UErrorCode status = U_ZERO_ERROR;
2668   static const char *first[] = {
2669     "d790d6b8d79cd795d6bcd7a9",
2670     "d790d79cd79ed7a7d799d799d7a1",
2671     "d790d6b4d79ed795d6bcd7a9",
2672   };
2673 
2674   char utf8String[3][256];
2675   UChar utf16String[3][256];
2676 
2677   int32_t i = 0, j = 0;
2678   int32_t sizeUTF8[3];
2679   int32_t sizeUTF16[3];
2680 
2681   UCollator *coll = ucol_open("", &status);
2682   if (U_FAILURE(status)) {
2683       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2684       return;
2685   }
2686   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2687 
2688   for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2689     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2690     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2691     log_verbose("%i: ");
2692     for(j = 0; j < sizeUTF16[i]; j++) {
2693       /*log_verbose("\\u%04X", utf16String[i][j]);*/
2694       log_verbose("%04X", utf16String[i][j]);
2695     }
2696     log_verbose("\n");
2697   }
2698   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2699     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2700       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2701     }
2702   }
2703 
2704   ucol_close(coll);
2705 
2706 }
2707 
TestPartialSortKeyTermination(void)2708 static void TestPartialSortKeyTermination(void) {
2709   static const char* cases[] = {
2710     "\\u1234\\u1234\\udc00",
2711     "\\udc00\\ud800\\ud800"
2712   };
2713 
2714   int32_t i;
2715 
2716   UErrorCode status = U_ZERO_ERROR;
2717 
2718   UCollator *coll = ucol_open("", &status);
2719 
2720   UCharIterator iter;
2721 
2722   UChar currCase[256];
2723   int32_t length = 0;
2724   int32_t pKeyLen = 0;
2725 
2726   uint8_t key[256];
2727 
2728   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2729     uint32_t state[2] = {0, 0};
2730     length = u_unescape(cases[i], currCase, 256);
2731     uiter_setString(&iter, currCase, length);
2732     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2733     (void)pKeyLen;   /* Suppress set but not used warning. */
2734 
2735     log_verbose("Done\n");
2736 
2737   }
2738   ucol_close(coll);
2739 }
2740 
TestSettings(void)2741 static void TestSettings(void) {
2742   static const char* cases[] = {
2743     "apple",
2744       "Apple"
2745   };
2746 
2747   static const char* locales[] = {
2748     "",
2749       "en"
2750   };
2751 
2752   UErrorCode status = U_ZERO_ERROR;
2753 
2754   int32_t i = 0, j = 0;
2755 
2756   UChar source[256], target[256];
2757   int32_t sLen = 0, tLen = 0;
2758 
2759   UCollator *collateObject = NULL;
2760   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2761     collateObject = ucol_open(locales[i], &status);
2762     ucol_setStrength(collateObject, UCOL_PRIMARY);
2763     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2764     for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2765       sLen = u_unescape(cases[j-1], source, 256);
2766       source[sLen] = 0;
2767       tLen = u_unescape(cases[j], target, 256);
2768       source[tLen] = 0;
2769       doTest(collateObject, source, target, UCOL_EQUAL);
2770     }
2771     ucol_close(collateObject);
2772   }
2773 }
2774 
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)2775 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2776     UErrorCode status = U_ZERO_ERROR;
2777     int32_t errorNo = 0;
2778     const UChar *sourceRules = NULL;
2779     int32_t sourceRulesLen = 0;
2780     UParseError parseError;
2781     UColAttributeValue french = UCOL_OFF;
2782 
2783     if(!ucol_equals(source, target)) {
2784         log_err("Same collators, different address not equal\n");
2785         errorNo++;
2786     }
2787     ucol_close(target);
2788     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2789         target = ucol_safeClone(source, NULL, NULL, &status);
2790         if(U_FAILURE(status)) {
2791             log_err("Error creating clone\n");
2792             errorNo++;
2793             return errorNo;
2794         }
2795         if(!ucol_equals(source, target)) {
2796             log_err("Collator different from it's clone\n");
2797             errorNo++;
2798         }
2799         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2800         if(french == UCOL_ON) {
2801             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2802         } else {
2803             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2804         }
2805         if(U_FAILURE(status)) {
2806             log_err("Error setting attributes\n");
2807             errorNo++;
2808             return errorNo;
2809         }
2810         if(ucol_equals(source, target)) {
2811             log_err("Collators same even when options changed\n");
2812             errorNo++;
2813         }
2814         ucol_close(target);
2815 
2816         sourceRules = ucol_getRules(source, &sourceRulesLen);
2817         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2818         if(U_FAILURE(status)) {
2819             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2820             errorNo++;
2821             return errorNo;
2822         }
2823         /* Note: The tailoring rule string is an optional data item. */
2824         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2825             log_err("Collator different from collator that was created from the same rules\n");
2826             errorNo++;
2827         }
2828         ucol_close(target);
2829     }
2830     return errorNo;
2831 }
2832 
2833 
TestEquals(void)2834 static void TestEquals(void) {
2835     /* ucol_equals is not currently a public API. There is a chance that it will become
2836     * something like this.
2837     */
2838     /* test whether the two collators instantiated from the same locale are equal */
2839     UErrorCode status = U_ZERO_ERROR;
2840     UParseError parseError;
2841     int32_t noOfLoc = uloc_countAvailable();
2842     const char *locName = NULL;
2843     UCollator *source = NULL, *target = NULL;
2844     int32_t i = 0;
2845 
2846     const char* rules[] = {
2847         "&l < lj <<< Lj <<< LJ",
2848         "&n < nj <<< Nj <<< NJ",
2849         "&ae <<< \\u00e4",
2850         "&AE <<< \\u00c4"
2851     };
2852     /*
2853     const char* badRules[] = {
2854     "&l <<< Lj",
2855     "&n < nj <<< nJ <<< NJ",
2856     "&a <<< \\u00e4",
2857     "&AE <<< \\u00c4 <<< x"
2858     };
2859     */
2860 
2861     UChar sourceRules[1024], targetRules[1024];
2862     int32_t sourceRulesSize = 0, targetRulesSize = 0;
2863     int32_t rulesSize = UPRV_LENGTHOF(rules);
2864 
2865     for(i = 0; i < rulesSize; i++) {
2866         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2867         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2868     }
2869 
2870     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2871     if(status == U_FILE_ACCESS_ERROR) {
2872         log_data_err("Is your data around?\n");
2873         return;
2874     } else if(U_FAILURE(status)) {
2875         log_err("Error opening collator\n");
2876         return;
2877     }
2878     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2879     if(!ucol_equals(source, target)) {
2880         log_err("Equivalent collators not equal!\n");
2881     }
2882     ucol_close(source);
2883     ucol_close(target);
2884 
2885     source = ucol_open("root", &status);
2886     target = ucol_open("root", &status);
2887     log_verbose("Testing root\n");
2888     if(!ucol_equals(source, source)) {
2889         log_err("Same collator not equal\n");
2890     }
2891     if(TestEqualsForCollator("root", source, target)) {
2892         log_err("Errors for root\n");
2893     }
2894     ucol_close(source);
2895 
2896     for(i = 0; i<noOfLoc; i++) {
2897         status = U_ZERO_ERROR;
2898         locName = uloc_getAvailable(i);
2899         /*if(hasCollationElements(locName)) {*/
2900         log_verbose("Testing equality for locale %s\n", locName);
2901         source = ucol_open(locName, &status);
2902         target = ucol_open(locName, &status);
2903         if (U_FAILURE(status)) {
2904             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2905             continue;
2906         }
2907         if(TestEqualsForCollator(locName, source, target)) {
2908             log_err("Errors for locale %s\n", locName);
2909         }
2910         ucol_close(source);
2911         /*}*/
2912     }
2913 }
2914 
TestJ2726(void)2915 static void TestJ2726(void) {
2916     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2917     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2918     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2919     UErrorCode status = U_ZERO_ERROR;
2920     UCollator *coll = ucol_open("en", &status);
2921     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2922     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2923     doTest(coll, a, aSpace, UCOL_EQUAL);
2924     doTest(coll, aSpace, a, UCOL_EQUAL);
2925     doTest(coll, a, spaceA, UCOL_EQUAL);
2926     doTest(coll, spaceA, a, UCOL_EQUAL);
2927     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2928     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2929     ucol_close(coll);
2930 }
2931 
NullRule(void)2932 static void NullRule(void) {
2933     UChar r[3] = {0};
2934     UErrorCode status = U_ZERO_ERROR;
2935     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2936     if(U_SUCCESS(status)) {
2937         log_err("This should have been an error!\n");
2938         ucol_close(coll);
2939     } else {
2940         status = U_ZERO_ERROR;
2941     }
2942     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2943     if(U_FAILURE(status)) {
2944         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2945     } else {
2946         ucol_close(coll);
2947     }
2948 }
2949 
2950 /**
2951  * Test for CollationElementIterator previous and next for the whole set of
2952  * unicode characters with normalization on.
2953  */
TestNumericCollation(void)2954 static void TestNumericCollation(void)
2955 {
2956     UErrorCode status = U_ZERO_ERROR;
2957 
2958     const static char *basicTestStrings[]={
2959     "hello1",
2960     "hello2",
2961     "hello2002",
2962     "hello2003",
2963     "hello123456",
2964     "hello1234567",
2965     "hello10000000",
2966     "hello100000000",
2967     "hello1000000000",
2968     "hello10000000000",
2969     };
2970 
2971     const static char *preZeroTestStrings[]={
2972     "avery10000",
2973     "avery010000",
2974     "avery0010000",
2975     "avery00010000",
2976     "avery000010000",
2977     "avery0000010000",
2978     "avery00000010000",
2979     "avery000000010000",
2980     };
2981 
2982     const static char *thirtyTwoBitNumericStrings[]={
2983     "avery42949672960",
2984     "avery42949672961",
2985     "avery42949672962",
2986     "avery429496729610"
2987     };
2988 
2989      const static char *longNumericStrings[]={
2990      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2991         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2992         are treated as multiple collation elements. */
2993     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2994     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2995     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2996     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2997     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2998     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2999     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3000     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3001     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3002     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3003     };
3004 
3005     const static char *supplementaryDigits[] = {
3006       "\\uD835\\uDFCE", /* 0 */
3007       "\\uD835\\uDFCF", /* 1 */
3008       "\\uD835\\uDFD0", /* 2 */
3009       "\\uD835\\uDFD1", /* 3 */
3010       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3011       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3012       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3013       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3014       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3015       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3016     };
3017 
3018     const static char *foreignDigits[] = {
3019       "\\u0661",
3020         "\\u0662",
3021         "\\u0663",
3022       "\\u0661\\u0660",
3023       "\\u0661\\u0662",
3024       "\\u0661\\u0663",
3025       "\\u0662\\u0660",
3026       "\\u0662\\u0662",
3027       "\\u0662\\u0663",
3028       "\\u0663\\u0660",
3029       "\\u0663\\u0662",
3030       "\\u0663\\u0663"
3031     };
3032 
3033     const static char *evenZeroes[] = {
3034       "2000",
3035       "2001",
3036         "2002",
3037         "2003"
3038     };
3039 
3040     UColAttribute att = UCOL_NUMERIC_COLLATION;
3041     UColAttributeValue val = UCOL_ON;
3042 
3043     /* Open our collator. */
3044     UCollator* coll = ucol_open("root", &status);
3045     if (U_FAILURE(status)){
3046         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3047               myErrorName(status));
3048         return;
3049     }
3050     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3051     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3052     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3053     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3054     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3055     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3056 
3057     /* Setting up our collator to do digits. */
3058     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3059     if (U_FAILURE(status)){
3060         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3061               myErrorName(status));
3062         return;
3063     }
3064 
3065     /*
3066        Testing that prepended zeroes still yield the correct collation behavior.
3067        We expect that every element in our strings array will be equal.
3068     */
3069     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3070 
3071     ucol_close(coll);
3072 }
3073 
TestTibetanConformance(void)3074 static void TestTibetanConformance(void)
3075 {
3076     const char* test[] = {
3077         "\\u0FB2\\u0591\\u0F71\\u0061",
3078         "\\u0FB2\\u0F71\\u0061"
3079     };
3080 
3081     UErrorCode status = U_ZERO_ERROR;
3082     UCollator *coll = ucol_open("", &status);
3083     UChar source[100];
3084     UChar target[100];
3085     int result;
3086     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3087     if (U_SUCCESS(status)) {
3088         u_unescape(test[0], source, 100);
3089         u_unescape(test[1], target, 100);
3090         doTest(coll, source, target, UCOL_EQUAL);
3091         result = ucol_strcoll(coll, source, -1,   target, -1);
3092         log_verbose("result %d\n", result);
3093         if (UCOL_EQUAL != result) {
3094             log_err("Tibetan comparison error\n");
3095         }
3096     }
3097     ucol_close(coll);
3098 
3099     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3100 }
3101 
/**
 * Hands two three-character Han strings, which share their first character,
 * to genericLocaleStarter() for the "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
}
3106 
3107 /**
3108  * Iterate through the given iterator, checking to see that all the strings
3109  * in the expected array are present.
3110  * @param expected array of strings we expect to see, or NULL
3111  * @param expectedCount number of elements of expected, or 0
3112  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3113 static int32_t checkUEnumeration(const char* msg,
3114                                  UEnumeration* iter,
3115                                  const char** expected,
3116                                  int32_t expectedCount) {
3117     UErrorCode ec = U_ZERO_ERROR;
3118     int32_t i = 0, n, j, bit;
3119     int32_t seenMask = 0;
3120 
3121     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3122     n = uenum_count(iter, &ec);
3123     if (!assertSuccess("count", &ec)) return -1;
3124     log_verbose("%s = [", msg);
3125     for (;; ++i) {
3126         const char* s = uenum_next(iter, NULL, &ec);
3127         if (!assertSuccess("snext", &ec) || s == NULL) break;
3128         if (i != 0) log_verbose(",");
3129         log_verbose("%s", s);
3130         /* check expected list */
3131         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3132             if ((seenMask&bit) == 0 &&
3133                 uprv_strcmp(s, expected[j]) == 0) {
3134                 seenMask |= bit;
3135                 break;
3136             }
3137         }
3138     }
3139     log_verbose("] (%d)\n", i);
3140     assertTrue("count verified", i==n);
3141     /* did we see all expected strings? */
3142     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3143         if ((seenMask&bit)!=0) {
3144             log_verbose("Ok: \"%s\" seen\n", expected[j]);
3145         } else {
3146             log_err("FAIL: \"%s\" not seen\n", expected[j]);
3147         }
3148     }
3149     return n;
3150 }
3151 
3152 /**
3153  * Test new API added for separate collation tree.
3154  */
TestSeparateTrees(void)3155 static void TestSeparateTrees(void) {
3156     UErrorCode ec = U_ZERO_ERROR;
3157     UEnumeration *e = NULL;
3158     int32_t n = -1;
3159     UBool isAvailable;
3160     char loc[256];
3161 
3162     static const char* AVAIL[] = { "en", "de" };
3163 
3164     static const char* KW[] = { "collation" };
3165 
3166     static const char* KWVAL[] = { "phonebook", "stroke" };
3167 
3168 #if !UCONFIG_NO_SERVICE
3169     e = ucol_openAvailableLocales(&ec);
3170     if (e != NULL) {
3171         assertSuccess("ucol_openAvailableLocales", &ec);
3172         assertTrue("ucol_openAvailableLocales!=0", e!=0);
3173         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3174         (void)n;    /* Suppress set but not used warnings. */
3175         /* Don't need to check n because we check list */
3176         uenum_close(e);
3177     } else {
3178         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3179     }
3180 #endif
3181 
3182     e = ucol_getKeywords(&ec);
3183     if (e != NULL) {
3184         assertSuccess("ucol_getKeywords", &ec);
3185         assertTrue("ucol_getKeywords!=0", e!=0);
3186         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3187         /* Don't need to check n because we check list */
3188         uenum_close(e);
3189     } else {
3190         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3191     }
3192 
3193     e = ucol_getKeywordValues(KW[0], &ec);
3194     if (e != NULL) {
3195         assertSuccess("ucol_getKeywordValues", &ec);
3196         assertTrue("ucol_getKeywordValues!=0", e!=0);
3197         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3198         /* Don't need to check n because we check list */
3199         uenum_close(e);
3200     } else {
3201         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3202     }
3203 
3204     /* Try setting a warning before calling ucol_getKeywordValues */
3205     ec = U_USING_FALLBACK_WARNING;
3206     e = ucol_getKeywordValues(KW[0], &ec);
3207     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3208         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3209         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3210         /* Don't need to check n because we check list */
3211         uenum_close(e);
3212     }
3213 
3214     /*
3215 U_CAPI int32_t U_EXPORT2
3216 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3217                              const char* locale, UBool* isAvailable,
3218                              UErrorCode* status);
3219 }
3220 */
3221     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3222                                      &isAvailable, &ec);
3223     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3224         assertEquals("getFunctionalEquivalent(de)", "root", loc);
3225         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3226                    isAvailable == TRUE);
3227     }
3228 
3229     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3230                                      &isAvailable, &ec);
3231     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3232         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3233         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3234                    isAvailable == FALSE);
3235     }
3236 }
3237 
/*
 * Supersedes TestJ784.
 * Runs two string tables first against a rule set that places tone-marked
 * vowels with "[before 2]" resets, and then against the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char pinyinRules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* Syllables starting with 'l', with and without a macron on the vowel. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Upper/lower-case combinations of x with the variants of a. */
    static const char *caseVariants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* First table: explicit rules, then the zh locale. */
    genericRulesStarter(pinyinRules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));

    /* Second table: same two runs. */
    genericRulesStarter(pinyinRules, caseVariants, UPRV_LENGTHOF(caseVariants));
    genericLocaleStarter("zh", caseVariants, UPRV_LENGTHOF(caseVariants));
}
3298 
TestBeforeTightening(void)3299 static void TestBeforeTightening(void) {
3300     static const struct {
3301         const char *rules;
3302         UErrorCode expectedStatus;
3303     } tests[] = {
3304         { "&[before 1]a<x", U_ZERO_ERROR },
3305         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3306         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3307         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3308         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3309         { "&[before 2]a<<x",U_ZERO_ERROR },
3310         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3311         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3312         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3313         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3314         { "&[before 3]a<<<x",U_ZERO_ERROR },
3315         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3316         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3317     };
3318 
3319     int32_t i = 0;
3320 
3321     UErrorCode status = U_ZERO_ERROR;
3322     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3323     uint32_t rlen = 0;
3324 
3325     UCollator *coll = NULL;
3326 
3327 
3328     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3329         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3330         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3331         if(status != tests[i].expectedStatus) {
3332             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3333                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3334         }
3335         ucol_close(coll);
3336         status = U_ZERO_ERROR;
3337     }
3338 
3339 }
3340 
3341 /*
3342 &m < a
3343 &[before 1] a < x <<< X << q <<< Q < z
3344 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3345 
3346 &m < a
3347 &[before 2] a << x <<< X << q <<< Q < z
3348 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3349 
3350 &m < a
3351 &[before 3] a <<< x <<< X << q <<< Q < z
3352 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3353 
3354 
3355 &m << a
3356 &[before 1] a < x <<< X << q <<< Q < z
3357 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3358 
3359 &m << a
3360 &[before 2] a << x <<< X << q <<< Q < z
3361 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3362 
3363 &m << a
3364 &[before 3] a <<< x <<< X << q <<< Q < z
3365 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3366 
3367 
3368 &m <<< a
3369 &[before 1] a < x <<< X << q <<< Q < z
3370 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3371 
3372 &m <<< a
3373 &[before 2] a << x <<< X << q <<< Q < z
3374 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3375 
3376 &m <<< a
3377 &[before 3] a <<< x <<< X << q <<< Q < z
3378 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3379 
3380 
3381 &[before 1] s < x <<< X << q <<< Q < z
3382 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3383 
3384 &[before 2] s << x <<< X << q <<< Q < z
3385 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3386 
3387 &[before 3] s <<< x <<< X << q <<< Q < z
3388 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3389 
3390 
3391 &[before 1] \u24DC < x <<< X << q <<< Q < z
3392 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3393 
3394 &[before 2] \u24DC << x <<< X << q <<< Q < z
3395 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3396 
3397 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3398 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3399 */
3400 
3401 
#if 0
/*
 * Compiled out: relies on features that are not supported yet.
 * Each entry pairs a tailoring with the string order it is expected to
 * produce (see the "assert:" lines in the comment above) and hands both to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* tailoring;
        const char* expectedOrder[16];
        int32_t count;
    } cases[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9 },
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9 },
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9 },
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9 },
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9 },
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9 },
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9 },
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9 },
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9 },
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9 },
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9 },
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9 },
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9 },
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9 },
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9 }
    };
    const int32_t caseCount = UPRV_LENGTHOF(cases);
    int32_t idx = 0;

    while (idx < caseCount) {
        genericRulesStarter(cases[idx].tailoring, cases[idx].expectedOrder, cases[idx].count);
        ++idx;
    }
}
#endif
3449 
TestTailorNULL(void)3450 static void TestTailorNULL( void ) {
3451     const static char* rule = "&a <<< '\\u0000'";
3452     UErrorCode status = U_ZERO_ERROR;
3453     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3454     uint32_t rlen = 0;
3455     UChar a = 1, null = 0;
3456     UCollationResult res = UCOL_EQUAL;
3457 
3458     UCollator *coll = NULL;
3459 
3460 
3461     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3462     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3463 
3464     if(U_FAILURE(status)) {
3465         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3466     } else {
3467         res = ucol_strcoll(coll, &a, 1, &null, 1);
3468 
3469         if(res != UCOL_LESS) {
3470             log_err("NULL was not tailored properly!\n");
3471         }
3472     }
3473 
3474     ucol_close(coll);
3475 }
3476 
3477 static void
TestUpperFirstQuaternary(void)3478 TestUpperFirstQuaternary(void)
3479 {
3480   const char* tests[] = { "B", "b", "Bb", "bB" };
3481   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3482   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3483   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3484 }
3485 
3486 static void
TestJ4960(void)3487 TestJ4960(void)
3488 {
3489   const char* tests[] = { "\\u00e2T", "aT" };
3490   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3491   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3492   const char* tests2[] = { "a", "A" };
3493   const char* rule = "&[first tertiary ignorable]=A=a";
3494   UColAttribute att2[] = { UCOL_CASE_LEVEL };
3495   UColAttributeValue attVals2[] = { UCOL_ON };
3496   /* Test whether we correctly ignore primary ignorables on case level when */
3497   /* we have only primary & case level */
3498   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3499   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3500   /* and case level */
3501   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3502   /* Test whether completely ignorable letters have case level info (they shouldn't) */
3503   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3504 }
3505 
3506 static void
TestJ5223(void)3507 TestJ5223(void)
3508 {
3509   static const char *test = "this is a test string";
3510   UChar ustr[256];
3511   int32_t ustr_length = u_unescape(test, ustr, 256);
3512   unsigned char sortkey[256];
3513   int32_t sortkey_length;
3514   UErrorCode status = U_ZERO_ERROR;
3515   static UCollator *coll = NULL;
3516   coll = ucol_open("root", &status);
3517   if(U_FAILURE(status)) {
3518     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3519     return;
3520   }
3521   ucol_setStrength(coll, UCOL_PRIMARY);
3522   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3523   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3524   if (U_FAILURE(status)) {
3525     log_err("Failed setting atributes\n");
3526     return;
3527   }
3528   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3529   if (sortkey_length > 256) return;
3530 
3531   /* we mark the position where the null byte should be written in advance */
3532   sortkey[sortkey_length-1] = 0xAA;
3533 
3534   /* we set the buffer size one byte higher than needed */
3535   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3536     sortkey_length+1);
3537 
3538   /* no error occurs (for me) */
3539   if (sortkey[sortkey_length-1] == 0xAA) {
3540     log_err("Hit bug at first try\n");
3541   }
3542 
3543   /* we mark the position where the null byte should be written again */
3544   sortkey[sortkey_length-1] = 0xAA;
3545 
3546   /* this time we set the buffer size to the exact amount needed */
3547   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3548     sortkey_length);
3549 
3550   /* now the trailing null byte is not written */
3551   if (sortkey[sortkey_length-1] == 0xAA) {
3552     log_err("Hit bug at second try\n");
3553   }
3554 
3555   ucol_close(coll);
3556 }
3557 
/*
 * Regression test for the Thai partial sort key problem: two Thai strings
 * that differ only in their last-but-one code unit (U+0E47 vs U+0E48) are
 * passed, in this order, to genericLocaleStarter() for the "th" locale.
 */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };
    const int32_t pairCount = UPRV_LENGTHOF(thaiPair);

    genericLocaleStarter("th", thaiPair, pairCount);
}
3569 
/*
 * Passes "a" followed by "y" to genericRulesStarter() together with a
 * tailoring that resets on the two-letter string "Ny" and then places "a"
 * after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *orderedStrings[] = { "a", "y" };
    const int32_t stringCount = UPRV_LENGTHOF(orderedStrings);

    genericRulesStarter(tailoring, orderedStrings, stringCount);
}
3577 
3578 static void
TestVI5913(void)3579 TestVI5913(void)
3580 {
3581     UErrorCode status = U_ZERO_ERROR;
3582     int32_t i, j;
3583     UCollator *coll =NULL;
3584     uint8_t  resColl[100], expColl[100];
3585     int32_t  rLen, tLen, ruleLen, sLen, kLen;
3586     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3587     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3588     /*
3589      * Note: Just tailoring &z<ae^ does not work as expected:
3590      * The UCA spec requires for discontiguous contractions that they
3591      * extend an *existing match* by one combining mark at a time.
3592      * Therefore, ae must be a contraction so that the builder finds
3593      * discontiguous contractions for ae^, for example with an intervening underdot.
3594      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3595      */
3596     UChar rule3[256]={
3597         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3598         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3599         0};
3600     static const UChar tData[][20]={
3601         {0x1EAC, 0},
3602         {0x0041, 0x0323, 0x0302, 0},
3603         {0x1EA0, 0x0302, 0},
3604         {0x00C2, 0x0323, 0},
3605         {0x1ED8, 0},  /* O with dot and circumflex */
3606         {0x1ECC, 0x0302, 0},
3607         {0x1EB7, 0},
3608         {0x1EA1, 0x0306, 0},
3609     };
3610     static const UChar tailorData[][20]={
3611         {0x1FA2, 0},  /* Omega with 3 combining marks */
3612         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3613         {0x1FF3, 0x0313, 0x0300, 0},
3614         {0x1F60, 0x0300, 0x0345, 0},
3615         {0x1F62, 0x0345, 0},
3616         {0x1FA0, 0x0300, 0},
3617     };
3618     static const UChar tailorData2[][20]={
3619         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3620         {0x0073, 0x0323, 0x030C, 0},
3621         {0x0073, 0x030C, 0x0323, 0},
3622     };
3623     static const UChar tailorData3[][20]={
3624         {0x007a, 0},  /*  z */
3625         {0x0061, 0x0065, 0},  /*  a + e */
3626         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3627         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3628         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3629         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3630         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3631         {0x00EA, 0},  /* e with circumflex  */
3632     };
3633 
3634     /* Test Vietnamese sort. */
3635     coll = ucol_open("vi", &status);
3636     if(U_FAILURE(status)) {
3637         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3638         return;
3639     }
3640     log_verbose("\n\nVI collation:");
3641     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3642         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3643     }
3644     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3645         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3646     }
3647     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3648         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3649     }
3650     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3651         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3652     }
3653 
3654     for (j=0; j<8; j++) {
3655         tLen = u_strlen(tData[j]);
3656         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3657         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3658         for(i = 0; i<rLen; i++) {
3659             log_verbose(" %02X", resColl[i]);
3660         }
3661     }
3662 
3663     ucol_close(coll);
3664 
3665     /* Test Romanian sort. */
3666     coll = ucol_open("ro", &status);
3667     log_verbose("\n\nRO collation:");
3668     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3669         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3670     }
3671     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3672         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3673     }
3674     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3675         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3676     }
3677 
3678     for (j=4; j<8; j++) {
3679         tLen = u_strlen(tData[j]);
3680         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3681         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3682         for(i = 0; i<rLen; i++) {
3683             log_verbose(" %02X", resColl[i]);
3684         }
3685     }
3686     ucol_close(coll);
3687 
3688     /* Test the precomposed Greek character with 3 combining marks. */
3689     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3690     ruleLen = u_strlen(rule);
3691     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3692     if (U_FAILURE(status)) {
3693         log_err("ucol_openRules failed with %s\n", u_errorName(status));
3694         return;
3695     }
3696     sLen = u_strlen(tailorData[0]);
3697     for (j=1; j<6; j++) {
3698         tLen = u_strlen(tailorData[j]);
3699         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3700             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3701         }
3702     }
3703     /* Test getSortKey. */
3704     tLen = u_strlen(tailorData[0]);
3705     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3706     for (j=0; j<6; j++) {
3707         tLen = u_strlen(tailorData[j]);
3708         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3709         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3710             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3711             for(i = 0; i<rLen; i++) {
3712                 log_err(" %02X", resColl[i]);
3713             }
3714         }
3715     }
3716     ucol_close(coll);
3717 
3718     log_verbose("\n\nTailoring test for s with caron:");
3719     ruleLen = u_strlen(rule2);
3720     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3721     tLen = u_strlen(tailorData2[0]);
3722     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3723     for (j=1; j<3; j++) {
3724         tLen = u_strlen(tailorData2[j]);
3725         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3726         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3727             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3728             for(i = 0; i<rLen; i++) {
3729                 log_err(" %02X", resColl[i]);
3730             }
3731         }
3732     }
3733     ucol_close(coll);
3734 
3735     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3736     ruleLen = u_strlen(rule3);
3737     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3738     tLen = u_strlen(tailorData3[3]);
3739     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3740     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3741     for(i = 0; i<kLen; i++) {
3742         log_verbose(" %02X", expColl[i]);
3743     }
3744     for (j=4; j<6; j++) {
3745         tLen = u_strlen(tailorData3[j]);
3746         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3747 
3748         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3749             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3750             for(i = 0; i<rLen; i++) {
3751                 log_err(" %02X", resColl[i]);
3752             }
3753         }
3754 
3755         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3756          for(i = 0; i<rLen; i++) {
3757              log_verbose(" %02X", resColl[i]);
3758          }
3759     }
3760     ucol_close(coll);
3761 }
3762 
3763 static void
TestTailor6179(void)3764 TestTailor6179(void)
3765 {
3766     UErrorCode status = U_ZERO_ERROR;
3767     int32_t i;
3768     UCollator *coll =NULL;
3769     uint8_t  resColl[100];
3770     int32_t  rLen, tLen, ruleLen;
3771     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3772     static const UChar rule1[]={
3773             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3774             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3775             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3776             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3777     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3778     static const UChar rule2[]={
3779             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3780             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3781             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3782             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3783             0x3C,0x3C,0x20,0x62,0};
3784 
3785     static const UChar tData1[][4]={
3786         {0x61, 0},
3787         {0x62, 0},
3788         { 0xFDD0,0x009E, 0}
3789     };
3790     static const UChar tData2[][4]={
3791         {0x61, 0},
3792         {0x62, 0},
3793         { 0xFDD0,0x009E, 0}
3794      };
3795 
3796     /*
3797      * These values from FractionalUCA.txt will change,
3798      * and need to be updated here.
3799      * TODO: Make this not check for particular sort keys.
3800      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3801      */
3802     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3803     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3804     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3805     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3806 
3807     UParseError parseError;
3808 
3809     /* Test [Last Primary ignorable] */
3810 
3811     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3812     ruleLen = u_strlen(rule1);
3813     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3814     if (U_FAILURE(status)) {
3815         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3816         return;
3817     }
3818     tLen = u_strlen(tData1[0]);
3819     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3820     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3821         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3822         for(i = 0; i<rLen; i++) {
3823             log_err(" %02X", resColl[i]);
3824         }
3825         log_err("\n");
3826     }
3827     tLen = u_strlen(tData1[1]);
3828     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3829     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3830         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3831         for(i = 0; i<rLen; i++) {
3832             log_err(" %02X", resColl[i]);
3833         }
3834         log_err("\n");
3835     }
3836     ucol_close(coll);
3837 
3838 
3839     /* Test [Last Secondary ignorable] */
3840     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3841     ruleLen = u_strlen(rule2);
3842     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3843     if (U_FAILURE(status)) {
3844         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3845         log_info("  offset=%d  \"%s\" | \"%s\"\n",
3846                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3847         return;
3848     }
3849     tLen = u_strlen(tData2[0]);
3850     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3851     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3852         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3853         for(i = 0; i<rLen; i++) {
3854             log_err(" %02X", resColl[i]);
3855         }
3856         log_err("\n");
3857     }
3858     tLen = u_strlen(tData2[1]);
3859     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3860     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3861       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3862       for(i = 0; i<rLen; i++) {
3863         log_err(" %02X", resColl[i]);
3864       }
3865       log_err("\n");
3866     }
3867     ucol_close(coll);
3868 }
3869 
3870 static void
TestUCAPrecontext(void)3871 TestUCAPrecontext(void)
3872 {
3873     UErrorCode status = U_ZERO_ERROR;
3874     int32_t i, j;
3875     UCollator *coll =NULL;
3876     uint8_t  resColl[100], prevColl[100];
3877     int32_t  rLen, tLen, ruleLen;
3878     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3879     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3880     /* & l middle-dot << a  a is an expansion. */
3881 
3882     UChar tData1[][20]={
3883             { 0xb7, 0},  /* standalone middle dot(0xb7) */
3884             { 0x387, 0}, /* standalone middle dot(0x387) */
3885             { 0x61, 0},  /* a */
3886             { 0x6C, 0},  /* l */
3887             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3888             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3889             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3890             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3891             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3892             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3893             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3894      };
3895 
3896     log_verbose("\n\nEN collation:");
3897     coll = ucol_open("en", &status);
3898     if (U_FAILURE(status)) {
3899         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3900         return;
3901     }
3902     for (j=0; j<11; j++) {
3903         tLen = u_strlen(tData1[j]);
3904         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3905         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3906             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3907                     j, tData1[j]);
3908         }
3909         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3910         for(i = 0; i<rLen; i++) {
3911             log_verbose(" %02X", resColl[i]);
3912         }
3913         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3914      }
3915      ucol_close(coll);
3916 
3917 
3918      log_verbose("\n\nJA collation:");
3919      coll = ucol_open("ja", &status);
3920      if (U_FAILURE(status)) {
3921          log_err("Tailoring test: &z <<a|- failed!");
3922          return;
3923      }
3924      for (j=0; j<11; j++) {
3925          tLen = u_strlen(tData1[j]);
3926          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3927          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3928              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3929                      j, tData1[j]);
3930          }
3931          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3932          for(i = 0; i<rLen; i++) {
3933              log_verbose(" %02X", resColl[i]);
3934          }
3935          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3936       }
3937       ucol_close(coll);
3938 
3939 
3940       log_verbose("\n\nTailoring test: & middle dot < a ");
3941       ruleLen = u_strlen(rule1);
3942       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3943       if (U_FAILURE(status)) {
3944           log_err("Tailoring test: & middle dot < a failed!");
3945           return;
3946       }
3947       for (j=0; j<11; j++) {
3948           tLen = u_strlen(tData1[j]);
3949           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3950           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3951               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3952                       j, tData1[j]);
3953           }
3954           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3955           for(i = 0; i<rLen; i++) {
3956               log_verbose(" %02X", resColl[i]);
3957           }
3958           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3959        }
3960        ucol_close(coll);
3961 
3962 
3963        log_verbose("\n\nTailoring test: & l middle-dot << a ");
3964        ruleLen = u_strlen(rule2);
3965        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3966        if (U_FAILURE(status)) {
3967            log_err("Tailoring test: & l middle-dot << a failed!");
3968            return;
3969        }
3970        for (j=0; j<11; j++) {
3971            tLen = u_strlen(tData1[j]);
3972            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3973            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3974                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3975                        j, tData1[j]);
3976            }
3977            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3978                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3979                        j, tData1[j]);
3980            }
3981            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3982            for(i = 0; i<rLen; i++) {
3983                log_verbose(" %02X", resColl[i]);
3984            }
3985            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3986         }
3987         ucol_close(coll);
3988 }
3989 
3990 static void
TestOutOfBuffer5468(void)3991 TestOutOfBuffer5468(void)
3992 {
3993     static const char *test = "\\u4e00";
3994     UChar ustr[256];
3995     int32_t ustr_length = u_unescape(test, ustr, 256);
3996     unsigned char shortKeyBuf[1];
3997     int32_t sortkey_length;
3998     UErrorCode status = U_ZERO_ERROR;
3999     static UCollator *coll = NULL;
4000 
4001     coll = ucol_open("root", &status);
4002     if(U_FAILURE(status)) {
4003       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4004       return;
4005     }
4006     ucol_setStrength(coll, UCOL_PRIMARY);
4007     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4008     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4009     if (U_FAILURE(status)) {
4010       log_err("Failed setting atributes\n");
4011       return;
4012     }
4013 
4014     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4015     if (sortkey_length != 4) {
4016         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4017     }
4018     log_verbose("length of sortKey is %d", sortkey_length);
4019     ucol_close(coll);
4020 }
4021 
4022 #define TSKC_DATA_SIZE 5
4023 #define TSKC_BUF_SIZE  50
4024 static void
TestSortKeyConsistency(void)4025 TestSortKeyConsistency(void)
4026 {
4027     UErrorCode icuRC = U_ZERO_ERROR;
4028     UCollator* ucol;
4029     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4030 
4031     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4032     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033     int32_t i, j, i2;
4034 
4035     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4036     if (U_FAILURE(icuRC))
4037     {
4038         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4039         return;
4040     }
4041 
4042     for (i = 0; i < TSKC_DATA_SIZE; i++)
4043     {
4044         UCharIterator uiter;
4045         uint32_t state[2] = { 0, 0 };
4046         int32_t dataLen = i+1;
4047         for (j=0; j<TSKC_BUF_SIZE; j++)
4048             bufFull[i][j] = bufPart[i][j] = 0;
4049 
4050         /* Full sort key */
4051         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4052 
4053         /* Partial sort key */
4054         uiter_setString(&uiter, data, dataLen);
4055         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4056         if (U_FAILURE(icuRC))
4057         {
4058             log_err("ucol_nextSortKeyPart failed\n");
4059             ucol_close(ucol);
4060             return;
4061         }
4062 
4063         for (i2=0; i2<i; i2++)
4064         {
4065             UBool fullMatch = TRUE;
4066             UBool partMatch = TRUE;
4067             for (j=0; j<TSKC_BUF_SIZE; j++)
4068             {
4069                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4070                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4071             }
4072             if (fullMatch != partMatch) {
4073                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4074                                   : "partial key was consistent, but full key changed\n");
4075                 ucol_close(ucol);
4076                 return;
4077             }
4078         }
4079     }
4080 
4081     /*=============================================*/
4082    ucol_close(ucol);
4083 }
4084 
4085 /* ticket: 6101 */
TestCroatianSortKey(void)4086 static void TestCroatianSortKey(void) {
4087     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4088     UErrorCode status = U_ZERO_ERROR;
4089     UCollator *ucol;
4090     UCharIterator iter;
4091 
4092     static const UChar text[] = { 0x0044, 0xD81A };
4093 
4094     size_t length = UPRV_LENGTHOF(text);
4095 
4096     uint8_t textSortKey[32];
4097     size_t lenSortKey = 32;
4098     size_t actualSortKeyLen;
4099     uint32_t uStateInfo[2] = { 0, 0 };
4100 
4101     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4102     if (U_FAILURE(status)) {
4103         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4104         return;
4105     }
4106 
4107     uiter_setString(&iter, text, (int32_t)length);
4108 
4109     actualSortKeyLen = ucol_nextSortKeyPart(
4110         ucol, &iter, (uint32_t*)uStateInfo,
4111         textSortKey, (int32_t)lenSortKey, &status
4112         );
4113 
4114     if (actualSortKeyLen == lenSortKey) {
4115         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4116     }
4117 
4118     ucol_close(ucol);
4119 }
4120 
4121 /* ticket: 6140 */
4122 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4123  * they are both Hiragana and Katakana
4124  */
4125 #define SORTKEYLEN 50
TestHiragana(void)4126 static void TestHiragana(void) {
4127     UErrorCode status = U_ZERO_ERROR;
4128     UCollator* ucol;
4129     UCollationResult strcollresult;
4130     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4131     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4132     int32_t data1Len = UPRV_LENGTHOF(data1);
4133     int32_t data2Len = UPRV_LENGTHOF(data2);
4134     int32_t i, j;
4135     uint8_t sortKey1[SORTKEYLEN];
4136     uint8_t sortKey2[SORTKEYLEN];
4137 
4138     UCharIterator uiter1;
4139     UCharIterator uiter2;
4140     uint32_t state1[2] = { 0, 0 };
4141     uint32_t state2[2] = { 0, 0 };
4142     int32_t keySize1;
4143     int32_t keySize2;
4144 
4145     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4146             &status);
4147     if (U_FAILURE(status)) {
4148         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4149         return;
4150     }
4151 
4152     /* Start of full sort keys */
4153     /* Full sort key1 */
4154     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4155     /* Full sort key2 */
4156     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4157     if (keySize1 == keySize2) {
4158         for (i = 0; i < keySize1; i++) {
4159             if (sortKey1[i] != sortKey2[i]) {
4160                 log_err("Full sort keys are different. Should be equal.");
4161             }
4162         }
4163     } else {
4164         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4165     }
4166     /* End of full sort keys */
4167 
4168     /* Start of partial sort keys */
4169     /* Partial sort key1 */
4170     uiter_setString(&uiter1, data1, data1Len);
4171     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4172     /* Partial sort key2 */
4173     uiter_setString(&uiter2, data2, data2Len);
4174     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4175     if (U_SUCCESS(status) && keySize1 == keySize2) {
4176         for (j = 0; j < keySize1; j++) {
4177             if (sortKey1[j] != sortKey2[j]) {
4178                 log_err("Partial sort keys are different. Should be equal");
4179             }
4180         }
4181     } else {
4182         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4183     }
4184     /* End of partial sort keys */
4185 
4186     /* Start of strcoll */
4187     /* Use ucol_strcoll() to determine ordering */
4188     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4189     if (strcollresult != UCOL_EQUAL) {
4190         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4191     }
4192 
4193     ucol_close(ucol);
4194 }
4195 
4196 /* Convenient struct for running collation tests */
4197 typedef struct {
4198   const UChar source[MAX_TOKEN_LEN];  /* String on left */
4199   const UChar target[MAX_TOKEN_LEN];  /* String on right */
4200   UCollationResult result;            /* -1, 0 or +1, depending on collation */
4201 } OneTestCase;
4202 
4203 /*
4204  * Utility function to test one collation test case.
4205  * @param testcases Array of test cases.
4206  * @param n_testcases Size of the array testcases.
4207  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4208  * @param n_rules Size of the array str_rules.
4209  */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4210 static void doTestOneTestCase(const OneTestCase testcases[],
4211                               int n_testcases,
4212                               const char* str_rules[],
4213                               int n_rules)
4214 {
4215   int rule_no, testcase_no;
4216   UChar rule[500];
4217   int32_t length = 0;
4218   UErrorCode status = U_ZERO_ERROR;
4219   UParseError parse_error;
4220   UCollator  *myCollation;
4221 
4222   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4223 
4224     length = u_unescape(str_rules[rule_no], rule, 500);
4225     if (length == 0) {
4226         log_err("ERROR: The rule cannot be unescaped: %s\n");
4227         return;
4228     }
4229     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4230     if(U_FAILURE(status)){
4231         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4232         log_info("  offset=%d  \"%s\" | \"%s\"\n",
4233                  parse_error.offset,
4234                  aescstrdup(parse_error.preContext, -1),
4235                  aescstrdup(parse_error.postContext, -1));
4236         return;
4237     }
4238     log_verbose("Testing the <<* syntax\n");
4239     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4240     ucol_setStrength(myCollation, UCOL_TERTIARY);
4241     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4242       doTest(myCollation,
4243              testcases[testcase_no].source,
4244              testcases[testcase_no].target,
4245              testcases[testcase_no].result
4246              );
4247     }
4248     ucol_close(myCollation);
4249   }
4250 }
4251 
4252 const static OneTestCase rangeTestcases[] = {
4253   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
4254   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
4255   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
4256 
4257   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
4258   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
4259   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
4260   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
4261   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
4262 
4263   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
4264   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
4265   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
4266   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
4267 
4268   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
4269   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
4270   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
4271   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
4272   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
4273   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
4274   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
4275   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
4276 };
4277 
4278 static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4279 
4280 const static OneTestCase rangeTestcasesSupplemental[] = {
4281   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
4282   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
4283   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
4284   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4285   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4286   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4287   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4288 };
4289 
4290 static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4291 
4292 const static OneTestCase rangeTestcasesQwerty[] = {
4293   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
4294   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
4295 
4296   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
4297   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
4298 
4299   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
4300   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
4301 
4302   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
4303   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
4304 
4305   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4306     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
4307   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4308     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
4309 };
4310 
4311 static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4312 
TestSameStrengthList(void)4313 static void TestSameStrengthList(void)
4314 {
4315   const char* strRules[] = {
4316     /* Normal */
4317     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4318 
4319     /* Lists */
4320     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4321   };
4322   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4323 }
4324 
TestSameStrengthListQuoted(void)4325 static void TestSameStrengthListQuoted(void)
4326 {
4327   const char* strRules[] = {
4328     /* Lists with quoted characters */
4329     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4330     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4331 
4332     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4333     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4334 
4335     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4336     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4337   };
4338   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4339 }
4340 
TestSameStrengthListSupplemental(void)4341 static void TestSameStrengthListSupplemental(void)
4342 {
4343   const char* strRules[] = {
4344     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4345     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4346     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4347     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4348   };
4349   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4350 }
4351 
TestSameStrengthListQwerty(void)4352 static void TestSameStrengthListQwerty(void)
4353 {
4354   const char* strRules[] = {
4355     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4356     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4357     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4358     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4359     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4360 
4361     /* Quoted characters also will work if two quoted characters are not consecutive.  */
4362     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4363 
4364     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4365     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4366 
4367  };
4368   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4369 }
4370 
TestSameStrengthListQuotedQwerty(void)4371 static void TestSameStrengthListQuotedQwerty(void)
4372 {
4373   const char* strRules[] = {
4374     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4375     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4376     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4377 
4378     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4379     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4380    };
4381   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4382 }
4383 
TestSameStrengthListRanges(void)4384 static void TestSameStrengthListRanges(void)
4385 {
4386   const char* strRules[] = {
4387     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4388   };
4389   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4390 }
4391 
TestSameStrengthListSupplementalRanges(void)4392 static void TestSameStrengthListSupplementalRanges(void)
4393 {
4394   const char* strRules[] = {
4395     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4396     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4397   };
4398   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4399 }
4400 
TestSpecialCharacters(void)4401 static void TestSpecialCharacters(void)
4402 {
4403   const char* strRules[] = {
4404     /* Normal */
4405     "&';'<'+'<','<'-'<'&'<'*'",
4406 
4407     /* List */
4408     "&';'<*'+,-&*'",
4409 
4410     /* Range */
4411     "&';'<*'+'-'-&*'",
4412   };
4413 
4414   const static OneTestCase specialCharacterStrings[] = {
4415     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4416     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4417     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4418     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4419   };
4420   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4421 }
4422 
TestPrivateUseCharacters(void)4423 static void TestPrivateUseCharacters(void)
4424 {
4425   const char* strRules[] = {
4426     /* Normal */
4427     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4428     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4429   };
4430 
4431   const static OneTestCase privateUseCharacterStrings[] = {
4432     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4433     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4434     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4435     { {0xe2da}, {0xe2db}, UCOL_LESS },
4436     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4437     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4438   };
4439   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4440 }
4441 
TestPrivateUseCharactersInList(void)4442 static void TestPrivateUseCharactersInList(void)
4443 {
4444   const char* strRules[] = {
4445     /* List */
4446     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4447     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4448     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4449   };
4450 
4451   const static OneTestCase privateUseCharacterStrings[] = {
4452     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4453     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4454     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4455     { {0xe2da}, {0xe2db}, UCOL_LESS },
4456     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4457     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4458   };
4459   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4460 }
4461 
TestPrivateUseCharactersInRange(void)4462 static void TestPrivateUseCharactersInRange(void)
4463 {
4464   const char* strRules[] = {
4465     /* Range */
4466     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4467     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4468     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4469   };
4470 
4471   const static OneTestCase privateUseCharacterStrings[] = {
4472     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4473     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4474     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4475     { {0xe2da}, {0xe2db}, UCOL_LESS },
4476     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4477     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4478   };
4479   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4480 }
4481 
TestInvalidListsAndRanges(void)4482 static void TestInvalidListsAndRanges(void)
4483 {
4484   const char* invalidRules[] = {
4485     /* Range not in starred expression */
4486     "&\\ufffe<\\uffff-\\U00010002",
4487 
4488     /* Range without start */
4489     "&a<*-c",
4490 
4491     /* Range without end */
4492     "&a<*b-",
4493 
4494     /* More than one hyphen */
4495     "&a<*b-g-l",
4496 
4497     /* Range in the wrong order */
4498     "&a<*k-b",
4499 
4500   };
4501 
4502   UChar rule[500];
4503   UErrorCode status = U_ZERO_ERROR;
4504   UParseError parse_error;
4505   int n_rules = UPRV_LENGTHOF(invalidRules);
4506   int rule_no;
4507   int length;
4508   UCollator  *myCollation;
4509 
4510   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4511 
4512     length = u_unescape(invalidRules[rule_no], rule, 500);
4513     if (length == 0) {
4514         log_err("ERROR: The rule cannot be unescaped: %s\n");
4515         return;
4516     }
4517     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4518     (void)myCollation;      /* Suppress set but not used warning. */
4519     if(!U_FAILURE(status)){
4520       log_err("ERROR: Could not cause a failure as expected: \n");
4521     }
4522     status = U_ZERO_ERROR;
4523   }
4524 }
4525 
4526 /*
4527  * This test ensures that characters placed before a character in a different script have the same lead byte
4528  * in their collation key before and after script reordering.
4529  */
TestBeforeRuleWithScriptReordering(void)4530 static void TestBeforeRuleWithScriptReordering(void)
4531 {
4532     UParseError error;
4533     UErrorCode status = U_ZERO_ERROR;
4534     UCollator  *myCollation;
4535     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4536     UChar rules[500];
4537     uint32_t rulesLength = 0;
4538     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4539     UCollationResult collResult;
4540 
4541     uint8_t baseKey[256];
4542     uint32_t baseKeyLength;
4543     uint8_t beforeKey[256];
4544     uint32_t beforeKeyLength;
4545 
4546     UChar base[] = { 0x03b1 }; /* base */
4547     int32_t baseLen = UPRV_LENGTHOF(base);
4548 
4549     UChar before[] = { 0x0e01 }; /* ko kai */
4550     int32_t beforeLen = UPRV_LENGTHOF(before);
4551 
4552     /*UChar *data[] = { before, base };
4553     genericRulesStarter(srules, data, 2);*/
4554 
4555     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4556 
4557     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4558     (void)baseKeyLength;
4559 
4560     /* build collator */
4561     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4562 
4563     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4564     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4565     if(U_FAILURE(status)) {
4566         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4567         return;
4568     }
4569 
4570     /* check collation results - before rule applied but not script reordering */
4571     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4572     if (collResult != UCOL_GREATER) {
4573         log_err("Collation result not correct before script reordering = %d\n", collResult);
4574     }
4575 
4576     /* check the lead byte of the collation keys before script reordering */
4577     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4578     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4579     if (baseKey[0] != beforeKey[0]) {
4580       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4581    }
4582 
4583     /* reorder the scripts */
4584     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4585     if(U_FAILURE(status)) {
4586         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4587         return;
4588     }
4589 
4590     /* check collation results - before rule applied and after script reordering */
4591     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4592     if (collResult != UCOL_GREATER) {
4593         log_err("Collation result not correct after script reordering = %d\n", collResult);
4594     }
4595 
4596     /* check the lead byte of the collation keys after script reordering */
4597     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4598     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4599     if (baseKey[0] != beforeKey[0]) {
4600         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4601     }
4602 
4603     ucol_close(myCollation);
4604 }
4605 
4606 /*
4607  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4608  */
TestNonLeadBytesDuringCollationReordering(void)4609 static void TestNonLeadBytesDuringCollationReordering(void)
4610 {
4611     UErrorCode status = U_ZERO_ERROR;
4612     UCollator  *myCollation;
4613     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4614 
4615     uint8_t baseKey[256];
4616     uint32_t baseKeyLength;
4617     uint8_t reorderKey[256];
4618     uint32_t reorderKeyLength;
4619 
4620     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4621 
4622     uint32_t i;
4623 
4624 
4625     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4626 
4627     /* build collator tertiary */
4628     myCollation = ucol_open("", &status);
4629     ucol_setStrength(myCollation, UCOL_TERTIARY);
4630     if(U_FAILURE(status)) {
4631         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4632         return;
4633     }
4634     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4635 
4636     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4637     if(U_FAILURE(status)) {
4638         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4639         return;
4640     }
4641     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4642 
4643     if (baseKeyLength != reorderKeyLength) {
4644         log_err("Key lengths not the same during reordering.\n");
4645         return;
4646     }
4647 
4648     for (i = 1; i < baseKeyLength; i++) {
4649         if (baseKey[i] != reorderKey[i]) {
4650             log_err("Collation key bytes not the same at position %d.\n", i);
4651             return;
4652         }
4653     }
4654     ucol_close(myCollation);
4655 
4656     /* build collator quaternary */
4657     myCollation = ucol_open("", &status);
4658     ucol_setStrength(myCollation, UCOL_QUATERNARY);
4659     if(U_FAILURE(status)) {
4660         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4661         return;
4662     }
4663     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4664 
4665     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4666     if(U_FAILURE(status)) {
4667         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4668         return;
4669     }
4670     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4671 
4672     if (baseKeyLength != reorderKeyLength) {
4673         log_err("Key lengths not the same during reordering.\n");
4674         return;
4675     }
4676 
4677     for (i = 1; i < baseKeyLength; i++) {
4678         if (baseKey[i] != reorderKey[i]) {
4679             log_err("Collation key bytes not the same at position %d.\n", i);
4680             return;
4681         }
4682     }
4683     ucol_close(myCollation);
4684 }
4685 
4686 /*
4687  * Test reordering API.
4688  */
TestReorderingAPI(void)4689 static void TestReorderingAPI(void)
4690 {
4691     UErrorCode status = U_ZERO_ERROR;
4692     UCollator  *myCollation;
4693     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4695     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4696     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4697     UCollationResult collResult;
4698     int32_t retrievedReorderCodesLength;
4699     int32_t retrievedReorderCodes[10];
4700     UChar greekString[] = { 0x03b1 };
4701     UChar punctuationString[] = { 0x203e };
4702     int loopIndex;
4703 
4704     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4705 
4706     /* build collator tertiary */
4707     myCollation = ucol_open("", &status);
4708     ucol_setStrength(myCollation, UCOL_TERTIARY);
4709     if(U_FAILURE(status)) {
4710         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4711         return;
4712     }
4713 
4714     /* set the reorderding */
4715     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4716     if (U_FAILURE(status)) {
4717         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4718         return;
4719     }
4720 
4721     /* get the reordering */
4722     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4723     if (status != U_BUFFER_OVERFLOW_ERROR) {
4724         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4725         return;
4726     }
4727     status = U_ZERO_ERROR;
4728     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4729         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4730         return;
4731     }
4732     /* now let's really get it */
4733     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4734     if (U_FAILURE(status)) {
4735         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4736         return;
4737     }
4738     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4739         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4740         return;
4741     }
4742     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4743         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4744             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4745             return;
4746         }
4747     }
4748     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4749     if (collResult != UCOL_LESS) {
4750         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4751         return;
4752     }
4753 
4754     /* clear the reordering */
4755     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4756     if (U_FAILURE(status)) {
4757         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4758         return;
4759     }
4760 
4761     /* get the reordering again */
4762     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4763     if (retrievedReorderCodesLength != 0) {
4764         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4765         return;
4766     }
4767 
4768     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4769     if (collResult != UCOL_GREATER) {
4770         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4771         return;
4772     }
4773 
4774     /* clear the reordering using [NONE] */
4775     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4776     if (U_FAILURE(status)) {
4777         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4778         return;
4779     }
4780 
4781     /* get the reordering again */
4782     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4783     if (retrievedReorderCodesLength != 0) {
4784         log_err_status(status,
4785                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4786                        retrievedReorderCodesLength);
4787         return;
4788     }
4789 
4790     /* test for error condition on duplicate reorder codes */
4791     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4792     if (!U_FAILURE(status)) {
4793         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4794         return;
4795     }
4796 
4797     status = U_ZERO_ERROR;
4798     /* test for reorder codes after a reset code */
4799     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4800     if (!U_FAILURE(status)) {
4801         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4802         return;
4803     }
4804 
4805     ucol_close(myCollation);
4806 }
4807 
4808 /*
4809  * Test reordering API.
4810  */
TestReorderingAPIWithRuleCreatedCollator(void)4811 static void TestReorderingAPIWithRuleCreatedCollator(void)
4812 {
4813     UErrorCode status = U_ZERO_ERROR;
4814     UCollator  *myCollation;
4815     UChar rules[90];
4816     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4817     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4818     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4819     UCollationResult collResult;
4820     int32_t retrievedReorderCodesLength;
4821     int32_t retrievedReorderCodes[10];
4822     static const UChar greekString[] = { 0x03b1 };
4823     static const UChar punctuationString[] = { 0x203e };
4824     static const UChar hanString[] = { 0x65E5, 0x672C };
4825     int loopIndex;
4826 
4827     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4828 
4829     /* build collator from rules */
4830     u_uastrcpy(rules, "[reorder Hani Grek]");
4831     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4832     if(U_FAILURE(status)) {
4833         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4834         return;
4835     }
4836 
4837     /* get the reordering */
4838     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4839     if (U_FAILURE(status)) {
4840         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4841         return;
4842     }
4843     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4844         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4845         return;
4846     }
4847     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4848         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4849             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4850             return;
4851         }
4852     }
4853     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4854     if (collResult != UCOL_GREATER) {
4855         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4856         return;
4857     }
4858 
4859     /* set the reordering */
4860     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4861     if (U_FAILURE(status)) {
4862         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4863         return;
4864     }
4865 
4866     /* get the reordering */
4867     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4868     if (status != U_BUFFER_OVERFLOW_ERROR) {
4869         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4870         return;
4871     }
4872     status = U_ZERO_ERROR;
4873     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4874         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4875         return;
4876     }
4877     /* now let's really get it */
4878     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4879     if (U_FAILURE(status)) {
4880         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4881         return;
4882     }
4883     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4884         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4885         return;
4886     }
4887     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4888         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4889             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4890             return;
4891         }
4892     }
4893     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4894     if (collResult != UCOL_LESS) {
4895         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4896         return;
4897     }
4898 
4899     /* clear the reordering */
4900     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4901     if (U_FAILURE(status)) {
4902         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4903         return;
4904     }
4905 
4906     /* get the reordering again */
4907     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4908     if (retrievedReorderCodesLength != 0) {
4909         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4910         return;
4911     }
4912 
4913     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4914     if (collResult != UCOL_GREATER) {
4915         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4916         return;
4917     }
4918 
4919     /* reset the reordering */
4920     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4921     if (U_FAILURE(status)) {
4922         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4923         return;
4924     }
4925     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4926     if (U_FAILURE(status)) {
4927         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4928         return;
4929     }
4930     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4931         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4932         return;
4933     }
4934     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4935         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4936             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4937             return;
4938         }
4939     }
4940 
4941     ucol_close(myCollation);
4942 }
4943 
containsExpectedScript(const int32_t scripts[],int32_t length,int32_t expectedScript)4944 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4945     int32_t i;
4946     for (i = 0; i < length; ++i) {
4947         if (expectedScript == scripts[i]) { return TRUE; }
4948     }
4949     return FALSE;
4950 }
4951 
TestEquivalentReorderingScripts(void)4952 static void TestEquivalentReorderingScripts(void) {
4953     // Beginning with ICU 55, collation reordering moves single scripts
4954     // rather than groups of scripts,
4955     // except where scripts share a range and sort primary-equal.
4956     UErrorCode status = U_ZERO_ERROR;
4957     int32_t equivalentScripts[100];
4958     int32_t length;
4959     int i;
4960     int32_t prevScript;
4961     /* These scripts are expected to be equivalent. */
4962     static const int32_t expectedScripts[] = {
4963         USCRIPT_HIRAGANA,
4964         USCRIPT_KATAKANA,
4965         USCRIPT_KATAKANA_OR_HIRAGANA
4966     };
4967 
4968     equivalentScripts[0] = 0;
4969     length = ucol_getEquivalentReorderCodes(
4970             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4971     if (U_FAILURE(status)) {
4972         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4973         return;
4974     }
4975     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4976         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4977                 "length expected 1, was = %d; expected [%d] was [%d]\n",
4978                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4979     }
4980 
4981     length = ucol_getEquivalentReorderCodes(
4982             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4983     if (U_FAILURE(status)) {
4984         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4985         return;
4986     }
4987     if (length != UPRV_LENGTHOF(expectedScripts)) {
4988         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4989                 "expected %d, was = %d\n",
4990                 UPRV_LENGTHOF(expectedScripts), length);
4991     }
4992     prevScript = -1;
4993     for (i = 0; i < length; ++i) {
4994         int32_t script = equivalentScripts[i];
4995         if (script <= prevScript) {
4996             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4997         }
4998         prevScript = script;
4999     }
5000     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5001         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5002             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5003                     expectedScripts[i]);
5004         }
5005     }
5006 
5007     length = ucol_getEquivalentReorderCodes(
5008             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5009     if (U_FAILURE(status)) {
5010         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5011         return;
5012     }
5013     if (length != UPRV_LENGTHOF(expectedScripts)) {
5014         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5015                 "expected %d, was = %d\n",
5016                 UPRV_LENGTHOF(expectedScripts), length);
5017     }
5018     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5019         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5020             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5021                     expectedScripts[i]);
5022         }
5023     }
5024 
5025     length = ucol_getEquivalentReorderCodes(
5026             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5027     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5028         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5029                 "expected %d, was = %d\n",
5030                 UPRV_LENGTHOF(expectedScripts), length);
5031     }
5032 
5033     length = ucol_getEquivalentReorderCodes(
5034             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5035     if (U_FAILURE(status) || length != 3) {
5036         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5037                 "expected 3, was = %d\n", length);
5038     }
5039     length = ucol_getEquivalentReorderCodes(
5040             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5041     if (U_FAILURE(status) || length != 3) {
5042         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5043                 "expected 3, was = %d\n", length);
5044     }
5045     length = ucol_getEquivalentReorderCodes(
5046             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5047     if (U_FAILURE(status) || length != 3) {
5048         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5049                 "expected 3, was = %d\n", length);
5050     }
5051 
5052     length = ucol_getEquivalentReorderCodes(
5053             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5054     if (U_FAILURE(status) || length != 2) {
5055         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5056                 "expected 2, was = %d\n", length);
5057     }
5058     length = ucol_getEquivalentReorderCodes(
5059             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5060     if (U_FAILURE(status) || length != 2) {
5061         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5062                 "expected 2, was = %d\n", length);
5063     }
5064 }
5065 
TestReorderingAcrossCloning(void)5066 static void TestReorderingAcrossCloning(void)
5067 {
5068     UErrorCode status = U_ZERO_ERROR;
5069     UCollator  *myCollation;
5070     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5071     UCollator *clonedCollation;
5072     int32_t retrievedReorderCodesLength;
5073     int32_t retrievedReorderCodes[10];
5074     int loopIndex;
5075 
5076     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5077 
5078     /* build collator tertiary */
5079     myCollation = ucol_open("", &status);
5080     ucol_setStrength(myCollation, UCOL_TERTIARY);
5081     if(U_FAILURE(status)) {
5082         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5083         return;
5084     }
5085 
5086     /* set the reorderding */
5087     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5088     if (U_FAILURE(status)) {
5089         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5090         return;
5091     }
5092 
5093     /* clone the collator */
5094     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5095     if (U_FAILURE(status)) {
5096         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5097         return;
5098     }
5099 
5100     /* get the reordering */
5101     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5102     if (U_FAILURE(status)) {
5103         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5104         return;
5105     }
5106     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5107         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5108         return;
5109     }
5110     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5111         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5112             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5113             return;
5114         }
5115     }
5116 
5117     /*uprv_free(buffer);*/
5118     ucol_close(myCollation);
5119     ucol_close(clonedCollation);
5120 }
5121 
5122 /*
5123  * Utility function to test one collation reordering test case set.
5124  * @param testcases Array of test cases.
5125  * @param n_testcases Size of the array testcases.
5126  * @param reorderTokens Array of reordering codes.
5127  * @param reorderTokensLen Size of the array reorderTokens.
5128  */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5129 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5130 {
5131     uint32_t testCaseNum;
5132     UErrorCode status = U_ZERO_ERROR;
5133     UCollator  *myCollation;
5134 
5135     myCollation = ucol_open("", &status);
5136     if (U_FAILURE(status)) {
5137         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5138         return;
5139     }
5140     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5141     if(U_FAILURE(status)) {
5142         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5143         return;
5144     }
5145 
5146     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5147         doTest(myCollation,
5148             testCases[testCaseNum].source,
5149             testCases[testCaseNum].target,
5150             testCases[testCaseNum].result
5151         );
5152     }
5153     ucol_close(myCollation);
5154 }
5155 
TestGreekFirstReorder(void)5156 static void TestGreekFirstReorder(void)
5157 {
5158     const char* strRules[] = {
5159         "[reorder Grek]"
5160     };
5161 
5162     const int32_t apiRules[] = {
5163         USCRIPT_GREEK
5164     };
5165 
5166     const static OneTestCase privateUseCharacterStrings[] = {
5167         { {0x0391}, {0x0391}, UCOL_EQUAL },
5168         { {0x0041}, {0x0391}, UCOL_GREATER },
5169         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5170         { {0x0060}, {0x0391}, UCOL_LESS },
5171         { {0x0391}, {0xe2dc}, UCOL_LESS },
5172         { {0x0391}, {0x0060}, UCOL_GREATER },
5173     };
5174 
5175     /* Test rules creation */
5176     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5177 
5178     /* Test collation reordering API */
5179     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5180 }
5181 
TestGreekLastReorder(void)5182 static void TestGreekLastReorder(void)
5183 {
5184     const char* strRules[] = {
5185         "[reorder Zzzz Grek]"
5186     };
5187 
5188     const int32_t apiRules[] = {
5189         USCRIPT_UNKNOWN, USCRIPT_GREEK
5190     };
5191 
5192     const static OneTestCase privateUseCharacterStrings[] = {
5193         { {0x0391}, {0x0391}, UCOL_EQUAL },
5194         { {0x0041}, {0x0391}, UCOL_LESS },
5195         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5196         { {0x0060}, {0x0391}, UCOL_LESS },
5197         { {0x0391}, {0xe2dc}, UCOL_GREATER },
5198     };
5199 
5200     /* Test rules creation */
5201     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5202 
5203     /* Test collation reordering API */
5204     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5205 }
5206 
TestNonScriptReorder(void)5207 static void TestNonScriptReorder(void)
5208 {
5209     const char* strRules[] = {
5210         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5211     };
5212 
5213     const int32_t apiRules[] = {
5214         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5215         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5216         UCOL_REORDER_CODE_CURRENCY
5217     };
5218 
5219     const static OneTestCase privateUseCharacterStrings[] = {
5220         { {0x0391}, {0x0041}, UCOL_LESS },
5221         { {0x0041}, {0x0391}, UCOL_GREATER },
5222         { {0x0060}, {0x0041}, UCOL_LESS },
5223         { {0x0060}, {0x0391}, UCOL_GREATER },
5224         { {0x0024}, {0x0041}, UCOL_GREATER },
5225     };
5226 
5227     /* Test rules creation */
5228     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5229 
5230     /* Test collation reordering API */
5231     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5232 }
5233 
TestHaniReorder(void)5234 static void TestHaniReorder(void)
5235 {
5236     const char* strRules[] = {
5237         "[reorder Hani]"
5238     };
5239     const int32_t apiRules[] = {
5240         USCRIPT_HAN
5241     };
5242 
5243     const static OneTestCase privateUseCharacterStrings[] = {
5244         { {0x4e00}, {0x0041}, UCOL_LESS },
5245         { {0x4e00}, {0x0060}, UCOL_GREATER },
5246         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5247         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5248         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5249         { {0xfa27}, {0x0041}, UCOL_LESS },
5250         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5251     };
5252 
5253     /* Test rules creation */
5254     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5255 
5256     /* Test collation reordering API */
5257     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5258 }
5259 
TestHaniReorderWithOtherRules(void)5260 static void TestHaniReorderWithOtherRules(void)
5261 {
5262     const char* strRules[] = {
5263         "[reorder Hani] &b<a"
5264     };
5265     /*const int32_t apiRules[] = {
5266         USCRIPT_HAN
5267     };*/
5268 
5269     const static OneTestCase privateUseCharacterStrings[] = {
5270         { {0x4e00}, {0x0041}, UCOL_LESS },
5271         { {0x4e00}, {0x0060}, UCOL_GREATER },
5272         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5273         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5274         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5275         { {0xfa27}, {0x0041}, UCOL_LESS },
5276         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5277         { {0x0062}, {0x0061}, UCOL_LESS },
5278     };
5279 
5280     /* Test rules creation */
5281     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5282 }
5283 
TestMultipleReorder(void)5284 static void TestMultipleReorder(void)
5285 {
5286     const char* strRules[] = {
5287         "[reorder Grek Zzzz DIGIT Latn Hani]"
5288     };
5289 
5290     const int32_t apiRules[] = {
5291         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5292     };
5293 
5294     const static OneTestCase collationTestCases[] = {
5295         { {0x0391}, {0x0041}, UCOL_LESS},
5296         { {0x0031}, {0x0041}, UCOL_LESS},
5297         { {0x0041}, {0x4e00}, UCOL_LESS},
5298     };
5299 
5300     /* Test rules creation */
5301     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5302 
5303     /* Test collation reordering API */
5304     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5305 }
5306 
5307 /*
5308  * Test that covers issue reported in ticket 8814
5309  */
TestReorderWithNumericCollation(void)5310 static void TestReorderWithNumericCollation(void)
5311 {
5312     UErrorCode status = U_ZERO_ERROR;
5313     UCollator  *myCollation;
5314     UCollator  *myReorderCollation;
5315     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5316     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5317     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5318     UChar fortyS[] = { 0x0053 };
5319     UChar fortyThreeP[] = { 0x0050 };
5320     uint8_t fortyS_sortKey[128];
5321     int32_t fortyS_sortKey_Length;
5322     uint8_t fortyThreeP_sortKey[128];
5323     int32_t fortyThreeP_sortKey_Length;
5324     uint8_t fortyS_sortKey_reorder[128];
5325     int32_t fortyS_sortKey_reorder_Length;
5326     uint8_t fortyThreeP_sortKey_reorder[128];
5327     int32_t fortyThreeP_sortKey_reorder_Length;
5328     UCollationResult collResult;
5329     UCollationResult collResultReorder;
5330 
5331     log_verbose("Testing reordering with and without numeric collation\n");
5332 
5333     /* build collator tertiary with numeric */
5334     myCollation = ucol_open("", &status);
5335     /*
5336     ucol_setStrength(myCollation, UCOL_TERTIARY);
5337     */
5338     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5339     if(U_FAILURE(status)) {
5340         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5341         return;
5342     }
5343 
5344     /* build collator tertiary with numeric and reordering */
5345     myReorderCollation = ucol_open("", &status);
5346     /*
5347     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5348     */
5349     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5350     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5351     if(U_FAILURE(status)) {
5352         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5353         return;
5354     }
5355 
5356     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5357     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5358     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5359     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5360 
5361     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5362         log_err_status(status, "ERROR: couldn't generate sort keys\n");
5363         return;
5364     }
5365     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5366     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367     /*
5368     fprintf(stderr, "\tcollResult = %x\n", collResult);
5369     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5370     fprintf(stderr, "\nfortyS\n");
5371     for (i = 0; i < fortyS_sortKey_Length; i++) {
5372         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5373     }
5374     fprintf(stderr, "\nfortyThreeP\n");
5375     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5376         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5377     }
5378     */
5379     if (collResult != collResultReorder) {
5380         log_err_status(status, "ERROR: collation results should have been the same.\n");
5381         return;
5382     }
5383 
5384     ucol_close(myCollation);
5385     ucol_close(myReorderCollation);
5386 }
5387 
/*
 * Compares two zero-terminated byte sequences byte by byte.
 * Returns 0 when both are identical up to and including the terminating
 * zero byte; otherwise returns -1 if the first differing byte of a is
 * smaller than that of b, and 1 if it is larger.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  while (*a == *b) {
    if (*a == 0) {
      /* reached the terminator of both sequences without a difference */
      return 0;
    }
    ++a;
    ++b;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
5397 
TestImportRulesDeWithPhonebook(void)5398 static void TestImportRulesDeWithPhonebook(void)
5399 {
5400   const char* normalRules[] = {
5401     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5402     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5403     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5404   };
5405   const OneTestCase normalTests[] = {
5406     { {0x00e6}, {0x00c6}, UCOL_LESS},
5407     { {0x00fc}, {0x00dc}, UCOL_GREATER},
5408   };
5409 
5410   const char* importRules[] = {
5411     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5412     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5413     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414   };
5415   const OneTestCase importTests[] = {
5416     { {0x00e6}, {0x00c6}, UCOL_LESS},
5417     { {0x00fc}, {0x00dc}, UCOL_LESS},
5418   };
5419 
5420   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5421   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5422 }
5423 
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Compares U+0110/U+00F0, the currency signs U+00A3/U+00A5 and "a" versus
 * "a" + U+00A3 under four rule sets: a dummy rule, the imported European
 * Ordering Rules, the imported Finnish standard rules, and both imports
 * combined.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0},         UCOL_LESS},
    { {0x00a3}, {0x00a5},         UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0},         UCOL_LESS},
    { {0x00a3}, {0x00a5},         UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules. */
  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0},         UCOL_GREATER},
    { {0x00a3}, {0x00a5},         UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expectations shared by the combined import and (once enabled) fi-u-co-eor. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0},         UCOL_GREATER},
    { {0x00a3}, {0x00a5},         UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
  doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
  doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
  doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
#endif
5493 
5494 #if 0
5495 /*
5496  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5497  * the resource files are built with -includeUnihanColl option.
5498  * TODO: Uncomment this function and make it work when unihan rules are built by default.
5499  */
5500 static void TestImportRulesCJKWithUnihan(void)
5501 {
5502   /* DUCET. */
5503   const char* defaultRules[] = {
5504     "&a<b",                                    /* Dummy rule. */
5505   };
5506 
5507   const OneTestCase defaultTests[] = {
5508     { {0x3402}, {0x4e1e}, UCOL_GREATER},
5509   };
5510 
5511   /* European Ordering rules: ignore currency characters. */
5512   const char* unihanRules[] = {
5513     "[import ko-u-co-unihan]",
5514   };
5515 
5516   const OneTestCase unihanTests[] = {
5517     { {0x3402}, {0x4e1e}, UCOL_LESS},
5518   };
5519 
5520   doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
5521   doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));
5522 
5523 }
5524 #endif
5525 
TestImport(void)5526 static void TestImport(void)
5527 {
5528     UCollator* vicoll;
5529     UCollator* escoll;
5530     UCollator* viescoll;
5531     UCollator* importviescoll;
5532     UParseError error;
5533     UErrorCode status = U_ZERO_ERROR;
5534     UChar* virules;
5535     int32_t viruleslength;
5536     UChar* esrules;
5537     int32_t esruleslength;
5538     UChar* viesrules;
5539     int32_t viesruleslength;
5540     char srules[500] = "[import vi][import es]";
5541     UChar rules[500];
5542     uint32_t length = 0;
5543     int32_t itemCount;
5544     int32_t i, k;
5545     UChar32 start;
5546     UChar32 end;
5547     UChar str[500];
5548     int32_t strLength;
5549 
5550     uint8_t sk1[500];
5551     uint8_t sk2[500];
5552 
5553     UBool b;
5554     USet* tailoredSet;
5555     USet* importTailoredSet;
5556 
5557 
5558     vicoll = ucol_open("vi", &status);
5559     if(U_FAILURE(status)){
5560         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5561         return;
5562     }
5563 
5564     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5565     if(viruleslength == 0) {
5566         log_data_err("missing vi tailoring rule string\n");
5567         ucol_close(vicoll);
5568         return;
5569     }
5570     escoll = ucol_open("es", &status);
5571     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5572     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5573     viesrules[0] = 0;
5574     u_strcat(viesrules, virules);
5575     u_strcat(viesrules, esrules);
5576     viesruleslength = viruleslength + esruleslength;
5577     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5578 
5579     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5580     length = u_unescape(srules, rules, 500);
5581     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5582     if(U_FAILURE(status)){
5583         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5584         return;
5585     }
5586 
5587     tailoredSet = ucol_getTailoredSet(viescoll, &status);
5588     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5589 
5590     if(!uset_equals(tailoredSet, importTailoredSet)){
5591         log_err("Tailored sets not equal");
5592     }
5593 
5594     uset_close(importTailoredSet);
5595 
5596     itemCount = uset_getItemCount(tailoredSet);
5597 
5598     for( i = 0; i < itemCount; i++){
5599         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5600         if(strLength < 2){
5601             for (; start <= end; start++){
5602                 k = 0;
5603                 U16_APPEND(str, k, 500, start, b);
5604                 (void)b;    /* Suppress set but not used warning. */
5605                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5606                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5607                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5608                     log_err("Sort key for %s not equal\n", str);
5609                     break;
5610                 }
5611             }
5612         }else{
5613             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5614             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5615             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5616                 log_err("ZZSort key for %s not equal\n", str);
5617                 break;
5618             }
5619 
5620         }
5621     }
5622 
5623     uset_close(tailoredSet);
5624 
5625     uprv_free(viesrules);
5626 
5627     ucol_close(vicoll);
5628     ucol_close(escoll);
5629     ucol_close(viescoll);
5630     ucol_close(importviescoll);
5631 }
5632 
TestImportWithType(void)5633 static void TestImportWithType(void)
5634 {
5635     UCollator* vicoll;
5636     UCollator* decoll;
5637     UCollator* videcoll;
5638     UCollator* importvidecoll;
5639     UParseError error;
5640     UErrorCode status = U_ZERO_ERROR;
5641     const UChar* virules;
5642     int32_t viruleslength;
5643     const UChar* derules;
5644     int32_t deruleslength;
5645     UChar* viderules;
5646     int32_t videruleslength;
5647     const char srules[500] = "[import vi][import de-u-co-phonebk]";
5648     UChar rules[500];
5649     uint32_t length = 0;
5650     int32_t itemCount;
5651     int32_t i, k;
5652     UChar32 start;
5653     UChar32 end;
5654     UChar str[500];
5655     int32_t strLength;
5656 
5657     uint8_t sk1[500];
5658     uint8_t sk2[500];
5659 
5660     USet* tailoredSet;
5661     USet* importTailoredSet;
5662 
5663     vicoll = ucol_open("vi", &status);
5664     if(U_FAILURE(status)){
5665         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5666         return;
5667     }
5668     virules = ucol_getRules(vicoll, &viruleslength);
5669     if(viruleslength == 0) {
5670         log_data_err("missing vi tailoring rule string\n");
5671         ucol_close(vicoll);
5672         return;
5673     }
5674     /* decoll = ucol_open("de@collation=phonebook", &status); */
5675     decoll = ucol_open("de-u-co-phonebk", &status);
5676     if(U_FAILURE(status)){
5677         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5678         return;
5679     }
5680 
5681 
5682     derules = ucol_getRules(decoll, &deruleslength);
5683     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5684     viderules[0] = 0;
5685     u_strcat(viderules, virules);
5686     u_strcat(viderules, derules);
5687     videruleslength = viruleslength + deruleslength;
5688     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5689 
5690     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5691     length = u_unescape(srules, rules, 500);
5692     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5693     if(U_FAILURE(status)){
5694         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5695         return;
5696     }
5697 
5698     tailoredSet = ucol_getTailoredSet(videcoll, &status);
5699     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5700 
5701     if(!uset_equals(tailoredSet, importTailoredSet)){
5702         log_err("Tailored sets not equal");
5703     }
5704 
5705     uset_close(importTailoredSet);
5706 
5707     itemCount = uset_getItemCount(tailoredSet);
5708 
5709     for( i = 0; i < itemCount; i++){
5710         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5711         if(strLength < 2){
5712             for (; start <= end; start++){
5713                 k = 0;
5714                 U16_APPEND_UNSAFE(str, k, start);
5715                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5716                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5717                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5718                     log_err("Sort key for %s not equal\n", str);
5719                     break;
5720                 }
5721             }
5722         }else{
5723             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5724             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5725             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5726                 log_err("Sort key for %s not equal\n", str);
5727                 break;
5728             }
5729 
5730         }
5731     }
5732 
5733     uset_close(tailoredSet);
5734 
5735     uprv_free(viderules);
5736 
5737     ucol_close(videcoll);
5738     ucol_close(importvidecoll);
5739     ucol_close(vicoll);
5740     ucol_close(decoll);
5741 }
5742 
5743 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5744 static const UChar longUpperStr1[]= { /* 155 chars */
5745     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5746     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5747     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5748     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5749     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5750     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5751     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5752     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5753     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5754     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5755 };
5756 
5757 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5758 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5759     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5760     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5763     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5764 };
5765 
5766 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5767 static const UChar longUpperStr3[]= { /* 324 chars */
5768     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5769     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5780 };
5781 
5782 typedef struct {
5783     const UChar * longUpperStrPtr;
5784     int32_t       longUpperStrLen;
5785 } LongUpperStrItem;
5786 
5787 /* String pointers must be in reverse collation order of the corresponding strings */
5788 static const LongUpperStrItem longUpperStrItems[] = {
5789     { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5790     { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5791     { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
5792     { NULL,          0                           }
5793 };
5794 
5795 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5796 
5797 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5798 static void TestCaseLevelBufferOverflow(void)
5799 {
5800     UErrorCode status = U_ZERO_ERROR;
5801     UCollator * ucol = ucol_open("root", &status);
5802     if ( U_SUCCESS(status) ) {
5803         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5804         if ( U_SUCCESS(status) ) {
5805             const LongUpperStrItem * itemPtr;
5806             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5807             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5808                 int32_t sortKeyLen;
5809                 if (itemPtr > longUpperStrItems) {
5810                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5811                 }
5812                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5813                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5814                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5815                     break;
5816                 }
5817                 if ( itemPtr > longUpperStrItems ) {
5818                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5819                     if (compareResult >= 0) {
5820                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5821                     }
5822                 }
5823             }
5824         } else {
5825             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5826         }
5827         ucol_close(ucol);
5828     } else {
5829         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5830     }
5831 }
5832 
5833 /* Test for #10595 */
5834 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5835 #define KEY_PART_SIZE 16
5836 
TestNextSortKeyPartJaIdentical(void)5837 static void TestNextSortKeyPartJaIdentical(void)
5838 {
5839     UErrorCode status = U_ZERO_ERROR;
5840     UCollator *coll;
5841     uint8_t keyPart[KEY_PART_SIZE];
5842     UCharIterator iter;
5843     uint32_t state[2] = {0, 0};
5844     int32_t keyPartLen;
5845 
5846     coll = ucol_open("ja", &status);
5847     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5848     if (U_FAILURE(status)) {
5849         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5850         return;
5851     }
5852 
5853     uiter_setString(&iter, testJapaneseName, 5);
5854     keyPartLen = KEY_PART_SIZE;
5855     while (keyPartLen == KEY_PART_SIZE) {
5856         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5857         if (U_FAILURE(status)) {
5858             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5859             break;
5860         }
5861     }
5862 
5863     ucol_close(coll);
5864 }
5865 
5866 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5867 
addMiscCollTest(TestNode ** root)5868 void addMiscCollTest(TestNode** root)
5869 {
5870     TEST(TestRuleOptions);
5871     TEST(TestBeforePrefixFailure);
5872     TEST(TestContractionClosure);
5873     TEST(TestPrefixCompose);
5874     TEST(TestStrCollIdenticalPrefix);
5875     TEST(TestPrefix);
5876     TEST(TestNewJapanese);
5877     /*TEST(TestLimitations);*/
5878     TEST(TestNonChars);
5879     TEST(TestExtremeCompression);
5880     TEST(TestSurrogates);
5881     TEST(TestVariableTopSetting);
5882     TEST(TestMaxVariable);
5883     TEST(TestBocsuCoverage);
5884     TEST(TestCyrillicTailoring);
5885     TEST(TestCase);
5886     TEST(IncompleteCntTest);
5887     TEST(BlackBirdTest);
5888     TEST(FunkyATest);
5889     TEST(BillFairmanTest);
5890     TEST(TestChMove);
5891     TEST(TestImplicitTailoring);
5892     TEST(TestFCDProblem);
5893     TEST(TestEmptyRule);
5894     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5895     TEST(TestJ815);
5896     TEST(TestUpperCaseFirst);
5897     TEST(TestBefore);
5898     TEST(TestHangulTailoring);
5899     TEST(TestUCARules);
5900     TEST(TestIncrementalNormalize);
5901     TEST(TestComposeDecompose);
5902     TEST(TestCompressOverlap);
5903     TEST(TestContraction);
5904     TEST(TestExpansion);
5905     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5906     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5907     TEST(TestOptimize);
5908     TEST(TestSuppressContractions);
5909     TEST(Alexis2);
5910     TEST(TestHebrewUCA);
5911     TEST(TestPartialSortKeyTermination);
5912     TEST(TestSettings);
5913     TEST(TestEquals);
5914     TEST(TestJ2726);
5915     TEST(NullRule);
5916     TEST(TestNumericCollation);
5917     TEST(TestTibetanConformance);
5918     TEST(TestPinyinProblem);
5919     TEST(TestSeparateTrees);
5920     TEST(TestBeforePinyin);
5921     TEST(TestBeforeTightening);
5922     /*TEST(TestMoreBefore);*/
5923     TEST(TestTailorNULL);
5924     TEST(TestUpperFirstQuaternary);
5925     TEST(TestJ4960);
5926     TEST(TestJ5223);
5927     TEST(TestJ5232);
5928     TEST(TestJ5367);
5929     TEST(TestHiragana);
5930     TEST(TestSortKeyConsistency);
5931     TEST(TestVI5913);  /* VI, RO tailored rules */
5932     TEST(TestCroatianSortKey);
5933     TEST(TestTailor6179);
5934     TEST(TestUCAPrecontext);
5935     TEST(TestOutOfBuffer5468);
5936     TEST(TestSameStrengthList);
5937 
5938     TEST(TestSameStrengthListQuoted);
5939     TEST(TestSameStrengthListSupplemental);
5940     TEST(TestSameStrengthListQwerty);
5941     TEST(TestSameStrengthListQuotedQwerty);
5942     TEST(TestSameStrengthListRanges);
5943     TEST(TestSameStrengthListSupplementalRanges);
5944     TEST(TestSpecialCharacters);
5945     TEST(TestPrivateUseCharacters);
5946     TEST(TestPrivateUseCharactersInList);
5947     TEST(TestPrivateUseCharactersInRange);
5948     TEST(TestInvalidListsAndRanges);
5949     TEST(TestImportRulesDeWithPhonebook);
5950     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5951     /* TEST(TestImportRulesCJKWithUnihan); */
5952     TEST(TestImport);
5953     TEST(TestImportWithType);
5954 
5955     TEST(TestBeforeRuleWithScriptReordering);
5956     TEST(TestNonLeadBytesDuringCollationReordering);
5957     TEST(TestReorderingAPI);
5958     TEST(TestReorderingAPIWithRuleCreatedCollator);
5959     TEST(TestEquivalentReorderingScripts);
5960     TEST(TestGreekFirstReorder);
5961     TEST(TestGreekLastReorder);
5962     TEST(TestNonScriptReorder);
5963     TEST(TestHaniReorder);
5964     TEST(TestHaniReorderWithOtherRules);
5965     TEST(TestMultipleReorder);
5966     TEST(TestReorderingAcrossCloning);
5967     TEST(TestReorderWithNumericCollation);
5968 
5969     TEST(TestCaseLevelBufferOverflow);
5970     TEST(TestNextSortKeyPartJaIdentical);
5971 }
5972 
5973 #endif /* #if !UCONFIG_NO_COLLATION */
5974