• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 2001-2015, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
14  * to fit.
15  */
16 
17 #include <stdio.h>
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_COLLATION
22 
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "unicode/ures.h"
38 #include "unicode/uscript.h"
39 #include "unicode/utf16.h"
40 #include "uparse.h"
41 #include "putilimp.h"
42 
43 
/*
 * Number of elements in a true array (never pass a pointer).
 * The argument is parenthesized in both uses so that any array-valued
 * expression can be passed without precedence surprises.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-width UChar tables in this file. */
#define MAX_TOKEN_LEN 16
47 
48 typedef UCollationResult tst_strcoll(void *collator, const int object,
49                         const UChar *source, const int sLen,
50                         const UChar *target, const int tLen);
51 
52 
53 
/*
 * Strings for the first IncompleteCntTest scenario (rules " & Z < ABC < Q < B").
 * The test compares every entry against every later entry and expects UCOL_LESS.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ",
  "AB", "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};

/*
 * Strings for the second IncompleteCntTest scenario
 * (rules " & Z < DAVIS < MARK <DAV"), checked the same pairwise way.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ",
  "MAR", "Z",
  "DAVIS", "MARK", "DAV", "DAVI"
};
80 
IncompleteCntTest(void)81 static void IncompleteCntTest(void)
82 {
83   UErrorCode status = U_ZERO_ERROR;
84   UChar temp[90];
85   UChar t1[90];
86   UChar t2[90];
87 
88   UCollator *coll =  NULL;
89   uint32_t i = 0, j = 0;
90   uint32_t size = 0;
91 
92   u_uastrcpy(temp, " & Z < ABC < Q < B");
93 
94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95 
96   if(U_SUCCESS(status)) {
97     size = sizeof(cnt1)/sizeof(cnt1[0]);
98     for(i = 0; i < size-1; i++) {
99       for(j = i+1; j < size; j++) {
100         UCollationElements *iter;
101         u_uastrcpy(t1, cnt1[i]);
102         u_uastrcpy(t2, cnt1[j]);
103         doTest(coll, t1, t2, UCOL_LESS);
104         /* synwee : added collation element iterator test */
105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106         if (U_FAILURE(status)) {
107           log_err("Creation of iterator failed\n");
108           break;
109         }
110         backAndForth(iter);
111         ucol_closeElements(iter);
112       }
113     }
114   }
115 
116   ucol_close(coll);
117 
118 
119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121 
122   if(U_SUCCESS(status)) {
123     size = sizeof(cnt2)/sizeof(cnt2[0]);
124     for(i = 0; i < size-1; i++) {
125       for(j = i+1; j < size; j++) {
126         UCollationElements *iter;
127         u_uastrcpy(t1, cnt2[i]);
128         u_uastrcpy(t2, cnt2[j]);
129         doTest(coll, t1, t2, UCOL_LESS);
130 
131         /* synwee : added collation element iterator test */
132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133         if (U_FAILURE(status)) {
134           log_err("Creation of iterator failed\n");
135           break;
136         }
137         backAndForth(iter);
138         ucol_closeElements(iter);
139       }
140     }
141   }
142 
143   ucol_close(coll);
144 
145 
146 }
147 
/*
 * "black bird" variants used by BlackBirdTest with UCOL_SHIFTED alternate
 * handling.  At quaternary strength the test expects every entry to be
 * UCOL_LESS than every later entry.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
159 
160 const static UCollationResult shiftedTert[] = {
161   UCOL_EQUAL,
162   UCOL_EQUAL,
163   UCOL_EQUAL,
164   UCOL_LESS,
165   UCOL_EQUAL,
166   UCOL_EQUAL,
167   UCOL_LESS,
168   UCOL_EQUAL,
169   UCOL_EQUAL
170 };
171 
/*
 * "black bird" variants used by BlackBirdTest with UCOL_NON_IGNORABLE
 * alternate handling.  The test expects every entry to be UCOL_LESS than
 * every later entry.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
183 
BlackBirdTest(void)184 static void BlackBirdTest(void) {
185   UErrorCode status = U_ZERO_ERROR;
186   UChar t1[90];
187   UChar t2[90];
188 
189   uint32_t i = 0, j = 0;
190   uint32_t size = 0;
191   UCollator *coll = ucol_open("en_US", &status);
192 
193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195 
196   if(U_SUCCESS(status)) {
197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198     for(i = 0; i < size-1; i++) {
199       for(j = i+1; j < size; j++) {
200         u_uastrcpy(t1, nonignorable[i]);
201         u_uastrcpy(t2, nonignorable[j]);
202         doTest(coll, t1, t2, UCOL_LESS);
203       }
204     }
205   }
206 
207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209 
210   if(U_SUCCESS(status)) {
211     size = sizeof(shifted)/sizeof(shifted[0]);
212     for(i = 0; i < size-1; i++) {
213       for(j = i+1; j < size; j++) {
214         u_uastrcpy(t1, shifted[i]);
215         u_uastrcpy(t2, shifted[j]);
216         doTest(coll, t1, t2, UCOL_LESS);
217       }
218     }
219   }
220 
221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222   if(U_SUCCESS(status)) {
223     size = sizeof(shifted)/sizeof(shifted[0]);
224     for(i = 1; i < size; i++) {
225       u_uastrcpy(t1, shifted[i-1]);
226       u_uastrcpy(t2, shifted[i]);
227       doTest(coll, t1, t2, shiftedTert[i]);
228     }
229   }
230 
231   ucol_close(coll);
232 }
233 
234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237     {0x0041/*'A'*/, 0x0300, 0x0000},
238     {0x00C0, 0x0301, 0x0000},
239     /* this would work with forced normalization */
240     {0x00C0, 0x0316, 0x0000}
241 };
242 
243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246     {0x00C0, 0},
247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248     /* this would work with forced normalization */
249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250 };
251 
252 const static UCollationResult results[] = {
253     UCOL_GREATER,
254     UCOL_EQUAL,
255     UCOL_EQUAL,
256     UCOL_GREATER,
257     UCOL_EQUAL
258 };
259 
FunkyATest(void)260 static void FunkyATest(void)
261 {
262 
263     int32_t i;
264     UErrorCode status = U_ZERO_ERROR;
265     UCollator  *myCollation;
266     myCollation = ucol_open("en_US", &status);
267     if(U_FAILURE(status)){
268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269         return;
270     }
271     log_verbose("Testing some A letters, for some reason\n");
272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273     ucol_setStrength(myCollation, UCOL_TERTIARY);
274     for (i = 0; i < 4 ; i++)
275     {
276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277     }
278     ucol_close(myCollation);
279 }
280 
281 UColAttributeValue caseFirst[] = {
282     UCOL_OFF,
283     UCOL_LOWER_FIRST,
284     UCOL_UPPER_FIRST
285 };
286 
287 
288 UColAttributeValue alternateHandling[] = {
289     UCOL_NON_IGNORABLE,
290     UCOL_SHIFTED
291 };
292 
293 UColAttributeValue caseLevel[] = {
294     UCOL_OFF,
295     UCOL_ON
296 };
297 
298 UColAttributeValue strengths[] = {
299     UCOL_PRIMARY,
300     UCOL_SECONDARY,
301     UCOL_TERTIARY,
302     UCOL_QUATERNARY,
303     UCOL_IDENTICAL
304 };
305 
#if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/* not used currently - does not test only prints */
/*
 * Dumps to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * case-first, alternate-handling, case-level and strength settings on the
 * en_US collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  /* Without a collator every call below would operate on NULL. */
  if(U_FAILURE(status)) {
    log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* Index with h: i still holds sizem from the dump loop above. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* The collator was previously leaked. */
  ucol_close(coll);
}
#endif
383 
BillFairmanTest(void)384 static void BillFairmanTest(void) {
385 /*
386 ** check for actual locale via ICU resource bundles
387 **
388 ** lp points to the original locale ("fr_FR_....")
389 */
390 
391     UResourceBundle *lr,*cr;
392     UErrorCode              lec = U_ZERO_ERROR;
393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
394 
395     log_verbose("BillFairmanTest\n");
396 
397     lr = ures_open(NULL,lp,&lec);
398     if (lr) {
399         cr = ures_getByKey(lr,"collations",0,&lec);
400         if (cr) {
401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402             if (lp) {
403                 if (U_SUCCESS(lec)) {
404                     if(strcmp(lp, "fr") != 0) {
405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                     }
407                 }
408             }
409             ures_close(cr);
410         }
411         ures_close(lr);
412     }
413 }
414 
/*
 * Strings for TestChMove, which opens the "cs" collator and expects every
 * entry to be UCOL_LESS than every later entry.  The \uXXXX sequences are
 * kept escaped here and expanded with u_unescape() by the test.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
434 
TestChMove(void)435 static void TestChMove(void) {
436     UChar t1[256] = {0};
437     UChar t2[256] = {0};
438 
439     uint32_t i = 0, j = 0;
440     uint32_t size = 0;
441     UErrorCode status = U_ZERO_ERROR;
442 
443     UCollator *coll = ucol_open("cs", &status);
444 
445     if(U_SUCCESS(status)) {
446         size = sizeof(chTest)/sizeof(chTest[0]);
447         for(i = 0; i < size-1; i++) {
448             for(j = i+1; j < size; j++) {
449                 u_unescape(chTest[i], t1, 256);
450                 u_unescape(chTest[j], t2, 256);
451                 doTest(coll, t1, t2, UCOL_LESS);
452             }
453         }
454     }
455     else {
456         log_data_err("Can't open collator");
457     }
458     ucol_close(coll);
459 }
460 
461 
462 
463 
464 /*
465 const static char impTest[][20] = {
466   "\\u4e00",
467     "a",
468     "A",
469     "b",
470     "B",
471     "\\u4e01"
472 };
473 */
474 
475 
TestImplicitTailoring(void)476 static void TestImplicitTailoring(void) {
477   static const struct {
478     const char *rules;
479     const char *data[10];
480     const uint32_t len;
481   } tests[] = {
482       {
483         /* Tailor b and c before U+4E00. */
484         "&[before 1]\\u4e00 < b < c "
485         /* Now, before U+4E00 is c; put d and e after that. */
486         "&[before 1]\\u4e00 < d < e",
487         { "b", "c", "d", "e", "\\u4e00"}, 5 },
488       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
489       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
490       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
491   };
492 
493   int32_t i = 0;
494 
495   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
496       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
497   }
498 
499 /*
500   UChar t1[256] = {0};
501   UChar t2[256] = {0};
502 
503   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
504 
505   uint32_t i = 0, j = 0;
506   uint32_t size = 0;
507   uint32_t ruleLen = 0;
508   UErrorCode status = U_ZERO_ERROR;
509   UCollator *coll = NULL;
510   ruleLen = u_unescape(rule, t1, 256);
511 
512   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
513 
514   if(U_SUCCESS(status)) {
515     size = sizeof(impTest)/sizeof(impTest[0]);
516     for(i = 0; i < size-1; i++) {
517       for(j = i+1; j < size; j++) {
518         u_unescape(impTest[i], t1, 256);
519         u_unescape(impTest[j], t2, 256);
520         doTest(coll, t1, t2, UCOL_LESS);
521       }
522     }
523   }
524   else {
525     log_err("Can't open collator");
526   }
527   ucol_close(coll);
528   */
529 }
530 
TestFCDProblem(void)531 static void TestFCDProblem(void) {
532   UChar t1[256] = {0};
533   UChar t2[256] = {0};
534 
535   const char *s1 = "\\u0430\\u0306\\u0325";
536   const char *s2 = "\\u04D1\\u0325";
537 
538   UErrorCode status = U_ZERO_ERROR;
539   UCollator *coll = ucol_open("", &status);
540   u_unescape(s1, t1, 256);
541   u_unescape(s2, t2, 256);
542 
543   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
544   doTest(coll, t1, t2, UCOL_EQUAL);
545 
546   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
547   doTest(coll, t1, t2, UCOL_EQUAL);
548 
549   ucol_close(coll);
550 }
551 
552 /*
553 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
554 We're only using NFC/NFD in this test.
555 */
556 #define NORM_BUFFER_TEST_LEN 18
557 typedef struct {
558   UChar32 u;
559   UChar NFC[NORM_BUFFER_TEST_LEN];
560   UChar NFD[NORM_BUFFER_TEST_LEN];
561 } tester;
562 
TestComposeDecompose(void)563 static void TestComposeDecompose(void) {
564     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
565     static const UChar UNICODESET_STR[] = {
566         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
567         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
568         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
569     };
570     int32_t noOfLoc;
571     int32_t i = 0, j = 0;
572 
573     UErrorCode status = U_ZERO_ERROR;
574     const char *locName = NULL;
575     uint32_t nfcSize;
576     uint32_t nfdSize;
577     tester **t;
578     uint32_t noCases = 0;
579     UCollator *coll = NULL;
580     UChar32 u = 0;
581     UChar comp[NORM_BUFFER_TEST_LEN];
582     uint32_t len = 0;
583     UCollationElements *iter;
584     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
585     int32_t charsToTestSize;
586 
587     noOfLoc = uloc_countAvailable();
588 
589     coll = ucol_open("", &status);
590     if (U_FAILURE(status)) {
591         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
592         return;
593     }
594     charsToTestSize = uset_size(charsToTest);
595     if (charsToTestSize <= 0) {
596         log_err("Set was zero. Missing data?\n");
597         return;
598     }
599     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
600     t[0] = (tester *)malloc(sizeof(tester));
601     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
602 
603     for(u = 0; u < charsToTestSize; u++) {
604         UChar32 ch = uset_charAt(charsToTest, u);
605         len = 0;
606         U16_APPEND_UNSAFE(comp, len, ch);
607         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
608         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
609 
610         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
611           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
612             t[noCases]->u = ch;
613             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
614                 u_strncpy(t[noCases]->NFC, comp, len);
615                 t[noCases]->NFC[len] = 0;
616             }
617             noCases++;
618             t[noCases] = (tester *)malloc(sizeof(tester));
619             uprv_memset(t[noCases], 0, sizeof(tester));
620         }
621     }
622     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
623     uset_close(charsToTest);
624     charsToTest = NULL;
625 
626     for(u=0; u<(UChar32)noCases; u++) {
627         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
628             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
629             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
630         }
631     }
632     /*
633     for(u = 0; u < charsToTestSize; u++) {
634       if(!(u&0xFFFF)) {
635         log_verbose("%08X ", u);
636       }
637       uprv_memset(t[noCases], 0, sizeof(tester));
638       t[noCases]->u = u;
639       len = 0;
640       U16_APPEND_UNSAFE(comp, len, u);
641       comp[len] = 0;
642       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
643       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
644       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
645       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
646     }
647     */
648 
649     ucol_close(coll);
650 
651     log_verbose("Testing locales, number of cases = %i\n", noCases);
652     for(i = 0; i<noOfLoc; i++) {
653         status = U_ZERO_ERROR;
654         locName = uloc_getAvailable(i);
655         if(hasCollationElements(locName)) {
656             char cName[256];
657             UChar name[256];
658             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
659 
660             for(j = 0; j<nameSize; j++) {
661                 cName[j] = (char)name[j];
662             }
663             cName[nameSize] = 0;
664             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
665 
666             coll = ucol_open(locName, &status);
667             ucol_setStrength(coll, UCOL_IDENTICAL);
668             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
669 
670             for(u=0; u<(UChar32)noCases; u++) {
671                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
672                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
673                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
674                     log_verbose("Testing NFC\n");
675                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
676                     backAndForth(iter);
677                     log_verbose("Testing NFD\n");
678                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
679                     backAndForth(iter);
680                 }
681             }
682             ucol_closeElements(iter);
683             ucol_close(coll);
684         }
685     }
686     for(u = 0; u <= (UChar32)noCases; u++) {
687         free(t[u]);
688     }
689     free(t);
690 }
691 
TestEmptyRule(void)692 static void TestEmptyRule(void) {
693   UErrorCode status = U_ZERO_ERROR;
694   UChar rulez[] = { 0 };
695   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
696 
697   ucol_close(coll);
698 }
699 
TestUCARules(void)700 static void TestUCARules(void) {
701   UErrorCode status = U_ZERO_ERROR;
702   UChar b[256];
703   UChar *rules = b;
704   uint32_t ruleLen = 0;
705   UCollator *UCAfromRules = NULL;
706   UCollator *coll = ucol_open("", &status);
707   if(status == U_FILE_ACCESS_ERROR) {
708     log_data_err("Is your data around?\n");
709     return;
710   } else if(U_FAILURE(status)) {
711     log_err("Error opening collator\n");
712     return;
713   }
714   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
715 
716   log_verbose("TestUCARules\n");
717   if(ruleLen > 256) {
718     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
719     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
720   }
721   log_verbose("Rules length is %d\n", ruleLen);
722   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
723   if(U_SUCCESS(status)) {
724     ucol_close(UCAfromRules);
725   } else {
726     log_verbose("Unable to create a collator from UCARules!\n");
727   }
728 /*
729   u_unescape(blah, b, 256);
730   ucol_getSortKey(coll, b, 1, res, 256);
731 */
732   ucol_close(coll);
733   if(rules != b) {
734     free(rules);
735   }
736 }
737 
738 
739 /* Pinyin tonal order */
740 /*
741     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
742           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
743     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
744     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
745     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
746     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
747       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
748 .. (\u00fc)
749 
750 However, in testing we got the following order:
751     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
752           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
753     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
754 .. (\u0113)
755     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
756     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
757     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
758 .. (\u01d8)
759       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
760 */
761 
/*
 * Pinyin tonal order via [before 1] tailorings: for each vowel the four tonal
 * forms are tailored before the base letter, and the u-diaeresis forms after
 * 'u'.  The rules and the string list are handed to genericRulesStarter()
 * (presumably it verifies that the strings sort in the listed order --
 * its definition is not in this part of the file).
 */
static void TestBefore(void) {
  static const char *data[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char rules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, data, sizeof(data)/sizeof(data[0]));
}
780 
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: passed the Pinyin tonal strings to genericLocaleStarter() for "zh". */
static void TestJ784(void) {
  static const char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
796 
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: passed "I", "i", "Y", "y" to genericLocaleStarter() for "lv". */
static void TestJ831(void) {
  static const char *data[] = { "I", "i", "Y", "y" };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
809 
TestJ815(void)810 static void TestJ815(void) {
811   const static char *data[] = {
812     "aa",
813       "Aa",
814       "ab",
815       "Ab",
816       "ad",
817       "Ad",
818       "ae",
819       "Ae",
820       "\\u00e6",
821       "\\u00c6",
822       "af",
823       "Af",
824       "b",
825       "B"
826   };
827   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
828   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
829 }
830 
831 
TestCase(void)832 static void TestCase(void)
833 {
834     const static UChar gRules[MAX_TOKEN_LEN] =
835     /*" & 0 < 1,\u2461<a,A"*/
836     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
837 
838     const static UChar testCase[][MAX_TOKEN_LEN] =
839     {
840         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
841         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
842         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
843         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
844     };
845 
846     const static UCollationResult caseTestResults[][9] =
847     {
848         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
850         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
851         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
852     };
853 
854     const static UColAttributeValue caseTestAttributes[][2] =
855     {
856         { UCOL_LOWER_FIRST, UCOL_OFF},
857         { UCOL_UPPER_FIRST, UCOL_OFF},
858         { UCOL_LOWER_FIRST, UCOL_ON},
859         { UCOL_UPPER_FIRST, UCOL_ON}
860     };
861     int32_t i,j,k;
862     UErrorCode status = U_ZERO_ERROR;
863     UCollationElements *iter;
864     UCollator  *myCollation;
865     myCollation = ucol_open("en_US", &status);
866 
867     if(U_FAILURE(status)){
868         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
869         return;
870     }
871     log_verbose("Testing different case settings\n");
872     ucol_setStrength(myCollation, UCOL_TERTIARY);
873 
874     for(k = 0; k<4; k++) {
875       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
876       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
877       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
878       for (i = 0; i < 3 ; i++) {
879         for(j = i+1; j<4; j++) {
880           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
881         }
882       }
883     }
884     ucol_close(myCollation);
885 
886     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
887     if(U_FAILURE(status)){
888         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
889         return;
890     }
891     log_verbose("Testing different case settings with custom rules\n");
892     ucol_setStrength(myCollation, UCOL_TERTIARY);
893 
894     for(k = 0; k<4; k++) {
895       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
896       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
897       for (i = 0; i < 3 ; i++) {
898         for(j = i+1; j<4; j++) {
899           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
900           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
901           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
902           backAndForth(iter);
903           ucol_closeElements(iter);
904           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
905           backAndForth(iter);
906           ucol_closeElements(iter);
907         }
908       }
909     }
910     ucol_close(myCollation);
911     {
912       const static char *lowerFirst[] = {
913         "h",
914         "H",
915         "ch",
916         "Ch",
917         "CH",
918         "cha",
919         "chA",
920         "Cha",
921         "ChA",
922         "CHa",
923         "CHA",
924         "i",
925         "I"
926       };
927 
928       const static char *upperFirst[] = {
929         "H",
930         "h",
931         "CH",
932         "Ch",
933         "ch",
934         "CHA",
935         "CHa",
936         "ChA",
937         "Cha",
938         "chA",
939         "cha",
940         "I",
941         "i"
942       };
943       log_verbose("mixed case test\n");
944       log_verbose("lower first, case level off\n");
945       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
946       log_verbose("upper first, case level off\n");
947       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
948       log_verbose("lower first, case level on\n");
949       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
950       log_verbose("upper first, case level on\n");
951       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
952     }
953 
954 }
955 
TestIncrementalNormalize(void)956 static void TestIncrementalNormalize(void) {
957 
958     /*UChar baseA     =0x61;*/
959     UChar baseA     =0x41;
960 /*    UChar baseB     = 0x42;*/
961     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
962     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
963     /*
964         0x316 is combining grave accent below, cc=220
965         0x321 is combining palatalized hook below, cc=202
966         0x300 is combining grave accent, cc=230
967     */
968 
969 #define MAXSLEN 2000
970     /*int          maxSLen   = 64000;*/
971     int          sLen;
972     int          i;
973 
974     UCollator        *coll;
975     UErrorCode       status = U_ZERO_ERROR;
976     UCollationResult result;
977 
978     int32_t myQ = getTestOption(QUICK_OPTION);
979 
980     if(getTestOption(QUICK_OPTION) < 0) {
981         setTestOption(QUICK_OPTION, 1);
982     }
983 
984     {
985         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
986         /*          most buffers along the way.*/
987         UChar            strA[MAXSLEN+1];
988         UChar            strB[MAXSLEN+1];
989 
990         coll = ucol_open("en_US", &status);
991         if(status == U_FILE_ACCESS_ERROR) {
992           log_data_err("Is your data around?\n");
993           return;
994         } else if(U_FAILURE(status)) {
995           log_err("Error opening collator\n");
996           return;
997         }
998         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
999 
1000         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1001         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1002         /*for (sLen = 1000; sLen<1001; sLen++) {*/
1003         for (sLen = 500; sLen<501; sLen++) {
1004         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1005             strA[0] = baseA;
1006             strB[0] = baseA;
1007             for (i=1; i<=sLen-1; i++) {
1008                 strA[i] = ccMix[i % 3];
1009                 strB[sLen-i] = ccMix[i % 3];
1010             }
1011             strA[sLen]   = 0;
1012             strB[sLen]   = 0;
1013 
1014             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1015             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1016             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1017             doTest(coll, strA, strB, UCOL_EQUAL);
1018         }
1019     }
1020 
1021     setTestOption(QUICK_OPTION, myQ);
1022 
1023 
1024     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1025     /*         of the string.  Checks a couple of edge cases.*/
1026 
1027     {
1028         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1029         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1030         ucol_setStrength(coll, UCOL_TERTIARY);
1031         doTest(coll, strA, strB, UCOL_EQUAL);
1032     }
1033 
1034     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1035 
1036     {
1037       /* New UCA  3.1.1.
1038        * test below used a code point from Desseret, which sorts differently
1039        * than d800 dc00
1040        */
1041         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1042         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1043         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1044         ucol_setStrength(coll, UCOL_TERTIARY);
1045         doTest(coll, strA, strB, UCOL_GREATER);
1046     }
1047 
1048     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1049 
1050     {
1051         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1052         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1053         char  sortKeyA[50];
1054         char  sortKeyAz[50];
1055         char  sortKeyB[50];
1056         char  sortKeyBz[50];
1057         int   r;
1058 
1059         /* there used to be -3 here. Hmmmm.... */
1060         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1061         result = ucol_strcoll(coll, strA, 3, strB, 3);
1062         if (result != UCOL_GREATER) {
1063             log_err("ERROR 1 in test 4\n");
1064         }
1065         result = ucol_strcoll(coll, strA, -1, strB, -1);
1066         if (result != UCOL_EQUAL) {
1067             log_err("ERROR 2 in test 4\n");
1068         }
1069 
1070         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1071         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1072         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1073         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1074 
1075         r = strcmp(sortKeyA, sortKeyAz);
1076         if (r <= 0) {
1077             log_err("Error 3 in test 4\n");
1078         }
1079         r = strcmp(sortKeyA, sortKeyB);
1080         if (r <= 0) {
1081             log_err("Error 4 in test 4\n");
1082         }
1083         r = strcmp(sortKeyAz, sortKeyBz);
1084         if (r != 0) {
1085             log_err("Error 5 in test 4\n");
1086         }
1087 
1088         ucol_setStrength(coll, UCOL_IDENTICAL);
1089         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1090         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1091         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1092         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1093 
1094         r = strcmp(sortKeyA, sortKeyAz);
1095         if (r <= 0) {
1096             log_err("Error 6 in test 4\n");
1097         }
1098         r = strcmp(sortKeyA, sortKeyB);
1099         if (r <= 0) {
1100             log_err("Error 7 in test 4\n");
1101         }
1102         r = strcmp(sortKeyAz, sortKeyBz);
1103         if (r != 0) {
1104             log_err("Error 8 in test 4\n");
1105         }
1106         ucol_setStrength(coll, UCOL_TERTIARY);
1107     }
1108 
1109 
1110     /*  Test 5:  Null characters in non-normal source strings.*/
1111 
1112     {
1113         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1114         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1115         char  sortKeyA[50];
1116         char  sortKeyAz[50];
1117         char  sortKeyB[50];
1118         char  sortKeyBz[50];
1119         int   r;
1120 
1121         result = ucol_strcoll(coll, strA, 6, strB, 6);
1122         if (result != UCOL_GREATER) {
1123             log_err("ERROR 1 in test 5\n");
1124         }
1125         result = ucol_strcoll(coll, strA, -1, strB, -1);
1126         if (result != UCOL_EQUAL) {
1127             log_err("ERROR 2 in test 5\n");
1128         }
1129 
1130         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1131         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1132         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1133         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1134 
1135         r = strcmp(sortKeyA, sortKeyAz);
1136         if (r <= 0) {
1137             log_err("Error 3 in test 5\n");
1138         }
1139         r = strcmp(sortKeyA, sortKeyB);
1140         if (r <= 0) {
1141             log_err("Error 4 in test 5\n");
1142         }
1143         r = strcmp(sortKeyAz, sortKeyBz);
1144         if (r != 0) {
1145             log_err("Error 5 in test 5\n");
1146         }
1147 
1148         ucol_setStrength(coll, UCOL_IDENTICAL);
1149         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1150         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1151         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1152         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1153 
1154         r = strcmp(sortKeyA, sortKeyAz);
1155         if (r <= 0) {
1156             log_err("Error 6 in test 5\n");
1157         }
1158         r = strcmp(sortKeyA, sortKeyB);
1159         if (r <= 0) {
1160             log_err("Error 7 in test 5\n");
1161         }
1162         r = strcmp(sortKeyAz, sortKeyBz);
1163         if (r != 0) {
1164             log_err("Error 8 in test 5\n");
1165         }
1166         ucol_setStrength(coll, UCOL_TERTIARY);
1167     }
1168 
1169 
1170     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1171 
1172     {
1173         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1174         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1175 
1176         result = ucol_strcoll(coll, strA, 5, strB, 5);
1177         if (result != UCOL_LESS) {
1178             log_err("Error 1 in test 6\n");
1179         }
1180         result = ucol_strcoll(coll, strA, -1, strB, -1);
1181         if (result != UCOL_EQUAL) {
1182             log_err("Error 2 in test 6\n");
1183         }
1184     }
1185 
1186     ucol_close(coll);
1187 }
1188 
1189 
1190 
#if 0
/*
 * Disabled test: unescapes each entry of caseBitData, passes it to
 * ucol_uprv_getCaseBits() with the root ("") collator and compares the
 * returned value with the entry of results[] at the same index.
 *
 * NOTE(review): the block is compiled out. As written it never closes the
 * collator it opens and never inspects `status`; both would need attention
 * before re-enabling it.
 */
static void TestGetCaseBit(void) {
  /* Escaped input strings; element i is expected to yield results[i]. */
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case classification for each entry of caseBitData. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    /* Convert the escaped C string into UTF-16 before querying the case bits. */
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
#endif
1218 
TestHangulTailoring(void)1219 static void TestHangulTailoring(void) {
1220     static const char *koreanData[] = {
1221         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1222             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1223             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1224             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1225             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1226             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1227     };
1228 
1229     const char *rules =
1230         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1231         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1232         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1233         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1234         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1235         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1236 
1237 
1238   UErrorCode status = U_ZERO_ERROR;
1239   UChar rlz[2048] = { 0 };
1240   uint32_t rlen = u_unescape(rules, rlz, 2048);
1241 
1242   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1243   if(status == U_FILE_ACCESS_ERROR) {
1244     log_data_err("Is your data around?\n");
1245     return;
1246   } else if(U_FAILURE(status)) {
1247     log_err("Error opening collator\n");
1248     return;
1249   }
1250 
1251   log_verbose("Using start of korean rules\n");
1252 
1253   if(U_SUCCESS(status)) {
1254     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1255   } else {
1256     log_err("Unable to open collator with rules %s\n", rules);
1257   }
1258 
1259   ucol_close(coll);
1260 
1261   log_verbose("Using ko__LOTUS locale\n");
1262   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1263 }
1264 
/*
 * Middle bytes of the secondary and tertiary common-weight compression
 * ranges, as produced by the current sort key implementation
 * (see class CollationKeys).
 * These values track the implementation and change whenever the sort key
 * compression scheme changes.
 */
enum {
    /* Secondary compression bytes span 05..45; the middle is 0x25. */
    SEC_COMMON_MIDDLE = (0x05 + 0x45) / 2,
    /* Tertiary-only compression bytes span 05..C5; the middle is 0x65. */
    TER_ONLY_COMMON_MIDDLE = (0x05 + 0xC5) / 2
};
1275 
TestCompressOverlap(void)1276 static void TestCompressOverlap(void) {
1277     UChar       secstr[150];
1278     UChar       tertstr[150];
1279     UErrorCode  status = U_ZERO_ERROR;
1280     UCollator  *coll;
1281     uint8_t     result[500];
1282     uint32_t    resultlen;
1283     int         count = 0;
1284     uint8_t    *tempptr;
1285 
1286     coll = ucol_open("", &status);
1287 
1288     if (U_FAILURE(status)) {
1289         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1290         return;
1291     }
1292     while (count < 149) {
1293         secstr[count] = 0x0020; /* [06, 05, 05] */
1294         tertstr[count] = 0x0020;
1295         count ++;
1296     }
1297 
1298     /* top down compression ----------------------------------- */
1299     secstr[count] = 0x0332; /* [, 87, 05] */
1300     tertstr[count] = 0x3000; /* [06, 05, 07] */
1301 
1302     /* no compression secstr should have 150 secondary bytes, tertstr should
1303     have 150 tertiary bytes.
1304     with correct compression, secstr should have 6 secondary
1305     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1306     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1307     (void)resultlen;    /* Suppress set but not used warning. */
1308     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1309     while (*(tempptr + 1) != 1) {
1310         /* the last secondary collation element is not checked since it is not
1311         part of the compression */
1312         if (*tempptr < SEC_COMMON_MIDDLE) {
1313             log_err("Secondary top down compression overlapped\n");
1314         }
1315         tempptr ++;
1316     }
1317 
1318     /* tertiary top/bottom/common for en_US is similar to the secondary
1319     top/bottom/common */
1320     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1321     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1322     while (*(tempptr + 1) != 0) {
1323         /* the last secondary collation element is not checked since it is not
1324         part of the compression */
1325         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1326             log_err("Tertiary top down compression overlapped\n");
1327         }
1328         tempptr ++;
1329     }
1330 
1331     /* bottom up compression ------------------------------------- */
1332     secstr[count] = 0;
1333     tertstr[count] = 0;
1334     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1335     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1336     while (*(tempptr + 1) != 1) {
1337         /* the last secondary collation element is not checked since it is not
1338         part of the compression */
1339         if (*tempptr > SEC_COMMON_MIDDLE) {
1340             log_err("Secondary bottom up compression overlapped\n");
1341         }
1342         tempptr ++;
1343     }
1344 
1345     /* tertiary top/bottom/common for en_US is similar to the secondary
1346     top/bottom/common */
1347     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1348     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1349     while (*(tempptr + 1) != 0) {
1350         /* the last secondary collation element is not checked since it is not
1351         part of the compression */
1352         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1353             log_err("Tertiary bottom up compression overlapped\n");
1354         }
1355         tempptr ++;
1356     }
1357 
1358     ucol_close(coll);
1359 }
1360 
/*
 * Runs the three-string ordering below against tailorings that put U+04D0
 * after U+0410.
 *
 * Russian overrides contractions, so the former "ru" locale run
 * (genericLocaleStarter("ru", test, 3)) is not valid anymore.
 *
 * UCA 8.0 drops most of the Cyrillic contractions from the default order
 * (see CLDR ticket #7246 "root collation: remove Cyrillic contractions"),
 * so these former variants are no longer run either:
 *   - the "root" locale,
 *   - "&\\u0410 = \\u0410",
 *   - "&Z < \\u0410",
 *   - "&\\u0410 = \\u0410 < \\u0410\\u0301",
 *   - "&Z < \\u0410 < \\u0410\\u0301".
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* The tailorings that are still meaningful, in the order they are run. */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    size_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < sizeof(activeRules) / sizeof(activeRules[0]); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], test, 3);
    }
}
1383 
/*
 * Runs two three-string orderings against a tailoring that consists only of
 * [suppressContractions] for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* The same rule string is used for both data sets. */
    static const char suppressRule[] = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
1400 
TestContraction(void)1401 static void TestContraction(void) {
1402     const static char *testrules[] = {
1403         "&A = AB / B",
1404         "&A = A\\u0306/\\u0306",
1405         "&c = ch / h"
1406     };
1407     const static UChar testdata[][2] = {
1408         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1409         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1410         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1411     };
1412     const static UChar testdata2[][2] = {
1413         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1414         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1415         {0x0063 /* 'c' */, 0x006C /* 'l' */}
1416     };
1417 #if 0
1418     /*
1419      * These pairs of rule strings are not guaranteed to yield the very same mappings.
1420      * In fact, LDML 24 recommends an improved way of creating mappings
1421      * which always yields different mappings for such pairs. See
1422      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1423      */
1424     const static char *testrules3[] = {
1425         "&z < xyz &xyzw << B",
1426         "&z < xyz &xyz << B / w",
1427         "&z < ch &achm << B",
1428         "&z < ch &a << B / chm",
1429         "&\\ud800\\udc00w << B",
1430         "&\\ud800\\udc00 << B / w",
1431         "&a\\ud800\\udc00m << B",
1432         "&a << B / \\ud800\\udc00m",
1433     };
1434 #endif
1435 
1436     UErrorCode  status   = U_ZERO_ERROR;
1437     UCollator  *coll;
1438     UChar       rule[256] = {0};
1439     uint32_t    rlen     = 0;
1440     int         i;
1441 
1442     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1443         UCollationElements *iter1;
1444         int j = 0;
1445         log_verbose("Rule %s for testing\n", testrules[i]);
1446         rlen = u_unescape(testrules[i], rule, 32);
1447         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1448         if (U_FAILURE(status)) {
1449             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1450             return;
1451         }
1452         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1453         if (U_FAILURE(status)) {
1454             log_err("Collation iterator creation failed\n");
1455             return;
1456         }
1457         while (j < 2) {
1458             UCollationElements *iter2 = ucol_openElements(coll,
1459                                                          &(testdata[i][j]),
1460                                                          1, &status);
1461             uint32_t ce;
1462             if (U_FAILURE(status)) {
1463                 log_err("Collation iterator creation failed\n");
1464                 return;
1465             }
1466             ce = ucol_next(iter2, &status);
1467             while (ce != UCOL_NULLORDER) {
1468                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1469                     log_err("Collation elements in contraction split does not match\n");
1470                     return;
1471                 }
1472                 ce = ucol_next(iter2, &status);
1473             }
1474             j ++;
1475             ucol_closeElements(iter2);
1476         }
1477         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1478             log_err("Collation elements not exhausted\n");
1479             return;
1480         }
1481         ucol_closeElements(iter1);
1482         ucol_close(coll);
1483     }
1484 
1485     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1486     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1487     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1488         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1489                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1490                 testdata2[1][1]);
1491         return;
1492     }
1493     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1494         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1495                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1496                 testdata2[2][1]);
1497         return;
1498     }
1499     ucol_close(coll);
1500 #if 0  /* see above */
1501     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
1502         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1503         UCollator          *coll1,
1504                            *coll2;
1505         UCollationElements *iter1,
1506                            *iter2;
1507         UChar               ch = 0x0042 /* 'B' */;
1508         uint32_t            ce;
1509         rlen = u_unescape(testrules3[i], rule, 32);
1510         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1511         rlen = u_unescape(testrules3[i + 1], rule, 32);
1512         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1513         if (U_FAILURE(status)) {
1514             log_err("Collator creation failed %s\n", testrules[i]);
1515             return;
1516         }
1517         iter1 = ucol_openElements(coll1, &ch, 1, &status);
1518         iter2 = ucol_openElements(coll2, &ch, 1, &status);
1519         if (U_FAILURE(status)) {
1520             log_err("Collation iterator creation failed\n");
1521             return;
1522         }
1523         ce = ucol_next(iter1, &status);
1524         if (U_FAILURE(status)) {
1525             log_err("Retrieving ces failed\n");
1526             return;
1527         }
1528         while (ce != UCOL_NULLORDER) {
1529             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1530             if (ce == ce2) {
1531                 log_verbose("CEs match: %08x\n", ce);
1532             } else {
1533                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1534                 return;
1535             }
1536             ce = ucol_next(iter1, &status);
1537             if (U_FAILURE(status)) {
1538                 log_err("Retrieving ces failed\n");
1539                 return;
1540             }
1541         }
1542         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1543             log_err("CEs not exhausted\n");
1544             return;
1545         }
1546         ucol_closeElements(iter1);
1547         ucol_closeElements(iter2);
1548         ucol_close(coll1);
1549         ucol_close(coll2);
1550     }
1551 #endif
1552 }
1553 
TestExpansion(void)1554 static void TestExpansion(void) {
1555     const static char *testrules[] = {
1556 #if 0
1557         /*
1558          * This seems to have tested that M was not mapped to an expansion.
1559          * I believe the old builder just did that because it computed the extension CEs
1560          * at the very end, which was a bug.
1561          * Among other problems, it violated the core tailoring principle
1562          * by making an earlier rule depend on a later one.
1563          * And, of course, if M did not get an expansion, then it was primary different from K,
1564          * unlike what the rule &K<<M says.
1565          */
1566         "&J << K / B & K << M",
1567 #endif
1568         "&J << K / B << M"
1569     };
1570     const static UChar testdata[][3] = {
1571         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1572         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1573         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1574         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1575         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1576         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1577     };
1578 
1579     UErrorCode  status   = U_ZERO_ERROR;
1580     UCollator  *coll;
1581     UChar       rule[256] = {0};
1582     uint32_t    rlen     = 0;
1583     int         i;
1584 
1585     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1586         int j = 0;
1587         log_verbose("Rule %s for testing\n", testrules[i]);
1588         rlen = u_unescape(testrules[i], rule, 32);
1589         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1590         if (U_FAILURE(status)) {
1591             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1592             return;
1593         }
1594 
1595         for (j = 0; j < 5; j ++) {
1596             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1597         }
1598         ucol_close(coll);
1599     }
1600 }
1601 
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Each inner block below defines one rule string plus one or more candidate
 * orderings and feeds them to genericRulesStarter() or
 * genericRulesStarterWithOptions(). Where several orderings are given for
 * the same rule they differ from each other, so they cannot all hold.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both orderings are tried with decomposition on and then off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the previous block; only the log text differs. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* valOn/valOff hold one value per attribute in att, in the same order. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
1693 
TestBocsuCoverage(void)1694 static void TestBocsuCoverage(void) {
1695   UErrorCode status = U_ZERO_ERROR;
1696   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1697   UChar       test[256] = {0};
1698   uint32_t    tlen     = u_unescape(testString, test, 32);
1699   uint8_t key[256]     = {0};
1700   uint32_t klen         = 0;
1701 
1702   UCollator *coll = ucol_open("", &status);
1703   if(U_SUCCESS(status)) {
1704   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1705 
1706   klen = ucol_getSortKey(coll, test, tlen, key, 256);
1707   (void)klen;    /* Suppress set but not used warning. */
1708 
1709   ucol_close(coll);
1710   } else {
1711     log_data_err("Couldn't open UCA\n");
1712   }
1713 }
1714 
TestVariableTopSetting(void)1715 static void TestVariableTopSetting(void) {
1716   UErrorCode status = U_ZERO_ERROR;
1717   uint32_t varTopOriginal = 0, varTop1, varTop2;
1718   UCollator *coll = ucol_open("", &status);
1719   if(U_SUCCESS(status)) {
1720 
1721   static const UChar nul = 0;
1722   static const UChar space = 0x20;
1723   static const UChar dot = 0x2e;  /* punctuation */
1724   static const UChar degree = 0xb0;  /* symbol */
1725   static const UChar dollar = 0x24;  /* currency symbol */
1726   static const UChar zero = 0x30;  /* digit */
1727 
1728   varTopOriginal = ucol_getVariableTop(coll, &status);
1729   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1730   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1731 
1732   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1733   varTop2 = ucol_getVariableTop(coll, &status);
1734   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1735   if(U_FAILURE(status) || varTop1 != varTop2 ||
1736       !ucol_equal(coll, &nul, 0, &space, 1) ||
1737       ucol_equal(coll, &nul, 0, &dot, 1) ||
1738       ucol_equal(coll, &nul, 0, &degree, 1) ||
1739       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1740       ucol_equal(coll, &nul, 0, &zero, 1) ||
1741       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1742     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1743   }
1744 
1745   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1746   varTop2 = ucol_getVariableTop(coll, &status);
1747   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1748   if(U_FAILURE(status) || varTop1 != varTop2 ||
1749       !ucol_equal(coll, &nul, 0, &space, 1) ||
1750       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1751       ucol_equal(coll, &nul, 0, &degree, 1) ||
1752       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1753       ucol_equal(coll, &nul, 0, &zero, 1) ||
1754       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1755     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1756   }
1757 
1758   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1759   varTop2 = ucol_getVariableTop(coll, &status);
1760   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1761   if(U_FAILURE(status) || varTop1 != varTop2 ||
1762       !ucol_equal(coll, &nul, 0, &space, 1) ||
1763       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1764       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1765       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1766       ucol_equal(coll, &nul, 0, &zero, 1) ||
1767       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1768     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1769   }
1770 
1771   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1772   varTop2 = ucol_getVariableTop(coll, &status);
1773   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1774   if(U_FAILURE(status) || varTop1 != varTop2 ||
1775       !ucol_equal(coll, &nul, 0, &space, 1) ||
1776       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1777       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1778       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1779       ucol_equal(coll, &nul, 0, &zero, 1) ||
1780       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1781     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1782   }
1783 
1784   log_verbose("Testing setting variable top to contractions\n");
1785   {
1786     UChar first[4] = { 0 };
1787     first[0] = 0x0040;
1788     first[1] = 0x0050;
1789     first[2] = 0x0000;
1790 
1791     status = U_ZERO_ERROR;
1792     ucol_setVariableTop(coll, first, -1, &status);
1793 
1794     if(U_SUCCESS(status)) {
1795       log_err("Invalid contraction succeded in setting variable top!\n");
1796     }
1797 
1798   }
1799 
1800   log_verbose("Test restoring variable top\n");
1801 
1802   status = U_ZERO_ERROR;
1803   ucol_restoreVariableTop(coll, varTopOriginal, &status);
1804   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1805     log_err("Couldn't restore old variable top\n");
1806   }
1807 
1808   log_verbose("Testing calling with error set\n");
1809 
1810   status = U_INTERNAL_PROGRAM_ERROR;
1811   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1812   varTop2 = ucol_getVariableTop(coll, &status);
1813   ucol_restoreVariableTop(coll, varTop2, &status);
1814   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1815   varTop2 = ucol_getVariableTop(NULL, &status);
1816   ucol_restoreVariableTop(NULL, varTop2, &status);
1817   if(status != U_INTERNAL_PROGRAM_ERROR) {
1818     log_err("Bad reaction to passed error!\n");
1819   }
1820   ucol_close(coll);
1821   } else {
1822     log_data_err("Couldn't open UCA collator\n");
1823   }
1824 }
1825 
TestMaxVariable()1826 static void TestMaxVariable() {
1827   UErrorCode status = U_ZERO_ERROR;
1828   UColReorderCode oldMax, max;
1829   UCollator *coll;
1830 
1831   static const UChar nul = 0;
1832   static const UChar space = 0x20;
1833   static const UChar dot = 0x2e;  /* punctuation */
1834   static const UChar degree = 0xb0;  /* symbol */
1835   static const UChar dollar = 0x24;  /* currency symbol */
1836   static const UChar zero = 0x30;  /* digit */
1837 
1838   coll = ucol_open("", &status);
1839   if(U_FAILURE(status)) {
1840     log_data_err("Couldn't open root collator\n");
1841     return;
1842   }
1843 
1844   oldMax = ucol_getMaxVariable(coll);
1845   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1846   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1847 
1848   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1849   max = ucol_getMaxVariable(coll);
1850   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1851   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1852       !ucol_equal(coll, &nul, 0, &space, 1) ||
1853       ucol_equal(coll, &nul, 0, &dot, 1) ||
1854       ucol_equal(coll, &nul, 0, &degree, 1) ||
1855       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1856       ucol_equal(coll, &nul, 0, &zero, 1) ||
1857       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1858     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1859   }
1860 
1861   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1862   max = ucol_getMaxVariable(coll);
1863   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1864   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1865       !ucol_equal(coll, &nul, 0, &space, 1) ||
1866       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1867       ucol_equal(coll, &nul, 0, &degree, 1) ||
1868       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1869       ucol_equal(coll, &nul, 0, &zero, 1) ||
1870       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1871     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1872   }
1873 
1874   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1875   max = ucol_getMaxVariable(coll);
1876   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1877   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1878       !ucol_equal(coll, &nul, 0, &space, 1) ||
1879       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1880       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1881       ucol_equal(coll, &nul, 0, &dollar, 1) ||
1882       ucol_equal(coll, &nul, 0, &zero, 1) ||
1883       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1884     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1885   }
1886 
1887   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1888   max = ucol_getMaxVariable(coll);
1889   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1890   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1891       !ucol_equal(coll, &nul, 0, &space, 1) ||
1892       !ucol_equal(coll, &nul, 0, &dot, 1) ||
1893       !ucol_equal(coll, &nul, 0, &degree, 1) ||
1894       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1895       ucol_equal(coll, &nul, 0, &zero, 1) ||
1896       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1897     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1898   }
1899 
1900   log_verbose("Test restoring maxVariable\n");
1901   status = U_ZERO_ERROR;
1902   ucol_setMaxVariable(coll, oldMax, &status);
1903   if(oldMax != ucol_getMaxVariable(coll)) {
1904     log_err("Couldn't restore old maxVariable\n");
1905   }
1906 
1907   log_verbose("Testing calling with error set\n");
1908   status = U_INTERNAL_PROGRAM_ERROR;
1909   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1910   max = ucol_getMaxVariable(coll);
1911   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1912     log_err("Bad reaction to passed error!\n");
1913   }
1914   ucol_close(coll);
1915 }
1916 
TestNonChars(void)1917 static void TestNonChars(void) {
1918   static const char *test[] = {
1919       "\\u0000",  /* ignorable */
1920       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1921       "\\uFDD0", "\\uFDEF",
1922       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1923       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1924       "\\U0003FFFE", "\\U0003FFFF",
1925       "\\U0004FFFE", "\\U0004FFFF",
1926       "\\U0005FFFE", "\\U0005FFFF",
1927       "\\U0006FFFE", "\\U0006FFFF",
1928       "\\U0007FFFE", "\\U0007FFFF",
1929       "\\U0008FFFE", "\\U0008FFFF",
1930       "\\U0009FFFE", "\\U0009FFFF",
1931       "\\U000AFFFE", "\\U000AFFFF",
1932       "\\U000BFFFE", "\\U000BFFFF",
1933       "\\U000CFFFE", "\\U000CFFFF",
1934       "\\U000DFFFE", "\\U000DFFFF",
1935       "\\U000EFFFE", "\\U000EFFFF",
1936       "\\U000FFFFE", "\\U000FFFFF",
1937       "\\U0010FFFE", "\\U0010FFFF",
1938       "\\uFFFF"  /* special character with maximum primary weight */
1939   };
1940   UErrorCode status = U_ZERO_ERROR;
1941   UCollator *coll = ucol_open("en_US", &status);
1942 
1943   log_verbose("Test non characters\n");
1944 
1945   if(U_SUCCESS(status)) {
1946     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1947   } else {
1948     log_err_status(status, "Unable to open collator\n");
1949   }
1950 
1951   ucol_close(coll);
1952 }
1953 
TestExtremeCompression(void)1954 static void TestExtremeCompression(void) {
1955   static char *test[4];
1956   int32_t j = 0, i = 0;
1957 
1958   for(i = 0; i<4; i++) {
1959     test[i] = (char *)malloc(2048*sizeof(char));
1960   }
1961 
1962   for(j = 20; j < 500; j++) {
1963     for(i = 0; i<4; i++) {
1964       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1965       test[i][j-1] = (char)('a'+i);
1966       test[i][j] = 0;
1967     }
1968     genericLocaleStarter("en_US", (const char **)test, 4);
1969   }
1970 
1971 
1972   for(i = 0; i<4; i++) {
1973     free(test[i]);
1974   }
1975 }
1976 
/*
 * Disabled variant of TestExtremeCompression(); everything between #if 0 and
 * #endif is excluded from compilation.
 * NOTE(review): as written it passes `status` rather than `&status` to
 * ucol_open() and never closes `coll`; it also has the same name as the
 * live test above, so it cannot simply be re-enabled.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Rebuilds the four strings for every length; only the last iteration's strings are tested below. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Only test[0] is rewritten here (i<1); the result is not used afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2006 
/*
 * Tailors a number of strings that contain supplementary code points
 * (written as surrogate pairs) and runs the generic rules test on the
 * expected ordering.
 */
static void TestSurrogates(void) {
  /* Tailoring under test. */
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* Strings handed to the generic rules test together with the tailoring. */
  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* All 14 entries take part in the test. */
  genericRulesStarter(rule, test, sizeof(test)/sizeof(test[0]));
}
2027 
2028 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
TestPrefix(void)2029 static void TestPrefix(void) {
2030   uint32_t i;
2031 
2032   static const struct {
2033     const char *rules;
2034     const char *data[50];
2035     const uint32_t len;
2036   } tests[] = {
2037     { "&z <<< z|a",
2038       {"zz", "za"}, 2 },
2039 
2040     { "&z <<< z|   a",
2041       {"zz", "za"}, 2 },
2042     { "[strength I]"
2043       "&a=\\ud900\\udc25"
2044       "&z<<<\\ud900\\udc25|a",
2045       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2046   };
2047 
2048 
2049   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2050     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2051   }
2052 }
2053 
2054 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2055 /* JIS X 4061 collation order implementation                                   */
TestNewJapanese(void)2056 static void TestNewJapanese(void) {
2057 
2058   static const char * const test1[] = {
2059       "\\u30b7\\u30e3\\u30fc\\u30ec",
2060       "\\u30b7\\u30e3\\u30a4",
2061       "\\u30b7\\u30e4\\u30a3",
2062       "\\u30b7\\u30e3\\u30ec",
2063       "\\u3061\\u3087\\u3053",
2064       "\\u3061\\u3088\\u3053",
2065       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2066       "\\u3066\\u30fc\\u305f",
2067       "\\u30c6\\u30fc\\u30bf",
2068       "\\u30c6\\u30a7\\u30bf",
2069       "\\u3066\\u3048\\u305f",
2070       "\\u3067\\u30fc\\u305f",
2071       "\\u30c7\\u30fc\\u30bf",
2072       "\\u30c7\\u30a7\\u30bf",
2073       "\\u3067\\u3048\\u305f",
2074       "\\u3066\\u30fc\\u305f\\u30fc",
2075       "\\u30c6\\u30fc\\u30bf\\u30a1",
2076       "\\u30c6\\u30a7\\u30bf\\u30fc",
2077       "\\u3066\\u3047\\u305f\\u3041",
2078       "\\u3066\\u3048\\u305f\\u30fc",
2079       "\\u3067\\u30fc\\u305f\\u30fc",
2080       "\\u30c7\\u30fc\\u30bf\\u30a1",
2081       "\\u3067\\u30a7\\u305f\\u30a1",
2082       "\\u30c7\\u3047\\u30bf\\u3041",
2083       "\\u30c7\\u30a8\\u30bf\\u30a2",
2084       "\\u3072\\u3086",
2085       "\\u3073\\u3085\\u3042",
2086       "\\u3074\\u3085\\u3042",
2087       "\\u3073\\u3085\\u3042\\u30fc",
2088       "\\u30d3\\u30e5\\u30a2\\u30fc",
2089       "\\u3074\\u3085\\u3042\\u30fc",
2090       "\\u30d4\\u30e5\\u30a2\\u30fc",
2091       "\\u30d2\\u30e5\\u30a6",
2092       "\\u30d2\\u30e6\\u30a6",
2093       "\\u30d4\\u30e5\\u30a6\\u30a2",
2094       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2095       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2096       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2097       "\\u3072\\u3085\\u3093",
2098       "\\u3074\\u3085\\u3093",
2099       "\\u3075\\u30fc\\u308a",
2100       "\\u30d5\\u30fc\\u30ea",
2101       "\\u3075\\u3045\\u308a",
2102       "\\u3075\\u30a5\\u308a",
2103       "\\u3075\\u30a5\\u30ea",
2104       "\\u30d5\\u30a6\\u30ea",
2105       "\\u3076\\u30fc\\u308a",
2106       "\\u30d6\\u30fc\\u30ea",
2107       "\\u3076\\u3045\\u308a",
2108       "\\u30d6\\u30a5\\u308a",
2109       "\\u3077\\u3046\\u308a",
2110       "\\u30d7\\u30a6\\u30ea",
2111       "\\u3075\\u30fc\\u308a\\u30fc",
2112       "\\u30d5\\u30a5\\u30ea\\u30fc",
2113       "\\u3075\\u30a5\\u308a\\u30a3",
2114       "\\u30d5\\u3045\\u308a\\u3043",
2115       "\\u30d5\\u30a6\\u30ea\\u30fc",
2116       "\\u3075\\u3046\\u308a\\u3043",
2117       "\\u30d6\\u30a6\\u30ea\\u30a4",
2118       "\\u3077\\u30fc\\u308a\\u30fc",
2119       "\\u3077\\u30a5\\u308a\\u30a4",
2120       "\\u3077\\u3046\\u308a\\u30fc",
2121       "\\u30d7\\u30a6\\u30ea\\u30a4",
2122       "\\u30d5\\u30fd",
2123       "\\u3075\\u309e",
2124       "\\u3076\\u309d",
2125       "\\u3076\\u3075",
2126       "\\u3076\\u30d5",
2127       "\\u30d6\\u3075",
2128       "\\u30d6\\u30d5",
2129       "\\u3076\\u309e",
2130       "\\u3076\\u3077",
2131       "\\u30d6\\u3077",
2132       "\\u3077\\u309d",
2133       "\\u30d7\\u30fd",
2134       "\\u3077\\u3075",
2135 };
2136 
2137   static const char *test2[] = {
2138     "\\u306f\\u309d", /* H\\u309d */
2139     "\\u30cf\\u30fd", /* K\\u30fd */
2140     "\\u306f\\u306f", /* HH */
2141     "\\u306f\\u30cf", /* HK */
2142     "\\u30cf\\u30cf", /* KK */
2143     "\\u306f\\u309e", /* H\\u309e */
2144     "\\u30cf\\u30fe", /* K\\u30fe */
2145     "\\u306f\\u3070", /* HH\\u309b */
2146     "\\u30cf\\u30d0", /* KK\\u309b */
2147     "\\u306f\\u3071", /* HH\\u309c */
2148     "\\u30cf\\u3071", /* KH\\u309c */
2149     "\\u30cf\\u30d1", /* KK\\u309c */
2150     "\\u3070\\u309d", /* H\\u309b\\u309d */
2151     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2152     "\\u3070\\u306f", /* H\\u309bH */
2153     "\\u30d0\\u30cf", /* K\\u309bK */
2154     "\\u3070\\u309e", /* H\\u309b\\u309e */
2155     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2156     "\\u3070\\u3070", /* H\\u309bH\\u309b */
2157     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2158     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2159     "\\u3070\\u3071", /* H\\u309bH\\u309c */
2160     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2161     "\\u3071\\u309d", /* H\\u309c\\u309d */
2162     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2163     "\\u3071\\u306f", /* H\\u309cH */
2164     "\\u30d1\\u30cf", /* K\\u309cK */
2165     "\\u3071\\u3070", /* H\\u309cH\\u309b */
2166     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2167     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2168     "\\u3071\\u3071", /* H\\u309cH\\u309c */
2169     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2170   };
2171   /*
2172   static const char *test3[] = {
2173     "\\u221er\\u221e",
2174     "\\u221eR#",
2175     "\\u221et\\u221e",
2176     "#r\\u221e",
2177     "#R#",
2178     "#t%",
2179     "#T%",
2180     "8t\\u221e",
2181     "8T\\u221e",
2182     "8t#",
2183     "8T#",
2184     "8t%",
2185     "8T%",
2186     "8t8",
2187     "8T8",
2188     "\\u03c9r\\u221e",
2189     "\\u03a9R%",
2190     "rr\\u221e",
2191     "rR\\u221e",
2192     "Rr\\u221e",
2193     "RR\\u221e",
2194     "RT%",
2195     "rt8",
2196     "tr\\u221e",
2197     "tr8",
2198     "TR8",
2199     "tt8",
2200     "\\u30b7\\u30e3\\u30fc\\u30ec",
2201   };
2202   */
2203   static const UColAttribute att[] = { UCOL_STRENGTH };
2204   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2205 
2206   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2207   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2208 
2209   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
2210   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
2211   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2212   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
2213   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
2214 }
2215 
TestStrCollIdenticalPrefix(void)2216 static void TestStrCollIdenticalPrefix(void) {
2217   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2218   const char* test[] = {
2219     "ab\\ud9b0\\udc70",
2220     "ab\\ud9b0\\udc71"
2221   };
2222   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
2223 }
2224 /* Contractions should have all their canonically equivalent */
2225 /* strings included */
TestContractionClosure(void)2226 static void TestContractionClosure(void) {
2227   static const struct {
2228     const char *rules;
2229     const char *data[10];
2230     const uint32_t len;
2231   } tests[] = {
2232     {   "&b=\\u00e4\\u00e4",
2233       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2234     {   "&b=\\u00C5",
2235       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2236   };
2237   uint32_t i;
2238 
2239 
2240   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2241     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2242   }
2243 }
2244 
2245 /* This tests also fails*/
TestBeforePrefixFailure(void)2246 static void TestBeforePrefixFailure(void) {
2247   static const struct {
2248     const char *rules;
2249     const char *data[10];
2250     const uint32_t len;
2251   } tests[] = {
2252     { "&g <<< a"
2253       "&[before 3]\\uff41 <<< x",
2254       {"x", "\\uff41"}, 2 },
2255     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2256         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2257         "&[before 3]\\u30a7<<<\\u30a9",
2258       {"\\u30a9", "\\u30a7"}, 2 },
2259     {   "&[before 3]\\u30a7<<<\\u30a9"
2260         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2261         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2262       {"\\u30a9", "\\u30a7"}, 2 },
2263   };
2264   uint32_t i;
2265 
2266 
2267   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2268     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2269   }
2270 
2271 #if 0
2272   const char* rule1 =
2273         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2274         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2275         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2276   const char* rule2 =
2277         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2278         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2279         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2280   const char* test[] = {
2281       "\\u30c6\\u30fc\\u30bf",
2282       "\\u30c6\\u30a7\\u30bf",
2283   };
2284   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
2285   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
2286 /* this piece of code should be in some sort of verbose mode     */
2287 /* it gets the collation elements for elements and prints them   */
2288 /* This is useful when trying to see whether the problem is      */
2289   {
2290     UErrorCode status = U_ZERO_ERROR;
2291     uint32_t i = 0;
2292     UCollationElements *it = NULL;
2293     uint32_t CE;
2294     UChar string[256];
2295     uint32_t uStringLen;
2296     UCollator *coll = NULL;
2297 
2298     uStringLen = u_unescape(rule1, string, 256);
2299 
2300     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2301 
2302     /*coll = ucol_open("ja_JP_JIS", &status);*/
2303     it = ucol_openElements(coll, string, 0, &status);
2304 
2305     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
2306       log_verbose("%s\n", test[i]);
2307       uStringLen = u_unescape(test[i], string, 256);
2308       ucol_setText(it, string, uStringLen, &status);
2309 
2310       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2311         log_verbose("%08X\n", CE);
2312       }
2313       log_verbose("\n");
2314 
2315     }
2316 
2317     ucol_closeElements(it);
2318     ucol_close(coll);
2319   }
2320 #endif
2321 }
2322 
TestPrefixCompose(void)2323 static void TestPrefixCompose(void) {
2324   const char* rule1 =
2325         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2326   /*
2327   const char* test[] = {
2328       "\\u30c6\\u30fc\\u30bf",
2329       "\\u30c6\\u30a7\\u30bf",
2330   };
2331   */
2332   {
2333     UErrorCode status = U_ZERO_ERROR;
2334     /*uint32_t i = 0;*/
2335     /*UCollationElements *it = NULL;*/
2336 /*    uint32_t CE;*/
2337     UChar string[256];
2338     uint32_t uStringLen;
2339     UCollator *coll = NULL;
2340 
2341     uStringLen = u_unescape(rule1, string, 256);
2342 
2343     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2344     ucol_close(coll);
2345   }
2346 
2347 
2348 }
2349 
2350 /*
2351 [last variable] last variable value
2352 [last primary ignorable] largest CE for primary ignorable
2353 [last secondary ignorable] largest CE for secondary ignorable
2354 [last tertiary ignorable] largest CE for tertiary ignorable
2355 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2356 */
2357 
TestRuleOptions(void)2358 static void TestRuleOptions(void) {
2359   /* values here are hardcoded and are correct for the current UCA
2360    * when the UCA changes, one might be forced to change these
2361    * values.
2362    */
2363 
2364   /*
2365    * These strings contain the last character before [variable top]
2366    * and the first and second characters (by primary weights) after it.
2367    * See FractionalUCA.txt. For example:
2368       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2369       [variable top = 0C FE]
2370       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2371      and
2372       00B4; [0D 0C, 05, 05]
2373    *
2374    * Note: Starting with UCA 6.0, the [variable top] collation element
2375    * is not the weight of any character or string,
2376    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2377    */
2378 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2379 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2380 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2381 
2382   /*
2383    * This string has to match the character that has the [last regular] weight
2384    * which changes with each UCA version.
2385    * See the bottom of FractionalUCA.txt which says something like
2386       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2387    *
2388    * Note: Starting with UCA 6.0, the [last regular] collation element
2389    * is not the weight of any character or string,
2390    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2391    */
2392 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2393 
2394   static const struct {
2395     const char *rules;
2396     const char *data[10];
2397     const uint32_t len;
2398   } tests[] = {
2399 #if 0
2400     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2401     /* - all befores here amount to zero */
2402     { "&[before 3][first tertiary ignorable]<<<a",
2403         { "\\u0000", "a"}, 2
2404     }, /* you cannot go before first tertiary ignorable */
2405 
2406     { "&[before 3][last tertiary ignorable]<<<a",
2407         { "\\u0000", "a"}, 2
2408     }, /* you cannot go before last tertiary ignorable */
2409 #endif
2410     /*
2411      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2412      * and it *is* possible to "go before" that.
2413      */
2414     { "&[before 3][first secondary ignorable]<<<a",
2415         { "\\u0000", "a"}, 2
2416     },
2417 
2418     { "&[before 3][last secondary ignorable]<<<a",
2419         { "\\u0000", "a"}, 2
2420     },
2421 
2422     /* 'normal' befores */
2423 
2424     /*
2425      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2426      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2427      * because there is no tailoring space before that boundary.
2428      * Made the tests work by tailoring to a space instead.
2429      */
2430     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
2431         {  "c", "b", "\\u0332", "a" }, 4
2432     },
2433 
2434     /* we don't have a code point that corresponds to
2435      * the last primary ignorable
2436      */
2437     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
2438         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2439     },
2440 
2441     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2442         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
2443     },
2444 
2445     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2446         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2447     },
2448 
2449     { "&[first regular]<a"
2450       "&[before 1][first regular]<b",
2451       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2452     },
2453 
2454     { "&[before 1][last regular]<b"
2455       "&[last regular]<a",
2456         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2457     },
2458 
2459     { "&[before 1][first implicit]<b"
2460       "&[first implicit]<a",
2461         { "b", "\\u4e00", "a", "\\u4e01"}, 4
2462     },
2463 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2464     { "&[before 1][last implicit]<b"
2465       "&[last implicit]<a",
2466         { "b", "\\U0010FFFD", "a" }, 3
2467     },
2468 #endif
2469     { "&[last variable]<z"
2470       "&' '<x"  /* was &[last primary ignorable]<x, see above */
2471       "&[last secondary ignorable]<<y"
2472       "&[last tertiary ignorable]<<<w"
2473       "&[top]<u",
2474       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2475     }
2476 
2477   };
2478   uint32_t i;
2479 
2480   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2481     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2482   }
2483 }
2484 
2485 
TestOptimize(void)2486 static void TestOptimize(void) {
2487   /* this is not really a test - just trying out
2488    * whether copying of UCA contents will fail
2489    * Cannot really test, since the functionality
2490    * remains the same.
2491    */
2492   static const struct {
2493     const char *rules;
2494     const char *data[10];
2495     const uint32_t len;
2496   } tests[] = {
2497     /* - all befores here amount to zero */
2498     { "[optimize [\\uAC00-\\uD7FF]]",
2499     { "a", "b"}, 2}
2500   };
2501   uint32_t i;
2502 
2503   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2504     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2505   }
2506 }
2507 
2508 /*
2509 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2510 weiv    ucol_strcollIter?
2511 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2512 weiv    these are the input strings?
2513 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2514 weiv    will check - could be a problem with utf-8 iterator
2515 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2516 weiv    hmmm
2517 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2518 weiv    that doesn't sound right
2519 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2520 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2521 cycheng@ca.ibm.c... yes
2522 weiv    and then do the comparison
2523 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2524 weiv    utf-16 strings look like a little endian ones in the example you sent me
2525 weiv    It could be a bug - let me try to test it out
2526 cycheng@ca.ibm.c... ok
2527 cycheng@ca.ibm.c... we can wait till the conf. call
2528 cycheng@ca.ibm.c... next weke
2529 weiv    that would be great
2530 weiv    hmmm
2531 weiv    I might be wrong
2532 weiv    let me play with it some more
2533 cycheng@ca.ibm.c... ok
2534 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2535 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2536 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2537 weiv    ok
2538 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2539 weiv    thanks
2540 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2541 */
#if 0
/*
 * Disabled single-pair version of the comparison discussed in the chat log
 * above: the same two strings are collated once through UTF-16BE iterators
 * and once through UTF-8 iterators, and both results are expected to match.
 * NOTE(review): compiled out with #if 0; Alexis2() below runs the same kind
 * of comparison over more pairs -- confirm whether this copy can be removed.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  /* Without a collator there is nothing to compare. */
  if(U_FAILURE(status)) {
    log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
    return;
  }

  /* 4 is the size of each byte array above */
  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  /* Only trust the two results if none of the calls above failed. */
  if(U_FAILURE(status)) {
    log_err("Error comparing the strings -> %s\n", u_errorName(status));
  } else if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2582 
2583 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2584 static void Alexis2(void) {
2585   UErrorCode status = U_ZERO_ERROR;
2586   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2589   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2590 
2591   UConverter *conv = NULL;
2592 
2593   UCharIterator U16BEItS, U16BEItT;
2594   UCharIterator U8ItS, U8ItT;
2595 
2596   UCollationResult resU16, resU16BE, resU8;
2597 
2598   static const char* const pairs[][2] = {
2599     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2600     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2601     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2602     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2603     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2604     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2605     { "\\u0020", "\\u0020\\u0000"}
2606 /*
2607 5F20 (my result here)
2608 5F204E008E3F
2609 5F20 (your result here)
2610 */
2611   };
2612 
2613   int32_t i = 0;
2614 
2615   UCollator *coll = ucol_open("", &status);
2616   if(status == U_FILE_ACCESS_ERROR) {
2617     log_data_err("Is your data around?\n");
2618     return;
2619   } else if(U_FAILURE(status)) {
2620     log_err("Error opening collator\n");
2621     return;
2622   }
2623   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2624   conv = ucnv_open("UTF16BE", &status);
2625   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
2626     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2628 
2629     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2630 
2631     log_verbose("Result of strcoll is %i\n", resU16);
2632 
2633     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2634     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2635     (void)U16BELenS;    /* Suppress set but not used warnings. */
2636     (void)U16BELenT;
2637 
2638     /* use the original sizes, as the result from converter is in bytes */
2639     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2640     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2641 
2642     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2643 
2644     log_verbose("Result of U16BE is %i\n", resU16BE);
2645 
2646     if(resU16 != resU16BE) {
2647       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2648     }
2649 
2650     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2651     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2652 
2653     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2654     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2655 
2656     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2657 
2658     if(resU16 != resU8) {
2659       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2660     }
2661 
2662   }
2663 
2664   ucol_close(coll);
2665   ucnv_close(conv);
2666 }
2667 
TestHebrewUCA(void)2668 static void TestHebrewUCA(void) {
2669   UErrorCode status = U_ZERO_ERROR;
2670   static const char *first[] = {
2671     "d790d6b8d79cd795d6bcd7a9",
2672     "d790d79cd79ed7a7d799d799d7a1",
2673     "d790d6b4d79ed795d6bcd7a9",
2674   };
2675 
2676   char utf8String[3][256];
2677   UChar utf16String[3][256];
2678 
2679   int32_t i = 0, j = 0;
2680   int32_t sizeUTF8[3];
2681   int32_t sizeUTF16[3];
2682 
2683   UCollator *coll = ucol_open("", &status);
2684   if (U_FAILURE(status)) {
2685       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2686       return;
2687   }
2688   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2689 
2690   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
2691     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2692     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2693     log_verbose("%i: ");
2694     for(j = 0; j < sizeUTF16[i]; j++) {
2695       /*log_verbose("\\u%04X", utf16String[i][j]);*/
2696       log_verbose("%04X", utf16String[i][j]);
2697     }
2698     log_verbose("\n");
2699   }
2700   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
2701     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
2702       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2703     }
2704   }
2705 
2706   ucol_close(coll);
2707 
2708 }
2709 
TestPartialSortKeyTermination(void)2710 static void TestPartialSortKeyTermination(void) {
2711   static const char* cases[] = {
2712     "\\u1234\\u1234\\udc00",
2713     "\\udc00\\ud800\\ud800"
2714   };
2715 
2716   int32_t i;
2717 
2718   UErrorCode status = U_ZERO_ERROR;
2719 
2720   UCollator *coll = ucol_open("", &status);
2721 
2722   UCharIterator iter;
2723 
2724   UChar currCase[256];
2725   int32_t length = 0;
2726   int32_t pKeyLen = 0;
2727 
2728   uint8_t key[256];
2729 
2730   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
2731     uint32_t state[2] = {0, 0};
2732     length = u_unescape(cases[i], currCase, 256);
2733     uiter_setString(&iter, currCase, length);
2734     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2735     (void)pKeyLen;   /* Suppress set but not used warning. */
2736 
2737     log_verbose("Done\n");
2738 
2739   }
2740   ucol_close(coll);
2741 }
2742 
TestSettings(void)2743 static void TestSettings(void) {
2744   static const char* cases[] = {
2745     "apple",
2746       "Apple"
2747   };
2748 
2749   static const char* locales[] = {
2750     "",
2751       "en"
2752   };
2753 
2754   UErrorCode status = U_ZERO_ERROR;
2755 
2756   int32_t i = 0, j = 0;
2757 
2758   UChar source[256], target[256];
2759   int32_t sLen = 0, tLen = 0;
2760 
2761   UCollator *collateObject = NULL;
2762   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
2763     collateObject = ucol_open(locales[i], &status);
2764     ucol_setStrength(collateObject, UCOL_PRIMARY);
2765     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2766     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
2767       sLen = u_unescape(cases[j-1], source, 256);
2768       source[sLen] = 0;
2769       tLen = u_unescape(cases[j], target, 256);
2770       source[tLen] = 0;
2771       doTest(collateObject, source, target, UCOL_EQUAL);
2772     }
2773     ucol_close(collateObject);
2774   }
2775 }
2776 
/**
 * Runs the ucol_equals() checks for one locale.
 *
 * The function closes the target it is given and then reuses the variable for
 * a clone of source and for a collator built from source's rules; source is
 * never closed here. The clone and rule checks only run when locName is the
 * actual locale reported for source.
 *
 * @param locName locale ID to compare against source's actual locale
 * @param source  reference collator; stays open
 * @param target  second collator to compare with source; closed by this function
 * @return number of problems that were logged (0 if none)
 */
static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    const UChar *sourceRules = NULL;
    int32_t sourceRulesLen = 0;
    UParseError parseError;
    UColAttributeValue french = UCOL_OFF;
    const char *actualLocale = NULL;

    if(!ucol_equals(source, target)) {
        log_err("Same collators, different address not equal\n");
        errorNo++;
    }
    ucol_close(target);
    target = NULL;

    actualLocale = ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status);
    if(U_FAILURE(status)) {
        log_err("Error getting the actual locale for %s - %s\n", locName, u_errorName(status));
        errorNo++;
        return errorNo;
    }
    /* a missing locale name is treated like a different locale */
    if(actualLocale != NULL && uprv_strcmp(locName, actualLocale) == 0) {
        target = ucol_safeClone(source, NULL, NULL, &status);
        if(U_FAILURE(status)) {
            log_err("Error creating clone\n");
            errorNo++;
            return errorNo;
        }
        if(!ucol_equals(source, target)) {
            log_err("Collator different from it's clone\n");
            errorNo++;
        }
        /* flip one attribute on the clone so that it must differ from source */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if(french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if(U_FAILURE(status)) {
            log_err("Error setting attributes\n");
            errorNo++;
            ucol_close(target);   /* the clone is still open on this path */
            return errorNo;
        }
        if(ucol_equals(source, target)) {
            log_err("Collators same even when options changed\n");
            errorNo++;
        }
        ucol_close(target);

        sourceRules = ucol_getRules(source, &sourceRulesLen);
        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
        if(U_FAILURE(status)) {
            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
            errorNo++;
            return errorNo;
        }
        /* Note: The tailoring rule string is an optional data item. */
        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
            log_err("Collator different from collator that was created from the same rules\n");
            errorNo++;
        }
        ucol_close(target);
    }
    return errorNo;
}
2834 
2835 
TestEquals(void)2836 static void TestEquals(void) {
2837     /* ucol_equals is not currently a public API. There is a chance that it will become
2838     * something like this.
2839     */
2840     /* test whether the two collators instantiated from the same locale are equal */
2841     UErrorCode status = U_ZERO_ERROR;
2842     UParseError parseError;
2843     int32_t noOfLoc = uloc_countAvailable();
2844     const char *locName = NULL;
2845     UCollator *source = NULL, *target = NULL;
2846     int32_t i = 0;
2847 
2848     const char* rules[] = {
2849         "&l < lj <<< Lj <<< LJ",
2850         "&n < nj <<< Nj <<< NJ",
2851         "&ae <<< \\u00e4",
2852         "&AE <<< \\u00c4"
2853     };
2854     /*
2855     const char* badRules[] = {
2856     "&l <<< Lj",
2857     "&n < nj <<< nJ <<< NJ",
2858     "&a <<< \\u00e4",
2859     "&AE <<< \\u00c4 <<< x"
2860     };
2861     */
2862 
2863     UChar sourceRules[1024], targetRules[1024];
2864     int32_t sourceRulesSize = 0, targetRulesSize = 0;
2865     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
2866 
2867     for(i = 0; i < rulesSize; i++) {
2868         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2869         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2870     }
2871 
2872     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2873     if(status == U_FILE_ACCESS_ERROR) {
2874         log_data_err("Is your data around?\n");
2875         return;
2876     } else if(U_FAILURE(status)) {
2877         log_err("Error opening collator\n");
2878         return;
2879     }
2880     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2881     if(!ucol_equals(source, target)) {
2882         log_err("Equivalent collators not equal!\n");
2883     }
2884     ucol_close(source);
2885     ucol_close(target);
2886 
2887     source = ucol_open("root", &status);
2888     target = ucol_open("root", &status);
2889     log_verbose("Testing root\n");
2890     if(!ucol_equals(source, source)) {
2891         log_err("Same collator not equal\n");
2892     }
2893     if(TestEqualsForCollator("root", source, target)) {
2894         log_err("Errors for root\n");
2895     }
2896     ucol_close(source);
2897 
2898     for(i = 0; i<noOfLoc; i++) {
2899         status = U_ZERO_ERROR;
2900         locName = uloc_getAvailable(i);
2901         /*if(hasCollationElements(locName)) {*/
2902         log_verbose("Testing equality for locale %s\n", locName);
2903         source = ucol_open(locName, &status);
2904         target = ucol_open(locName, &status);
2905         if (U_FAILURE(status)) {
2906             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2907             continue;
2908         }
2909         if(TestEqualsForCollator(locName, source, target)) {
2910             log_err("Errors for locale %s\n", locName);
2911         }
2912         ucol_close(source);
2913         /*}*/
2914     }
2915 }
2916 
TestJ2726(void)2917 static void TestJ2726(void) {
2918     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2919     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2920     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2921     UErrorCode status = U_ZERO_ERROR;
2922     UCollator *coll = ucol_open("en", &status);
2923     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2924     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2925     doTest(coll, a, aSpace, UCOL_EQUAL);
2926     doTest(coll, aSpace, a, UCOL_EQUAL);
2927     doTest(coll, a, spaceA, UCOL_EQUAL);
2928     doTest(coll, spaceA, a, UCOL_EQUAL);
2929     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2930     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2931     ucol_close(coll);
2932 }
2933 
NullRule(void)2934 static void NullRule(void) {
2935     UChar r[3] = {0};
2936     UErrorCode status = U_ZERO_ERROR;
2937     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2938     if(U_SUCCESS(status)) {
2939         log_err("This should have been an error!\n");
2940         ucol_close(coll);
2941     } else {
2942         status = U_ZERO_ERROR;
2943     }
2944     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2945     if(U_FAILURE(status)) {
2946         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2947     } else {
2948         ucol_close(coll);
2949     }
2950 }
2951 
2952 /**
2953  * Test for CollationElementIterator previous and next for the whole set of
2954  * unicode characters with normalization on.
2955  */
TestNumericCollation(void)2956 static void TestNumericCollation(void)
2957 {
2958     UErrorCode status = U_ZERO_ERROR;
2959 
2960     const static char *basicTestStrings[]={
2961     "hello1",
2962     "hello2",
2963     "hello2002",
2964     "hello2003",
2965     "hello123456",
2966     "hello1234567",
2967     "hello10000000",
2968     "hello100000000",
2969     "hello1000000000",
2970     "hello10000000000",
2971     };
2972 
2973     const static char *preZeroTestStrings[]={
2974     "avery10000",
2975     "avery010000",
2976     "avery0010000",
2977     "avery00010000",
2978     "avery000010000",
2979     "avery0000010000",
2980     "avery00000010000",
2981     "avery000000010000",
2982     };
2983 
2984     const static char *thirtyTwoBitNumericStrings[]={
2985     "avery42949672960",
2986     "avery42949672961",
2987     "avery42949672962",
2988     "avery429496729610"
2989     };
2990 
2991      const static char *longNumericStrings[]={
2992      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2993         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2994         are treated as multiple collation elements. */
2995     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2996     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2997     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2999     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
3000     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3001     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3002     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3003     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3004     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3005     };
3006 
3007     const static char *supplementaryDigits[] = {
3008       "\\uD835\\uDFCE", /* 0 */
3009       "\\uD835\\uDFCF", /* 1 */
3010       "\\uD835\\uDFD0", /* 2 */
3011       "\\uD835\\uDFD1", /* 3 */
3012       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3013       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3014       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3015       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3016       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3017       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3018     };
3019 
3020     const static char *foreignDigits[] = {
3021       "\\u0661",
3022         "\\u0662",
3023         "\\u0663",
3024       "\\u0661\\u0660",
3025       "\\u0661\\u0662",
3026       "\\u0661\\u0663",
3027       "\\u0662\\u0660",
3028       "\\u0662\\u0662",
3029       "\\u0662\\u0663",
3030       "\\u0663\\u0660",
3031       "\\u0663\\u0662",
3032       "\\u0663\\u0663"
3033     };
3034 
3035     const static char *evenZeroes[] = {
3036       "2000",
3037       "2001",
3038         "2002",
3039         "2003"
3040     };
3041 
3042     UColAttribute att = UCOL_NUMERIC_COLLATION;
3043     UColAttributeValue val = UCOL_ON;
3044 
3045     /* Open our collator. */
3046     UCollator* coll = ucol_open("root", &status);
3047     if (U_FAILURE(status)){
3048         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3049               myErrorName(status));
3050         return;
3051     }
3052     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
3053     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
3054     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
3055     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
3056     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
3057     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
3058 
3059     /* Setting up our collator to do digits. */
3060     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3061     if (U_FAILURE(status)){
3062         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3063               myErrorName(status));
3064         return;
3065     }
3066 
3067     /*
3068        Testing that prepended zeroes still yield the correct collation behavior.
3069        We expect that every element in our strings array will be equal.
3070     */
3071     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
3072 
3073     ucol_close(coll);
3074 }
3075 
TestTibetanConformance(void)3076 static void TestTibetanConformance(void)
3077 {
3078     const char* test[] = {
3079         "\\u0FB2\\u0591\\u0F71\\u0061",
3080         "\\u0FB2\\u0F71\\u0061"
3081     };
3082 
3083     UErrorCode status = U_ZERO_ERROR;
3084     UCollator *coll = ucol_open("", &status);
3085     UChar source[100];
3086     UChar target[100];
3087     int result;
3088     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3089     if (U_SUCCESS(status)) {
3090         u_unescape(test[0], source, 100);
3091         u_unescape(test[1], target, 100);
3092         doTest(coll, source, target, UCOL_EQUAL);
3093         result = ucol_strcoll(coll, source, -1,   target, -1);
3094         log_verbose("result %d\n", result);
3095         if (UCOL_EQUAL != result) {
3096             log_err("Tibetan comparison error\n");
3097         }
3098     }
3099     ucol_close(coll);
3100 
3101     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3102 }
3103 
/**
 * Hands two strings that start with the same ideograph (U+4E56) to
 * genericLocaleStarter() for the "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *pinyinCases[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", pinyinCases, LEN(pinyinCases));
}
3108 
3109 /**
3110  * Iterate through the given iterator, checking to see that all the strings
3111  * in the expected array are present.
3112  * @param expected array of strings we expect to see, or NULL
3113  * @param expectedCount number of elements of expected, or 0
3114  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3115 static int32_t checkUEnumeration(const char* msg,
3116                                  UEnumeration* iter,
3117                                  const char** expected,
3118                                  int32_t expectedCount) {
3119     UErrorCode ec = U_ZERO_ERROR;
3120     int32_t i = 0, n, j, bit;
3121     int32_t seenMask = 0;
3122 
3123     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3124     n = uenum_count(iter, &ec);
3125     if (!assertSuccess("count", &ec)) return -1;
3126     log_verbose("%s = [", msg);
3127     for (;; ++i) {
3128         const char* s = uenum_next(iter, NULL, &ec);
3129         if (!assertSuccess("snext", &ec) || s == NULL) break;
3130         if (i != 0) log_verbose(",");
3131         log_verbose("%s", s);
3132         /* check expected list */
3133         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3134             if ((seenMask&bit) == 0 &&
3135                 uprv_strcmp(s, expected[j]) == 0) {
3136                 seenMask |= bit;
3137                 break;
3138             }
3139         }
3140     }
3141     log_verbose("] (%d)\n", i);
3142     assertTrue("count verified", i==n);
3143     /* did we see all expected strings? */
3144     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3145         if ((seenMask&bit)!=0) {
3146             log_verbose("Ok: \"%s\" seen\n", expected[j]);
3147         } else {
3148             log_err("FAIL: \"%s\" not seen\n", expected[j]);
3149         }
3150     }
3151     return n;
3152 }
3153 
3154 /**
3155  * Test new API added for separate collation tree.
3156  */
TestSeparateTrees(void)3157 static void TestSeparateTrees(void) {
3158     UErrorCode ec = U_ZERO_ERROR;
3159     UEnumeration *e = NULL;
3160     int32_t n = -1;
3161     UBool isAvailable;
3162     char loc[256];
3163 
3164     static const char* AVAIL[] = { "en", "de" };
3165 
3166     static const char* KW[] = { "collation" };
3167 
3168     static const char* KWVAL[] = { "phonebook", "stroke" };
3169 
3170 #if !UCONFIG_NO_SERVICE
3171     e = ucol_openAvailableLocales(&ec);
3172     if (e != NULL) {
3173         assertSuccess("ucol_openAvailableLocales", &ec);
3174         assertTrue("ucol_openAvailableLocales!=0", e!=0);
3175         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
3176         (void)n;    /* Suppress set but not used warnings. */
3177         /* Don't need to check n because we check list */
3178         uenum_close(e);
3179     } else {
3180         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3181     }
3182 #endif
3183 
3184     e = ucol_getKeywords(&ec);
3185     if (e != NULL) {
3186         assertSuccess("ucol_getKeywords", &ec);
3187         assertTrue("ucol_getKeywords!=0", e!=0);
3188         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
3189         /* Don't need to check n because we check list */
3190         uenum_close(e);
3191     } else {
3192         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3193     }
3194 
3195     e = ucol_getKeywordValues(KW[0], &ec);
3196     if (e != NULL) {
3197         assertSuccess("ucol_getKeywordValues", &ec);
3198         assertTrue("ucol_getKeywordValues!=0", e!=0);
3199         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
3200         /* Don't need to check n because we check list */
3201         uenum_close(e);
3202     } else {
3203         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3204     }
3205 
3206     /* Try setting a warning before calling ucol_getKeywordValues */
3207     ec = U_USING_FALLBACK_WARNING;
3208     e = ucol_getKeywordValues(KW[0], &ec);
3209     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3210         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3211         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
3212         /* Don't need to check n because we check list */
3213         uenum_close(e);
3214     }
3215 
3216     /*
3217 U_DRAFT int32_t U_EXPORT2
3218 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3219                              const char* locale, UBool* isAvailable,
3220                              UErrorCode* status);
3221 }
3222 */
3223     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3224                                      &isAvailable, &ec);
3225     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3226         assertEquals("getFunctionalEquivalent(de)", "root", loc);
3227         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3228                    isAvailable == TRUE);
3229     }
3230 
3231     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3232                                      &isAvailable, &ec);
3233     if (assertSuccess("getFunctionalEquivalent", &ec)) {
3234         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3235         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3236                    isAvailable == FALSE);
3237     }
3238 }
3239 
/* supersedes TestJ784 */
/**
 * Runs two ordered string lists through genericRulesStarter() with a set of
 * "[before 2]" tailoring rules for accented vowels, and through
 * genericLocaleStarter() with the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* syllables with and without a macron on the vowel */
    static const char *test[] = {
        "l\\u0101",  "la",
        "l\\u0101n", "lan ",
        "l\\u0113",  "le",
        "l\\u0113n", "len"
    };

    /* case and accent variants of "a" after x/X, then followed by x */
    static const char *test2[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa",       "xA",       "Xa",       "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax",       "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
3300 
TestBeforeTightening(void)3301 static void TestBeforeTightening(void) {
3302     static const struct {
3303         const char *rules;
3304         UErrorCode expectedStatus;
3305     } tests[] = {
3306         { "&[before 1]a<x", U_ZERO_ERROR },
3307         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3308         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3309         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3310         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3311         { "&[before 2]a<<x",U_ZERO_ERROR },
3312         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3313         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3314         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3315         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3316         { "&[before 3]a<<<x",U_ZERO_ERROR },
3317         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3318         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3319     };
3320 
3321     int32_t i = 0;
3322 
3323     UErrorCode status = U_ZERO_ERROR;
3324     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3325     uint32_t rlen = 0;
3326 
3327     UCollator *coll = NULL;
3328 
3329 
3330     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
3331         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3332         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3333         if(status != tests[i].expectedStatus) {
3334             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3335                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3336         }
3337         ucol_close(coll);
3338         status = U_ZERO_ERROR;
3339     }
3340 
3341 }
3342 
3343 /*
3344 &m < a
3345 &[before 1] a < x <<< X << q <<< Q < z
3346 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3347 
3348 &m < a
3349 &[before 2] a << x <<< X << q <<< Q < z
3350 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3351 
3352 &m < a
3353 &[before 3] a <<< x <<< X << q <<< Q < z
3354 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3355 
3356 
3357 &m << a
3358 &[before 1] a < x <<< X << q <<< Q < z
3359 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3360 
3361 &m << a
3362 &[before 2] a << x <<< X << q <<< Q < z
3363 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3364 
3365 &m << a
3366 &[before 3] a <<< x <<< X << q <<< Q < z
3367 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3368 
3369 
3370 &m <<< a
3371 &[before 1] a < x <<< X << q <<< Q < z
3372 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3373 
3374 &m <<< a
3375 &[before 2] a << x <<< X << q <<< Q < z
3376 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3377 
3378 &m <<< a
3379 &[before 3] a <<< x <<< X << q <<< Q < z
3380 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3381 
3382 
3383 &[before 1] s < x <<< X << q <<< Q < z
3384 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3385 
3386 &[before 2] s << x <<< X << q <<< Q < z
3387 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3388 
3389 &[before 3] s <<< x <<< X << q <<< Q < z
3390 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3391 
3392 
3393 &[before 1] \u24DC < x <<< X << q <<< Q < z
3394 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3395 
3396 &[before 2] \u24DC << x <<< X << q <<< Q < z
3397 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3398 
3399 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3400 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3401 */
3402 
3403 
3404 #if 0
/*
 * Disabled: requires features not yet supported.
 * Each entry pairs a tailoring that uses a [before N] reset with the order
 * the listed strings are expected to sort in under that tailoring.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } cases[] = {
        /* &m < a */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* &m << a */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* &m <<< a */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* reset directly on s */
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* reset directly on U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t idx;

    for (idx = 0; idx < (int32_t)LEN(cases); ++idx) {
        genericRulesStarter(cases[idx].rules, cases[idx].order, cases[idx].size);
    }
}
3450 #endif
3451 
TestTailorNULL(void)3452 static void TestTailorNULL( void ) {
3453     const static char* rule = "&a <<< '\\u0000'";
3454     UErrorCode status = U_ZERO_ERROR;
3455     UChar rlz[RULE_BUFFER_LEN] = { 0 };
3456     uint32_t rlen = 0;
3457     UChar a = 1, null = 0;
3458     UCollationResult res = UCOL_EQUAL;
3459 
3460     UCollator *coll = NULL;
3461 
3462 
3463     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3464     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3465 
3466     if(U_FAILURE(status)) {
3467         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3468     } else {
3469         res = ucol_strcoll(coll, &a, 1, &null, 1);
3470 
3471         if(res != UCOL_LESS) {
3472             log_err("NULL was not tailored properly!\n");
3473         }
3474     }
3475 
3476     ucol_close(coll);
3477 }
3478 
3479 static void
TestUpperFirstQuaternary(void)3480 TestUpperFirstQuaternary(void)
3481 {
3482   const char* tests[] = { "B", "b", "Bb", "bB" };
3483   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3484   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3485   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3486 }
3487 
3488 static void
TestJ4960(void)3489 TestJ4960(void)
3490 {
3491   const char* tests[] = { "\\u00e2T", "aT" };
3492   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3493   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3494   const char* tests2[] = { "a", "A" };
3495   const char* rule = "&[first tertiary ignorable]=A=a";
3496   UColAttribute att2[] = { UCOL_CASE_LEVEL };
3497   UColAttributeValue attVals2[] = { UCOL_ON };
3498   /* Test whether we correctly ignore primary ignorables on case level when */
3499   /* we have only primary & case level */
3500   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
3501   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3502   /* and case level */
3503   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3504   /* Test whether completely ignorable letters have case level info (they shouldn't) */
3505   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
3506 }
3507 
3508 static void
TestJ5223(void)3509 TestJ5223(void)
3510 {
3511   static const char *test = "this is a test string";
3512   UChar ustr[256];
3513   int32_t ustr_length = u_unescape(test, ustr, 256);
3514   unsigned char sortkey[256];
3515   int32_t sortkey_length;
3516   UErrorCode status = U_ZERO_ERROR;
3517   static UCollator *coll = NULL;
3518   coll = ucol_open("root", &status);
3519   if(U_FAILURE(status)) {
3520     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3521     return;
3522   }
3523   ucol_setStrength(coll, UCOL_PRIMARY);
3524   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3525   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3526   if (U_FAILURE(status)) {
3527     log_err("Failed setting atributes\n");
3528     return;
3529   }
3530   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3531   if (sortkey_length > 256) return;
3532 
3533   /* we mark the position where the null byte should be written in advance */
3534   sortkey[sortkey_length-1] = 0xAA;
3535 
3536   /* we set the buffer size one byte higher than needed */
3537   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3538     sortkey_length+1);
3539 
3540   /* no error occurs (for me) */
3541   if (sortkey[sortkey_length-1] == 0xAA) {
3542     log_err("Hit bug at first try\n");
3543   }
3544 
3545   /* we mark the position where the null byte should be written again */
3546   sortkey[sortkey_length-1] = 0xAA;
3547 
3548   /* this time we set the buffer size to the exact amount needed */
3549   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3550     sortkey_length);
3551 
3552   /* now the trailing null byte is not written */
3553   if (sortkey[sortkey_length-1] == 0xAA) {
3554     log_err("Hit bug at second try\n");
3555   }
3556 
3557   ucol_close(coll);
3558 }
3559 
/*
 * Ticket J5232: regression test for the Thai partial sort key problem.
 * The two Thai strings differ only in their last combining mark and must
 * sort in the listed order under the "th" collator.
 */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
3571 
/*
 * Ticket J5367: a tailoring that combines a contraction reset ("&Ny << Y")
 * with a reset on [first secondary ignorable]; "a" is expected to sort
 * before "y" under it.
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *ordered[] = { "a", "y" };

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
3579 
3580 static void
TestVI5913(void)3581 TestVI5913(void)
3582 {
3583     UErrorCode status = U_ZERO_ERROR;
3584     int32_t i, j;
3585     UCollator *coll =NULL;
3586     uint8_t  resColl[100], expColl[100];
3587     int32_t  rLen, tLen, ruleLen, sLen, kLen;
3588     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3589     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3590     /*
3591      * Note: Just tailoring &z<ae^ does not work as expected:
3592      * The UCA spec requires for discontiguous contractions that they
3593      * extend an *existing match* by one combining mark at a time.
3594      * Therefore, ae must be a contraction so that the builder finds
3595      * discontiguous contractions for ae^, for example with an intervening underdot.
3596      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3597      */
3598     UChar rule3[256]={
3599         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3600         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3601         0};
3602     static const UChar tData[][20]={
3603         {0x1EAC, 0},
3604         {0x0041, 0x0323, 0x0302, 0},
3605         {0x1EA0, 0x0302, 0},
3606         {0x00C2, 0x0323, 0},
3607         {0x1ED8, 0},  /* O with dot and circumflex */
3608         {0x1ECC, 0x0302, 0},
3609         {0x1EB7, 0},
3610         {0x1EA1, 0x0306, 0},
3611     };
3612     static const UChar tailorData[][20]={
3613         {0x1FA2, 0},  /* Omega with 3 combining marks */
3614         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3615         {0x1FF3, 0x0313, 0x0300, 0},
3616         {0x1F60, 0x0300, 0x0345, 0},
3617         {0x1F62, 0x0345, 0},
3618         {0x1FA0, 0x0300, 0},
3619     };
3620     static const UChar tailorData2[][20]={
3621         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3622         {0x0073, 0x0323, 0x030C, 0},
3623         {0x0073, 0x030C, 0x0323, 0},
3624     };
3625     static const UChar tailorData3[][20]={
3626         {0x007a, 0},  /*  z */
3627         {0x0061, 0x0065, 0},  /*  a + e */
3628         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3629         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3630         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3631         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3632         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3633         {0x00EA, 0},  /* e with circumflex  */
3634     };
3635 
3636     /* Test Vietnamese sort. */
3637     coll = ucol_open("vi", &status);
3638     if(U_FAILURE(status)) {
3639         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3640         return;
3641     }
3642     log_verbose("\n\nVI collation:");
3643     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3644         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3645     }
3646     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3647         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3648     }
3649     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3650         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3651     }
3652     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3653         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3654     }
3655 
3656     for (j=0; j<8; j++) {
3657         tLen = u_strlen(tData[j]);
3658         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3659         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3660         for(i = 0; i<rLen; i++) {
3661             log_verbose(" %02X", resColl[i]);
3662         }
3663     }
3664 
3665     ucol_close(coll);
3666 
3667     /* Test Romanian sort. */
3668     coll = ucol_open("ro", &status);
3669     log_verbose("\n\nRO collation:");
3670     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3671         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3672     }
3673     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3674         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3675     }
3676     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3677         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3678     }
3679 
3680     for (j=4; j<8; j++) {
3681         tLen = u_strlen(tData[j]);
3682         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3683         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3684         for(i = 0; i<rLen; i++) {
3685             log_verbose(" %02X", resColl[i]);
3686         }
3687     }
3688     ucol_close(coll);
3689 
3690     /* Test the precomposed Greek character with 3 combining marks. */
3691     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3692     ruleLen = u_strlen(rule);
3693     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3694     if (U_FAILURE(status)) {
3695         log_err("ucol_openRules failed with %s\n", u_errorName(status));
3696         return;
3697     }
3698     sLen = u_strlen(tailorData[0]);
3699     for (j=1; j<6; j++) {
3700         tLen = u_strlen(tailorData[j]);
3701         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3702             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3703         }
3704     }
3705     /* Test getSortKey. */
3706     tLen = u_strlen(tailorData[0]);
3707     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3708     for (j=0; j<6; j++) {
3709         tLen = u_strlen(tailorData[j]);
3710         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3711         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3712             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3713             for(i = 0; i<rLen; i++) {
3714                 log_err(" %02X", resColl[i]);
3715             }
3716         }
3717     }
3718     ucol_close(coll);
3719 
3720     log_verbose("\n\nTailoring test for s with caron:");
3721     ruleLen = u_strlen(rule2);
3722     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3723     tLen = u_strlen(tailorData2[0]);
3724     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3725     for (j=1; j<3; j++) {
3726         tLen = u_strlen(tailorData2[j]);
3727         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3728         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3729             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3730             for(i = 0; i<rLen; i++) {
3731                 log_err(" %02X", resColl[i]);
3732             }
3733         }
3734     }
3735     ucol_close(coll);
3736 
3737     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3738     ruleLen = u_strlen(rule3);
3739     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3740     tLen = u_strlen(tailorData3[3]);
3741     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3742     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3743     for(i = 0; i<kLen; i++) {
3744         log_verbose(" %02X", expColl[i]);
3745     }
3746     for (j=4; j<6; j++) {
3747         tLen = u_strlen(tailorData3[j]);
3748         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3749 
3750         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3751             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3752             for(i = 0; i<rLen; i++) {
3753                 log_err(" %02X", resColl[i]);
3754             }
3755         }
3756 
3757         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3758          for(i = 0; i<rLen; i++) {
3759              log_verbose(" %02X", resColl[i]);
3760          }
3761     }
3762     ucol_close(coll);
3763 }
3764 
3765 static void
TestTailor6179(void)3766 TestTailor6179(void)
3767 {
3768     UErrorCode status = U_ZERO_ERROR;
3769     int32_t i;
3770     UCollator *coll =NULL;
3771     uint8_t  resColl[100];
3772     int32_t  rLen, tLen, ruleLen;
3773     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3774     static const UChar rule1[]={
3775             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3776             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3777             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3778             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3779     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3780     static const UChar rule2[]={
3781             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3782             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3783             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3784             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3785             0x3C,0x3C,0x20,0x62,0};
3786 
3787     static const UChar tData1[][4]={
3788         {0x61, 0},
3789         {0x62, 0},
3790         { 0xFDD0,0x009E, 0}
3791     };
3792     static const UChar tData2[][4]={
3793         {0x61, 0},
3794         {0x62, 0},
3795         { 0xFDD0,0x009E, 0}
3796      };
3797 
3798     /*
3799      * These values from FractionalUCA.txt will change,
3800      * and need to be updated here.
3801      * TODO: Make this not check for particular sort keys.
3802      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3803      */
3804     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3805     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3806     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3807     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3808 
3809     UParseError parseError;
3810 
3811     /* Test [Last Primary ignorable] */
3812 
3813     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3814     ruleLen = u_strlen(rule1);
3815     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3816     if (U_FAILURE(status)) {
3817         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3818         return;
3819     }
3820     tLen = u_strlen(tData1[0]);
3821     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3822     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3823         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3824         for(i = 0; i<rLen; i++) {
3825             log_err(" %02X", resColl[i]);
3826         }
3827         log_err("\n");
3828     }
3829     tLen = u_strlen(tData1[1]);
3830     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3831     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3832         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3833         for(i = 0; i<rLen; i++) {
3834             log_err(" %02X", resColl[i]);
3835         }
3836         log_err("\n");
3837     }
3838     ucol_close(coll);
3839 
3840 
3841     /* Test [Last Secondary ignorable] */
3842     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3843     ruleLen = u_strlen(rule2);
3844     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3845     if (U_FAILURE(status)) {
3846         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3847         log_info("  offset=%d  \"%s\" | \"%s\"\n",
3848                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3849         return;
3850     }
3851     tLen = u_strlen(tData2[0]);
3852     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3853     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3854         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3855         for(i = 0; i<rLen; i++) {
3856             log_err(" %02X", resColl[i]);
3857         }
3858         log_err("\n");
3859     }
3860     tLen = u_strlen(tData2[1]);
3861     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3862     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3863       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3864       for(i = 0; i<rLen; i++) {
3865         log_err(" %02X", resColl[i]);
3866       }
3867       log_err("\n");
3868     }
3869     ucol_close(coll);
3870 }
3871 
3872 static void
TestUCAPrecontext(void)3873 TestUCAPrecontext(void)
3874 {
3875     UErrorCode status = U_ZERO_ERROR;
3876     int32_t i, j;
3877     UCollator *coll =NULL;
3878     uint8_t  resColl[100], prevColl[100];
3879     int32_t  rLen, tLen, ruleLen;
3880     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3881     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3882     /* & l middle-dot << a  a is an expansion. */
3883 
3884     UChar tData1[][20]={
3885             { 0xb7, 0},  /* standalone middle dot(0xb7) */
3886             { 0x387, 0}, /* standalone middle dot(0x387) */
3887             { 0x61, 0},  /* a */
3888             { 0x6C, 0},  /* l */
3889             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3890             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3891             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3892             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3893             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3894             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3895             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3896      };
3897 
3898     log_verbose("\n\nEN collation:");
3899     coll = ucol_open("en", &status);
3900     if (U_FAILURE(status)) {
3901         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3902         return;
3903     }
3904     for (j=0; j<11; j++) {
3905         tLen = u_strlen(tData1[j]);
3906         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3907         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3908             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3909                     j, tData1[j]);
3910         }
3911         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3912         for(i = 0; i<rLen; i++) {
3913             log_verbose(" %02X", resColl[i]);
3914         }
3915         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3916      }
3917      ucol_close(coll);
3918 
3919 
3920      log_verbose("\n\nJA collation:");
3921      coll = ucol_open("ja", &status);
3922      if (U_FAILURE(status)) {
3923          log_err("Tailoring test: &z <<a|- failed!");
3924          return;
3925      }
3926      for (j=0; j<11; j++) {
3927          tLen = u_strlen(tData1[j]);
3928          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3929          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3930              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3931                      j, tData1[j]);
3932          }
3933          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3934          for(i = 0; i<rLen; i++) {
3935              log_verbose(" %02X", resColl[i]);
3936          }
3937          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3938       }
3939       ucol_close(coll);
3940 
3941 
3942       log_verbose("\n\nTailoring test: & middle dot < a ");
3943       ruleLen = u_strlen(rule1);
3944       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3945       if (U_FAILURE(status)) {
3946           log_err("Tailoring test: & middle dot < a failed!");
3947           return;
3948       }
3949       for (j=0; j<11; j++) {
3950           tLen = u_strlen(tData1[j]);
3951           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3952           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3953               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3954                       j, tData1[j]);
3955           }
3956           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3957           for(i = 0; i<rLen; i++) {
3958               log_verbose(" %02X", resColl[i]);
3959           }
3960           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3961        }
3962        ucol_close(coll);
3963 
3964 
3965        log_verbose("\n\nTailoring test: & l middle-dot << a ");
3966        ruleLen = u_strlen(rule2);
3967        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3968        if (U_FAILURE(status)) {
3969            log_err("Tailoring test: & l middle-dot << a failed!");
3970            return;
3971        }
3972        for (j=0; j<11; j++) {
3973            tLen = u_strlen(tData1[j]);
3974            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3975            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3976                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3977                        j, tData1[j]);
3978            }
3979            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3980                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3981                        j, tData1[j]);
3982            }
3983            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3984            for(i = 0; i<rLen; i++) {
3985                log_verbose(" %02X", resColl[i]);
3986            }
3987            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3988         }
3989         ucol_close(coll);
3990 }
3991 
3992 static void
TestOutOfBuffer5468(void)3993 TestOutOfBuffer5468(void)
3994 {
3995     static const char *test = "\\u4e00";
3996     UChar ustr[256];
3997     int32_t ustr_length = u_unescape(test, ustr, 256);
3998     unsigned char shortKeyBuf[1];
3999     int32_t sortkey_length;
4000     UErrorCode status = U_ZERO_ERROR;
4001     static UCollator *coll = NULL;
4002 
4003     coll = ucol_open("root", &status);
4004     if(U_FAILURE(status)) {
4005       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4006       return;
4007     }
4008     ucol_setStrength(coll, UCOL_PRIMARY);
4009     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4010     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4011     if (U_FAILURE(status)) {
4012       log_err("Failed setting atributes\n");
4013       return;
4014     }
4015 
4016     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4017     if (sortkey_length != 4) {
4018         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4019     }
4020     log_verbose("length of sortKey is %d", sortkey_length);
4021     ucol_close(coll);
4022 }
4023 
4024 #define TSKC_DATA_SIZE 5
4025 #define TSKC_BUF_SIZE  50
4026 static void
TestSortKeyConsistency(void)4027 TestSortKeyConsistency(void)
4028 {
4029     UErrorCode icuRC = U_ZERO_ERROR;
4030     UCollator* ucol;
4031     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4032 
4033     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4035     int32_t i, j, i2;
4036 
4037     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4038     if (U_FAILURE(icuRC))
4039     {
4040         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4041         return;
4042     }
4043 
4044     for (i = 0; i < TSKC_DATA_SIZE; i++)
4045     {
4046         UCharIterator uiter;
4047         uint32_t state[2] = { 0, 0 };
4048         int32_t dataLen = i+1;
4049         for (j=0; j<TSKC_BUF_SIZE; j++)
4050             bufFull[i][j] = bufPart[i][j] = 0;
4051 
4052         /* Full sort key */
4053         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4054 
4055         /* Partial sort key */
4056         uiter_setString(&uiter, data, dataLen);
4057         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4058         if (U_FAILURE(icuRC))
4059         {
4060             log_err("ucol_nextSortKeyPart failed\n");
4061             ucol_close(ucol);
4062             return;
4063         }
4064 
4065         for (i2=0; i2<i; i2++)
4066         {
4067             UBool fullMatch = TRUE;
4068             UBool partMatch = TRUE;
4069             for (j=0; j<TSKC_BUF_SIZE; j++)
4070             {
4071                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4072                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4073             }
4074             if (fullMatch != partMatch) {
4075                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4076                                   : "partial key was consistent, but full key changed\n");
4077                 ucol_close(ucol);
4078                 return;
4079             }
4080         }
4081     }
4082 
4083     /*=============================================*/
4084    ucol_close(ucol);
4085 }
4086 
4087 /* ticket: 6101 */
TestCroatianSortKey(void)4088 static void TestCroatianSortKey(void) {
4089     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4090     UErrorCode status = U_ZERO_ERROR;
4091     UCollator *ucol;
4092     UCharIterator iter;
4093 
4094     static const UChar text[] = { 0x0044, 0xD81A };
4095 
4096     size_t length = sizeof(text)/sizeof(*text);
4097 
4098     uint8_t textSortKey[32];
4099     size_t lenSortKey = 32;
4100     size_t actualSortKeyLen;
4101     uint32_t uStateInfo[2] = { 0, 0 };
4102 
4103     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4104     if (U_FAILURE(status)) {
4105         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4106         return;
4107     }
4108 
4109     uiter_setString(&iter, text, length);
4110 
4111     actualSortKeyLen = ucol_nextSortKeyPart(
4112         ucol, &iter, (uint32_t*)uStateInfo,
4113         textSortKey, lenSortKey, &status
4114         );
4115 
4116     if (actualSortKeyLen == lenSortKey) {
4117         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4118     }
4119 
4120     ucol_close(ucol);
4121 }
4122 
4123 /* ticket: 6140 */
4124 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4125  * they are both Hiragana and Katakana
4126  */
4127 #define SORTKEYLEN 50
TestHiragana(void)4128 static void TestHiragana(void) {
4129     UErrorCode status = U_ZERO_ERROR;
4130     UCollator* ucol;
4131     UCollationResult strcollresult;
4132     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4133     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4134     int32_t data1Len = sizeof(data1)/sizeof(*data1);
4135     int32_t data2Len = sizeof(data2)/sizeof(*data2);
4136     int32_t i, j;
4137     uint8_t sortKey1[SORTKEYLEN];
4138     uint8_t sortKey2[SORTKEYLEN];
4139 
4140     UCharIterator uiter1;
4141     UCharIterator uiter2;
4142     uint32_t state1[2] = { 0, 0 };
4143     uint32_t state2[2] = { 0, 0 };
4144     int32_t keySize1;
4145     int32_t keySize2;
4146 
4147     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4148             &status);
4149     if (U_FAILURE(status)) {
4150         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4151         return;
4152     }
4153 
4154     /* Start of full sort keys */
4155     /* Full sort key1 */
4156     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4157     /* Full sort key2 */
4158     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4159     if (keySize1 == keySize2) {
4160         for (i = 0; i < keySize1; i++) {
4161             if (sortKey1[i] != sortKey2[i]) {
4162                 log_err("Full sort keys are different. Should be equal.");
4163             }
4164         }
4165     } else {
4166         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4167     }
4168     /* End of full sort keys */
4169 
4170     /* Start of partial sort keys */
4171     /* Partial sort key1 */
4172     uiter_setString(&uiter1, data1, data1Len);
4173     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4174     /* Partial sort key2 */
4175     uiter_setString(&uiter2, data2, data2Len);
4176     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4177     if (U_SUCCESS(status) && keySize1 == keySize2) {
4178         for (j = 0; j < keySize1; j++) {
4179             if (sortKey1[j] != sortKey2[j]) {
4180                 log_err("Partial sort keys are different. Should be equal");
4181             }
4182         }
4183     } else {
4184         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4185     }
4186     /* End of partial sort keys */
4187 
4188     /* Start of strcoll */
4189     /* Use ucol_strcoll() to determine ordering */
4190     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4191     if (strcollresult != UCOL_EQUAL) {
4192         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4193     }
4194 
4195     ucol_close(ucol);
4196 }
4197 
/*
 * Convenient struct for running collation tests.
 * One entry describes a single comparison: two strings and the result the
 * collator is expected to return for them.  Each string buffer has room for
 * MAX_TOKEN_LEN UChars; the tables in this file list fewer code units than
 * that, so the remaining elements are zero-initialized.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* Expected outcome of comparing source with target (e.g. UCOL_LESS, UCOL_EQUAL) */
} OneTestCase;
4204 
4205 /*
4206  * Utility function to test one collation test case.
4207  * @param testcases Array of test cases.
4208  * @param n_testcases Size of the array testcases.
4209  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4210  * @param n_rules Size of the array str_rules.
4211  */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4212 static void doTestOneTestCase(const OneTestCase testcases[],
4213                               int n_testcases,
4214                               const char* str_rules[],
4215                               int n_rules)
4216 {
4217   int rule_no, testcase_no;
4218   UChar rule[500];
4219   int32_t length = 0;
4220   UErrorCode status = U_ZERO_ERROR;
4221   UParseError parse_error;
4222   UCollator  *myCollation;
4223 
4224   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4225 
4226     length = u_unescape(str_rules[rule_no], rule, 500);
4227     if (length == 0) {
4228         log_err("ERROR: The rule cannot be unescaped: %s\n");
4229         return;
4230     }
4231     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4232     if(U_FAILURE(status)){
4233         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4234         log_info("  offset=%d  \"%s\" | \"%s\"\n",
4235                  parse_error.offset,
4236                  aescstrdup(parse_error.preContext, -1),
4237                  aescstrdup(parse_error.postContext, -1));
4238         return;
4239     }
4240     log_verbose("Testing the <<* syntax\n");
4241     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4242     ucol_setStrength(myCollation, UCOL_TERTIARY);
4243     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4244       doTest(myCollation,
4245              testcases[testcase_no].source,
4246              testcases[testcase_no].target,
4247              testcases[testcase_no].result
4248              );
4249     }
4250     ucol_close(myCollation);
4251   }
4252 }
4253 
/*
 * Expected comparison results shared by TestSameStrengthList,
 * TestSameStrengthListQuoted and TestSameStrengthListRanges.
 * The trailing comment on each row spells out the two strings being compared.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = LEN(rangeTestcases);
4281 
/*
 * Expected comparison results shared by TestSameStrengthListSupplemental and
 * TestSameStrengthListSupplementalRanges.  Supplementary code points are
 * written as UTF-16 surrogate pairs (0xd800 0xdc00 is U+10000, and so on).
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same pair as the previous row) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
4293 
/*
 * Expected comparison results shared by TestSameStrengthListQwerty and
 * TestSameStrengthListQuotedQwerty.  Every row expects the left string to
 * sort before the right one (UCOL_LESS).
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
4314 
TestSameStrengthList(void)4315 static void TestSameStrengthList(void)
4316 {
4317   const char* strRules[] = {
4318     /* Normal */
4319     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4320 
4321     /* Lists */
4322     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4323   };
4324   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4325 }
4326 
TestSameStrengthListQuoted(void)4327 static void TestSameStrengthListQuoted(void)
4328 {
4329   const char* strRules[] = {
4330     /* Lists with quoted characters */
4331     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4332     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4333 
4334     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4335     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4336 
4337     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4338     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4339   };
4340   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4341 }
4342 
TestSameStrengthListSupplemental(void)4343 static void TestSameStrengthListSupplemental(void)
4344 {
4345   const char* strRules[] = {
4346     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4347     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4348     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4349     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4350   };
4351   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4352 }
4353 
TestSameStrengthListQwerty(void)4354 static void TestSameStrengthListQwerty(void)
4355 {
4356   const char* strRules[] = {
4357     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4358     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4359     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4360     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4361     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4362 
4363     /* Quoted characters also will work if two quoted characters are not consecutive.  */
4364     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4365 
4366     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4367     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4368 
4369  };
4370   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4371 }
4372 
TestSameStrengthListQuotedQwerty(void)4373 static void TestSameStrengthListQuotedQwerty(void)
4374 {
4375   const char* strRules[] = {
4376     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4377     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4378     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4379 
4380     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4381     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4382    };
4383   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4384 }
4385 
TestSameStrengthListRanges(void)4386 static void TestSameStrengthListRanges(void)
4387 {
4388   const char* strRules[] = {
4389     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4390   };
4391   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4392 }
4393 
TestSameStrengthListSupplementalRanges(void)4394 static void TestSameStrengthListSupplementalRanges(void)
4395 {
4396   const char* strRules[] = {
4397     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4398     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4399   };
4400   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4401 }
4402 
TestSpecialCharacters(void)4403 static void TestSpecialCharacters(void)
4404 {
4405   const char* strRules[] = {
4406     /* Normal */
4407     "&';'<'+'<','<'-'<'&'<'*'",
4408 
4409     /* List */
4410     "&';'<*'+,-&*'",
4411 
4412     /* Range */
4413     "&';'<*'+'-'-&*'",
4414   };
4415 
4416   const static OneTestCase specialCharacterStrings[] = {
4417     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4418     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4419     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4420     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4421   };
4422   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
4423 }
4424 
TestPrivateUseCharacters(void)4425 static void TestPrivateUseCharacters(void)
4426 {
4427   const char* strRules[] = {
4428     /* Normal */
4429     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4430     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4431   };
4432 
4433   const static OneTestCase privateUseCharacterStrings[] = {
4434     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4435     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4436     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4437     { {0xe2da}, {0xe2db}, UCOL_LESS },
4438     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4439     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4440   };
4441   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4442 }
4443 
TestPrivateUseCharactersInList(void)4444 static void TestPrivateUseCharactersInList(void)
4445 {
4446   const char* strRules[] = {
4447     /* List */
4448     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4449     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4450     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4451   };
4452 
4453   const static OneTestCase privateUseCharacterStrings[] = {
4454     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4455     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4456     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4457     { {0xe2da}, {0xe2db}, UCOL_LESS },
4458     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4459     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4460   };
4461   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4462 }
4463 
TestPrivateUseCharactersInRange(void)4464 static void TestPrivateUseCharactersInRange(void)
4465 {
4466   const char* strRules[] = {
4467     /* Range */
4468     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4469     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4470     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4471   };
4472 
4473   const static OneTestCase privateUseCharacterStrings[] = {
4474     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4475     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4476     { {0xe2d9}, {0xe2da}, UCOL_LESS },
4477     { {0xe2da}, {0xe2db}, UCOL_LESS },
4478     { {0xe2db}, {0xe2dc}, UCOL_LESS },
4479     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4480   };
4481   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4482 }
4483 
TestInvalidListsAndRanges(void)4484 static void TestInvalidListsAndRanges(void)
4485 {
4486   const char* invalidRules[] = {
4487     /* Range not in starred expression */
4488     "&\\ufffe<\\uffff-\\U00010002",
4489 
4490     /* Range without start */
4491     "&a<*-c",
4492 
4493     /* Range without end */
4494     "&a<*b-",
4495 
4496     /* More than one hyphen */
4497     "&a<*b-g-l",
4498 
4499     /* Range in the wrong order */
4500     "&a<*k-b",
4501 
4502   };
4503 
4504   UChar rule[500];
4505   UErrorCode status = U_ZERO_ERROR;
4506   UParseError parse_error;
4507   int n_rules = LEN(invalidRules);
4508   int rule_no;
4509   int length;
4510   UCollator  *myCollation;
4511 
4512   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4513 
4514     length = u_unescape(invalidRules[rule_no], rule, 500);
4515     if (length == 0) {
4516         log_err("ERROR: The rule cannot be unescaped: %s\n");
4517         return;
4518     }
4519     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4520     (void)myCollation;      /* Suppress set but not used warning. */
4521     if(!U_FAILURE(status)){
4522       log_err("ERROR: Could not cause a failure as expected: \n");
4523     }
4524     status = U_ZERO_ERROR;
4525   }
4526 }
4527 
4528 /*
4529  * This test ensures that characters placed before a character in a different script have the same lead byte
4530  * in their collation key before and after script reordering.
4531  */
TestBeforeRuleWithScriptReordering(void)4532 static void TestBeforeRuleWithScriptReordering(void)
4533 {
4534     UParseError error;
4535     UErrorCode status = U_ZERO_ERROR;
4536     UCollator  *myCollation;
4537     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4538     UChar rules[500];
4539     uint32_t rulesLength = 0;
4540     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4541     UCollationResult collResult;
4542 
4543     uint8_t baseKey[256];
4544     uint32_t baseKeyLength;
4545     uint8_t beforeKey[256];
4546     uint32_t beforeKeyLength;
4547 
4548     UChar base[] = { 0x03b1 }; /* base */
4549     int32_t baseLen = sizeof(base)/sizeof(*base);
4550 
4551     UChar before[] = { 0x0e01 }; /* ko kai */
4552     int32_t beforeLen = sizeof(before)/sizeof(*before);
4553 
4554     /*UChar *data[] = { before, base };
4555     genericRulesStarter(srules, data, 2);*/
4556 
4557     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4558 
4559     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4560     (void)baseKeyLength;
4561 
4562     /* build collator */
4563     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4564 
4565     rulesLength = u_unescape(srules, rules, LEN(rules));
4566     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4567     if(U_FAILURE(status)) {
4568         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4569         return;
4570     }
4571 
4572     /* check collation results - before rule applied but not script reordering */
4573     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4574     if (collResult != UCOL_GREATER) {
4575         log_err("Collation result not correct before script reordering = %d\n", collResult);
4576     }
4577 
4578     /* check the lead byte of the collation keys before script reordering */
4579     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4580     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4581     if (baseKey[0] != beforeKey[0]) {
4582       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4583    }
4584 
4585     /* reorder the scripts */
4586     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4587     if(U_FAILURE(status)) {
4588         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4589         return;
4590     }
4591 
4592     /* check collation results - before rule applied and after script reordering */
4593     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4594     if (collResult != UCOL_GREATER) {
4595         log_err("Collation result not correct after script reordering = %d\n", collResult);
4596     }
4597 
4598     /* check the lead byte of the collation keys after script reordering */
4599     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4600     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4601     if (baseKey[0] != beforeKey[0]) {
4602         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4603     }
4604 
4605     ucol_close(myCollation);
4606 }
4607 
4608 /*
4609  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4610  */
TestNonLeadBytesDuringCollationReordering(void)4611 static void TestNonLeadBytesDuringCollationReordering(void)
4612 {
4613     UErrorCode status = U_ZERO_ERROR;
4614     UCollator  *myCollation;
4615     int32_t reorderCodes[1] = {USCRIPT_GREEK};
4616 
4617     uint8_t baseKey[256];
4618     uint32_t baseKeyLength;
4619     uint8_t reorderKey[256];
4620     uint32_t reorderKeyLength;
4621 
4622     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4623 
4624     uint32_t i;
4625 
4626 
4627     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4628 
4629     /* build collator tertiary */
4630     myCollation = ucol_open("", &status);
4631     ucol_setStrength(myCollation, UCOL_TERTIARY);
4632     if(U_FAILURE(status)) {
4633         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4634         return;
4635     }
4636     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4637 
4638     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4639     if(U_FAILURE(status)) {
4640         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4641         return;
4642     }
4643     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4644 
4645     if (baseKeyLength != reorderKeyLength) {
4646         log_err("Key lengths not the same during reordering.\n");
4647         return;
4648     }
4649 
4650     for (i = 1; i < baseKeyLength; i++) {
4651         if (baseKey[i] != reorderKey[i]) {
4652             log_err("Collation key bytes not the same at position %d.\n", i);
4653             return;
4654         }
4655     }
4656     ucol_close(myCollation);
4657 
4658     /* build collator quaternary */
4659     myCollation = ucol_open("", &status);
4660     ucol_setStrength(myCollation, UCOL_QUATERNARY);
4661     if(U_FAILURE(status)) {
4662         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4663         return;
4664     }
4665     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4666 
4667     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4668     if(U_FAILURE(status)) {
4669         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4670         return;
4671     }
4672     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4673 
4674     if (baseKeyLength != reorderKeyLength) {
4675         log_err("Key lengths not the same during reordering.\n");
4676         return;
4677     }
4678 
4679     for (i = 1; i < baseKeyLength; i++) {
4680         if (baseKey[i] != reorderKey[i]) {
4681             log_err("Collation key bytes not the same at position %d.\n", i);
4682             return;
4683         }
4684     }
4685     ucol_close(myCollation);
4686 }
4687 
4688 /*
4689  * Test reordering API.
4690  */
TestReorderingAPI(void)4691 static void TestReorderingAPI(void)
4692 {
4693     UErrorCode status = U_ZERO_ERROR;
4694     UCollator  *myCollation;
4695     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4696     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4697     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4698     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4699     UCollationResult collResult;
4700     int32_t retrievedReorderCodesLength;
4701     int32_t retrievedReorderCodes[10];
4702     UChar greekString[] = { 0x03b1 };
4703     UChar punctuationString[] = { 0x203e };
4704     int loopIndex;
4705 
4706     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4707 
4708     /* build collator tertiary */
4709     myCollation = ucol_open("", &status);
4710     ucol_setStrength(myCollation, UCOL_TERTIARY);
4711     if(U_FAILURE(status)) {
4712         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4713         return;
4714     }
4715 
4716     /* set the reorderding */
4717     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4718     if (U_FAILURE(status)) {
4719         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4720         return;
4721     }
4722 
4723     /* get the reordering */
4724     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4725     if (status != U_BUFFER_OVERFLOW_ERROR) {
4726         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4727         return;
4728     }
4729     status = U_ZERO_ERROR;
4730     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4731         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4732         return;
4733     }
4734     /* now let's really get it */
4735     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4736     if (U_FAILURE(status)) {
4737         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4738         return;
4739     }
4740     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4741         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4742         return;
4743     }
4744     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4745         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4746             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4747             return;
4748         }
4749     }
4750     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4751     if (collResult != UCOL_LESS) {
4752         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4753         return;
4754     }
4755 
4756     /* clear the reordering */
4757     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4758     if (U_FAILURE(status)) {
4759         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4760         return;
4761     }
4762 
4763     /* get the reordering again */
4764     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4765     if (retrievedReorderCodesLength != 0) {
4766         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4767         return;
4768     }
4769 
4770     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4771     if (collResult != UCOL_GREATER) {
4772         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4773         return;
4774     }
4775 
4776     /* clear the reordering using [NONE] */
4777     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4778     if (U_FAILURE(status)) {
4779         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4780         return;
4781     }
4782 
4783     /* get the reordering again */
4784     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4785     if (retrievedReorderCodesLength != 0) {
4786         log_err_status(status,
4787                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4788                        retrievedReorderCodesLength);
4789         return;
4790     }
4791 
4792     /* test for error condition on duplicate reorder codes */
4793     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
4794     if (!U_FAILURE(status)) {
4795         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4796         return;
4797     }
4798 
4799     status = U_ZERO_ERROR;
4800     /* test for reorder codes after a reset code */
4801     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
4802     if (!U_FAILURE(status)) {
4803         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4804         return;
4805     }
4806 
4807     ucol_close(myCollation);
4808 }
4809 
4810 /*
4811  * Test reordering API.
4812  */
TestReorderingAPIWithRuleCreatedCollator(void)4813 static void TestReorderingAPIWithRuleCreatedCollator(void)
4814 {
4815     UErrorCode status = U_ZERO_ERROR;
4816     UCollator  *myCollation;
4817     UChar rules[90];
4818     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4819     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4820     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4821     UCollationResult collResult;
4822     int32_t retrievedReorderCodesLength;
4823     int32_t retrievedReorderCodes[10];
4824     static const UChar greekString[] = { 0x03b1 };
4825     static const UChar punctuationString[] = { 0x203e };
4826     static const UChar hanString[] = { 0x65E5, 0x672C };
4827     int loopIndex;
4828 
4829     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4830 
4831     /* build collator from rules */
4832     u_uastrcpy(rules, "[reorder Hani Grek]");
4833     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4834     if(U_FAILURE(status)) {
4835         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4836         return;
4837     }
4838 
4839     /* get the reordering */
4840     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4841     if (U_FAILURE(status)) {
4842         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4843         return;
4844     }
4845     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4846         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4847         return;
4848     }
4849     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4850         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4851             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4852             return;
4853         }
4854     }
4855     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
4856     if (collResult != UCOL_GREATER) {
4857         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4858         return;
4859     }
4860 
4861     /* set the reordering */
4862     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4863     if (U_FAILURE(status)) {
4864         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4865         return;
4866     }
4867 
4868     /* get the reordering */
4869     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4870     if (status != U_BUFFER_OVERFLOW_ERROR) {
4871         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4872         return;
4873     }
4874     status = U_ZERO_ERROR;
4875     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4876         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4877         return;
4878     }
4879     /* now let's really get it */
4880     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4881     if (U_FAILURE(status)) {
4882         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4883         return;
4884     }
4885     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4886         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4887         return;
4888     }
4889     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4890         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4891             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4892             return;
4893         }
4894     }
4895     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4896     if (collResult != UCOL_LESS) {
4897         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4898         return;
4899     }
4900 
4901     /* clear the reordering */
4902     ucol_setReorderCodes(myCollation, NULL, 0, &status);
4903     if (U_FAILURE(status)) {
4904         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4905         return;
4906     }
4907 
4908     /* get the reordering again */
4909     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4910     if (retrievedReorderCodesLength != 0) {
4911         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4912         return;
4913     }
4914 
4915     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4916     if (collResult != UCOL_GREATER) {
4917         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4918         return;
4919     }
4920 
4921     /* reset the reordering */
4922     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4923     if (U_FAILURE(status)) {
4924         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4925         return;
4926     }
4927     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4928     if (U_FAILURE(status)) {
4929         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4930         return;
4931     }
4932     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4933         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4934         return;
4935     }
4936     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4937         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4938             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4939             return;
4940         }
4941     }
4942 
4943     ucol_close(myCollation);
4944 }
4945 
containsExpectedScript(const int32_t scripts[],int32_t length,int32_t expectedScript)4946 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4947     int32_t i;
4948     for (i = 0; i < length; ++i) {
4949         if (expectedScript == scripts[i]) { return TRUE; }
4950     }
4951     return FALSE;
4952 }
4953 
TestEquivalentReorderingScripts(void)4954 static void TestEquivalentReorderingScripts(void) {
4955     // Beginning with ICU 55, collation reordering moves single scripts
4956     // rather than groups of scripts,
4957     // except where scripts share a range and sort primary-equal.
4958     UErrorCode status = U_ZERO_ERROR;
4959     int32_t equivalentScripts[100];
4960     int32_t length;
4961     int i;
4962     int32_t prevScript;
4963     /* These scripts are expected to be equivalent. */
4964     static const int32_t expectedScripts[] = {
4965         USCRIPT_HIRAGANA,
4966         USCRIPT_KATAKANA,
4967         USCRIPT_KATAKANA_OR_HIRAGANA
4968     };
4969 
4970     equivalentScripts[0] = 0;
4971     length = ucol_getEquivalentReorderCodes(
4972             USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
4973     if (U_FAILURE(status)) {
4974         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4975         return;
4976     }
4977     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4978         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4979                 "length expected 1, was = %d; expected [%d] was [%d]\n",
4980                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4981     }
4982 
4983     length = ucol_getEquivalentReorderCodes(
4984             USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
4985     if (U_FAILURE(status)) {
4986         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4987         return;
4988     }
4989     if (length != LEN(expectedScripts)) {
4990         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4991                 "expected %d, was = %d\n",
4992                 LEN(expectedScripts), length);
4993     }
4994     prevScript = -1;
4995     for (i = 0; i < length; ++i) {
4996         int32_t script = equivalentScripts[i];
4997         if (script <= prevScript) {
4998             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4999         }
5000         prevScript = script;
5001     }
5002     for (i = 0; i < LEN(expectedScripts); i++) {
5003         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5004             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5005                     expectedScripts[i]);
5006         }
5007     }
5008 
5009     length = ucol_getEquivalentReorderCodes(
5010             USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status);
5011     if (U_FAILURE(status)) {
5012         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5013         return;
5014     }
5015     if (length != LEN(expectedScripts)) {
5016         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5017                 "expected %d, was = %d\n",
5018                 LEN(expectedScripts), length);
5019     }
5020     for (i = 0; i < LEN(expectedScripts); i++) {
5021         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5022             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5023                     expectedScripts[i]);
5024         }
5025     }
5026 
5027     length = ucol_getEquivalentReorderCodes(
5028             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
5029     if (U_FAILURE(status) || length != LEN(expectedScripts)) {
5030         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5031                 "expected %d, was = %d\n",
5032                 LEN(expectedScripts), length);
5033     }
5034 
5035     length = ucol_getEquivalentReorderCodes(
5036             USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5037     if (U_FAILURE(status) || length != 3) {
5038         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5039                 "expected 3, was = %d\n", length);
5040     }
5041     length = ucol_getEquivalentReorderCodes(
5042             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5043     if (U_FAILURE(status) || length != 3) {
5044         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5045                 "expected 3, was = %d\n", length);
5046     }
5047     length = ucol_getEquivalentReorderCodes(
5048             USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5049     if (U_FAILURE(status) || length != 3) {
5050         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5051                 "expected 3, was = %d\n", length);
5052     }
5053 
5054     length = ucol_getEquivalentReorderCodes(
5055             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts), &status);
5056     if (U_FAILURE(status) || length != 2) {
5057         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5058                 "expected 2, was = %d\n", length);
5059     }
5060     length = ucol_getEquivalentReorderCodes(
5061             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScripts), &status);
5062     if (U_FAILURE(status) || length != 2) {
5063         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5064                 "expected 2, was = %d\n", length);
5065     }
5066 }
5067 
TestReorderingAcrossCloning(void)5068 static void TestReorderingAcrossCloning(void)
5069 {
5070     UErrorCode status = U_ZERO_ERROR;
5071     UCollator  *myCollation;
5072     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5073     UCollator *clonedCollation;
5074     int32_t retrievedReorderCodesLength;
5075     int32_t retrievedReorderCodes[10];
5076     int loopIndex;
5077 
5078     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5079 
5080     /* build collator tertiary */
5081     myCollation = ucol_open("", &status);
5082     ucol_setStrength(myCollation, UCOL_TERTIARY);
5083     if(U_FAILURE(status)) {
5084         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5085         return;
5086     }
5087 
5088     /* set the reorderding */
5089     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5090     if (U_FAILURE(status)) {
5091         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5092         return;
5093     }
5094 
5095     /* clone the collator */
5096     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5097     if (U_FAILURE(status)) {
5098         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5099         return;
5100     }
5101 
5102     /* get the reordering */
5103     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
5104     if (U_FAILURE(status)) {
5105         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5106         return;
5107     }
5108     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
5109         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
5110         return;
5111     }
5112     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5113         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5114             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5115             return;
5116         }
5117     }
5118 
5119     /*uprv_free(buffer);*/
5120     ucol_close(myCollation);
5121     ucol_close(clonedCollation);
5122 }
5123 
5124 /*
5125  * Utility function to test one collation reordering test case set.
5126  * @param testcases Array of test cases.
5127  * @param n_testcases Size of the array testcases.
5128  * @param reorderTokens Array of reordering codes.
5129  * @param reorderTokensLen Size of the array reorderTokens.
5130  */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5131 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5132 {
5133     uint32_t testCaseNum;
5134     UErrorCode status = U_ZERO_ERROR;
5135     UCollator  *myCollation;
5136 
5137     myCollation = ucol_open("", &status);
5138     if (U_FAILURE(status)) {
5139         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5140         return;
5141     }
5142     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5143     if(U_FAILURE(status)) {
5144         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5145         return;
5146     }
5147 
5148     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5149         doTest(myCollation,
5150             testCases[testCaseNum].source,
5151             testCases[testCaseNum].target,
5152             testCases[testCaseNum].result
5153         );
5154     }
5155     ucol_close(myCollation);
5156 }
5157 
TestGreekFirstReorder(void)5158 static void TestGreekFirstReorder(void)
5159 {
5160     const char* strRules[] = {
5161         "[reorder Grek]"
5162     };
5163 
5164     const int32_t apiRules[] = {
5165         USCRIPT_GREEK
5166     };
5167 
5168     const static OneTestCase privateUseCharacterStrings[] = {
5169         { {0x0391}, {0x0391}, UCOL_EQUAL },
5170         { {0x0041}, {0x0391}, UCOL_GREATER },
5171         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5172         { {0x0060}, {0x0391}, UCOL_LESS },
5173         { {0x0391}, {0xe2dc}, UCOL_LESS },
5174         { {0x0391}, {0x0060}, UCOL_GREATER },
5175     };
5176 
5177     /* Test rules creation */
5178     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5179 
5180     /* Test collation reordering API */
5181     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5182 }
5183 
TestGreekLastReorder(void)5184 static void TestGreekLastReorder(void)
5185 {
5186     const char* strRules[] = {
5187         "[reorder Zzzz Grek]"
5188     };
5189 
5190     const int32_t apiRules[] = {
5191         USCRIPT_UNKNOWN, USCRIPT_GREEK
5192     };
5193 
5194     const static OneTestCase privateUseCharacterStrings[] = {
5195         { {0x0391}, {0x0391}, UCOL_EQUAL },
5196         { {0x0041}, {0x0391}, UCOL_LESS },
5197         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5198         { {0x0060}, {0x0391}, UCOL_LESS },
5199         { {0x0391}, {0xe2dc}, UCOL_GREATER },
5200     };
5201 
5202     /* Test rules creation */
5203     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5204 
5205     /* Test collation reordering API */
5206     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5207 }
5208 
TestNonScriptReorder(void)5209 static void TestNonScriptReorder(void)
5210 {
5211     const char* strRules[] = {
5212         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5213     };
5214 
5215     const int32_t apiRules[] = {
5216         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5217         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5218         UCOL_REORDER_CODE_CURRENCY
5219     };
5220 
5221     const static OneTestCase privateUseCharacterStrings[] = {
5222         { {0x0391}, {0x0041}, UCOL_LESS },
5223         { {0x0041}, {0x0391}, UCOL_GREATER },
5224         { {0x0060}, {0x0041}, UCOL_LESS },
5225         { {0x0060}, {0x0391}, UCOL_GREATER },
5226         { {0x0024}, {0x0041}, UCOL_GREATER },
5227     };
5228 
5229     /* Test rules creation */
5230     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5231 
5232     /* Test collation reordering API */
5233     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5234 }
5235 
TestHaniReorder(void)5236 static void TestHaniReorder(void)
5237 {
5238     const char* strRules[] = {
5239         "[reorder Hani]"
5240     };
5241     const int32_t apiRules[] = {
5242         USCRIPT_HAN
5243     };
5244 
5245     const static OneTestCase privateUseCharacterStrings[] = {
5246         { {0x4e00}, {0x0041}, UCOL_LESS },
5247         { {0x4e00}, {0x0060}, UCOL_GREATER },
5248         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5249         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5250         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5251         { {0xfa27}, {0x0041}, UCOL_LESS },
5252         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5253     };
5254 
5255     /* Test rules creation */
5256     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5257 
5258     /* Test collation reordering API */
5259     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5260 }
5261 
TestHaniReorderWithOtherRules(void)5262 static void TestHaniReorderWithOtherRules(void)
5263 {
5264     const char* strRules[] = {
5265         "[reorder Hani] &b<a"
5266     };
5267     /*const int32_t apiRules[] = {
5268         USCRIPT_HAN
5269     };*/
5270 
5271     const static OneTestCase privateUseCharacterStrings[] = {
5272         { {0x4e00}, {0x0041}, UCOL_LESS },
5273         { {0x4e00}, {0x0060}, UCOL_GREATER },
5274         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5275         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5276         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5277         { {0xfa27}, {0x0041}, UCOL_LESS },
5278         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5279         { {0x0062}, {0x0061}, UCOL_LESS },
5280     };
5281 
5282     /* Test rules creation */
5283     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5284 }
5285 
TestMultipleReorder(void)5286 static void TestMultipleReorder(void)
5287 {
5288     const char* strRules[] = {
5289         "[reorder Grek Zzzz DIGIT Latn Hani]"
5290     };
5291 
5292     const int32_t apiRules[] = {
5293         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5294     };
5295 
5296     const static OneTestCase collationTestCases[] = {
5297         { {0x0391}, {0x0041}, UCOL_LESS},
5298         { {0x0031}, {0x0041}, UCOL_LESS},
5299         { {0x0041}, {0x4e00}, UCOL_LESS},
5300     };
5301 
5302     /* Test rules creation */
5303     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
5304 
5305     /* Test collation reordering API */
5306     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
5307 }
5308 
5309 /*
5310  * Test that covers issue reported in ticket 8814
5311  */
TestReorderWithNumericCollation(void)5312 static void TestReorderWithNumericCollation(void)
5313 {
5314     UErrorCode status = U_ZERO_ERROR;
5315     UCollator  *myCollation;
5316     UCollator  *myReorderCollation;
5317     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5318     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5319     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5320     UChar fortyS[] = { 0x0053 };
5321     UChar fortyThreeP[] = { 0x0050 };
5322     uint8_t fortyS_sortKey[128];
5323     int32_t fortyS_sortKey_Length;
5324     uint8_t fortyThreeP_sortKey[128];
5325     int32_t fortyThreeP_sortKey_Length;
5326     uint8_t fortyS_sortKey_reorder[128];
5327     int32_t fortyS_sortKey_reorder_Length;
5328     uint8_t fortyThreeP_sortKey_reorder[128];
5329     int32_t fortyThreeP_sortKey_reorder_Length;
5330     UCollationResult collResult;
5331     UCollationResult collResultReorder;
5332 
5333     log_verbose("Testing reordering with and without numeric collation\n");
5334 
5335     /* build collator tertiary with numeric */
5336     myCollation = ucol_open("", &status);
5337     /*
5338     ucol_setStrength(myCollation, UCOL_TERTIARY);
5339     */
5340     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5341     if(U_FAILURE(status)) {
5342         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5343         return;
5344     }
5345 
5346     /* build collator tertiary with numeric and reordering */
5347     myReorderCollation = ucol_open("", &status);
5348     /*
5349     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5350     */
5351     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5352     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
5353     if(U_FAILURE(status)) {
5354         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5355         return;
5356     }
5357 
5358     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
5359     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
5360     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
5361     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5362 
5363     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5364         log_err_status(status, "ERROR: couldn't generate sort keys\n");
5365         return;
5366     }
5367     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5368     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5369     /*
5370     fprintf(stderr, "\tcollResult = %x\n", collResult);
5371     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5372     fprintf(stderr, "\nfortyS\n");
5373     for (i = 0; i < fortyS_sortKey_Length; i++) {
5374         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5375     }
5376     fprintf(stderr, "\nfortyThreeP\n");
5377     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5378         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5379     }
5380     */
5381     if (collResult != collResultReorder) {
5382         log_err_status(status, "ERROR: collation results should have been the same.\n");
5383         return;
5384     }
5385 
5386     ucol_close(myCollation);
5387     ucol_close(myReorderCollation);
5388 }
5389 
/*
 * Compares two zero-terminated byte arrays, byte by byte, as unsigned values.
 * Returns 0 if both arrays are identical up to and including the terminating
 * zero byte, -1 if the first differing byte of a is smaller than that of b,
 * and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  size_t pos = 0;

  while (a[pos] == b[pos]) {
    if (a[pos] == 0) {
      /* reached the terminator in both arrays without finding a difference */
      return 0;
    }
    ++pos;
  }
  if (a[pos] < b[pos]) {
    return -1;
  }
  return 1;
}
5399 
TestImportRulesDeWithPhonebook(void)5400 static void TestImportRulesDeWithPhonebook(void)
5401 {
5402   const char* normalRules[] = {
5403     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5404     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5405     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5406   };
5407   const OneTestCase normalTests[] = {
5408     { {0x00e6}, {0x00c6}, UCOL_LESS},
5409     { {0x00fc}, {0x00dc}, UCOL_GREATER},
5410   };
5411 
5412   const char* importRules[] = {
5413     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5414     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5416   };
5417   const OneTestCase importTests[] = {
5418     { {0x00e6}, {0x00c6}, UCOL_LESS},
5419     { {0x00fc}, {0x00dc}, UCOL_LESS},
5420   };
5421 
5422   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
5423   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
5424 }
5425 
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 *
 * Runs four sets of expectations through doTestOneTestCase():
 *   - a dummy tailoring ("&a<b"),
 *   - "[import root-u-co-eor]",
 *   - "[import fi-u-co-standard]",
 *   - both imports combined in one rule string.
 * A fifth run using "[import fi-u-co-eor]" is commented out at the end; see
 * the TODO there (ICU ticket #8962).
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expectations for the dummy tailoring. */
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };

  /* With EOR the two currency signs compare equal, and a trailing currency sign is ignored. */
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules. */
  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };

  /* With the Finnish rules the first pair is expected in the opposite order. */
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  /* Only referenced by the commented-out call at the end of this function. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expectations when both imports are in effect. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
  /* The combined expectations are run against the two-import rule string. */
  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */

}
#endif
5495 
#if 0
/*
 * Compiled out. Tests importing the unihan rules; it cannot be enabled unless
 * the resource files are built with the -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* Dummy rule, i.e. plain DUCET order. */
  const char* defaultRules[] = { "&a<b" };

  /* Unihan ordering imported from the Korean unihan collation type. */
  const char* unihanRules[] = { "[import ko-u-co-unihan]" };

  /* U+3402 vs U+4E1E: the expected relation flips once unihan is imported. */
  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER }
  };
  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS }
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));

}
#endif
5527 
TestImport(void)5528 static void TestImport(void)
5529 {
5530     UCollator* vicoll;
5531     UCollator* escoll;
5532     UCollator* viescoll;
5533     UCollator* importviescoll;
5534     UParseError error;
5535     UErrorCode status = U_ZERO_ERROR;
5536     UChar* virules;
5537     int32_t viruleslength;
5538     UChar* esrules;
5539     int32_t esruleslength;
5540     UChar* viesrules;
5541     int32_t viesruleslength;
5542     char srules[500] = "[import vi][import es]";
5543     UChar rules[500];
5544     uint32_t length = 0;
5545     int32_t itemCount;
5546     int32_t i, k;
5547     UChar32 start;
5548     UChar32 end;
5549     UChar str[500];
5550     int32_t strLength;
5551 
5552     uint8_t sk1[500];
5553     uint8_t sk2[500];
5554 
5555     UBool b;
5556     USet* tailoredSet;
5557     USet* importTailoredSet;
5558 
5559 
5560     vicoll = ucol_open("vi", &status);
5561     if(U_FAILURE(status)){
5562         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5563         return;
5564     }
5565 
5566     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5567     if(viruleslength == 0) {
5568         log_data_err("missing vi tailoring rule string\n");
5569         ucol_close(vicoll);
5570         return;
5571     }
5572     escoll = ucol_open("es", &status);
5573     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5574     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5575     viesrules[0] = 0;
5576     u_strcat(viesrules, virules);
5577     u_strcat(viesrules, esrules);
5578     viesruleslength = viruleslength + esruleslength;
5579     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5580 
5581     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5582     length = u_unescape(srules, rules, 500);
5583     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5584     if(U_FAILURE(status)){
5585         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5586         return;
5587     }
5588 
5589     tailoredSet = ucol_getTailoredSet(viescoll, &status);
5590     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5591 
5592     if(!uset_equals(tailoredSet, importTailoredSet)){
5593         log_err("Tailored sets not equal");
5594     }
5595 
5596     uset_close(importTailoredSet);
5597 
5598     itemCount = uset_getItemCount(tailoredSet);
5599 
5600     for( i = 0; i < itemCount; i++){
5601         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5602         if(strLength < 2){
5603             for (; start <= end; start++){
5604                 k = 0;
5605                 U16_APPEND(str, k, 500, start, b);
5606                 (void)b;    /* Suppress set but not used warning. */
5607                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5608                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5609                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5610                     log_err("Sort key for %s not equal\n", str);
5611                     break;
5612                 }
5613             }
5614         }else{
5615             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5616             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5617             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5618                 log_err("ZZSort key for %s not equal\n", str);
5619                 break;
5620             }
5621 
5622         }
5623     }
5624 
5625     uset_close(tailoredSet);
5626 
5627     uprv_free(viesrules);
5628 
5629     ucol_close(vicoll);
5630     ucol_close(escoll);
5631     ucol_close(viescoll);
5632     ucol_close(importviescoll);
5633 }
5634 
TestImportWithType(void)5635 static void TestImportWithType(void)
5636 {
5637     UCollator* vicoll;
5638     UCollator* decoll;
5639     UCollator* videcoll;
5640     UCollator* importvidecoll;
5641     UParseError error;
5642     UErrorCode status = U_ZERO_ERROR;
5643     const UChar* virules;
5644     int32_t viruleslength;
5645     const UChar* derules;
5646     int32_t deruleslength;
5647     UChar* viderules;
5648     int32_t videruleslength;
5649     const char srules[500] = "[import vi][import de-u-co-phonebk]";
5650     UChar rules[500];
5651     uint32_t length = 0;
5652     int32_t itemCount;
5653     int32_t i, k;
5654     UChar32 start;
5655     UChar32 end;
5656     UChar str[500];
5657     int32_t strLength;
5658 
5659     uint8_t sk1[500];
5660     uint8_t sk2[500];
5661 
5662     USet* tailoredSet;
5663     USet* importTailoredSet;
5664 
5665     vicoll = ucol_open("vi", &status);
5666     if(U_FAILURE(status)){
5667         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5668         return;
5669     }
5670     virules = ucol_getRules(vicoll, &viruleslength);
5671     if(viruleslength == 0) {
5672         log_data_err("missing vi tailoring rule string\n");
5673         ucol_close(vicoll);
5674         return;
5675     }
5676     /* decoll = ucol_open("de@collation=phonebook", &status); */
5677     decoll = ucol_open("de-u-co-phonebk", &status);
5678     if(U_FAILURE(status)){
5679         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5680         return;
5681     }
5682 
5683 
5684     derules = ucol_getRules(decoll, &deruleslength);
5685     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5686     viderules[0] = 0;
5687     u_strcat(viderules, virules);
5688     u_strcat(viderules, derules);
5689     videruleslength = viruleslength + deruleslength;
5690     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5691 
5692     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5693     length = u_unescape(srules, rules, 500);
5694     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5695     if(U_FAILURE(status)){
5696         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5697         return;
5698     }
5699 
5700     tailoredSet = ucol_getTailoredSet(videcoll, &status);
5701     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5702 
5703     if(!uset_equals(tailoredSet, importTailoredSet)){
5704         log_err("Tailored sets not equal");
5705     }
5706 
5707     uset_close(importTailoredSet);
5708 
5709     itemCount = uset_getItemCount(tailoredSet);
5710 
5711     for( i = 0; i < itemCount; i++){
5712         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5713         if(strLength < 2){
5714             for (; start <= end; start++){
5715                 k = 0;
5716                 U16_APPEND_UNSAFE(str, k, start);
5717                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5718                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5719                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5720                     log_err("Sort key for %s not equal\n", str);
5721                     break;
5722                 }
5723             }
5724         }else{
5725             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5726             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5727             if(compare_uint8_t_arrays(sk1, sk2) != 0){
5728                 log_err("Sort key for %s not equal\n", str);
5729                 break;
5730             }
5731 
5732         }
5733     }
5734 
5735     uset_close(tailoredSet);
5736 
5737     uprv_free(viderules);
5738 
5739     ucol_close(videcoll);
5740     ucol_close(importvidecoll);
5741     ucol_close(vicoll);
5742     ucol_close(decoll);
5743 }
5744 
5745 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5746 static const UChar longUpperStr1[]= { /* 155 chars */
5747     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5748     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5749     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5750     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5751     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5752     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5753     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5754     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5755     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5756     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5757 };
5758 
5759 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5760 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5763     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5764     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5765     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5766 };
5767 
5768 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5769 static const UChar longUpperStr3[]= { /* 324 chars */
5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5781     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5782 };
5783 
5784 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
5785 
5786 typedef struct {
5787     const UChar * longUpperStrPtr;
5788     int32_t       longUpperStrLen;
5789 } LongUpperStrItem;
5790 
5791 /* String pointers must be in reverse collation order of the corresponding strings */
5792 static const LongUpperStrItem longUpperStrItems[] = {
5793     { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
5794     { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
5795     { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
5796     { NULL,          0                           }
5797 };
5798 
5799 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5800 
5801 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5802 static void TestCaseLevelBufferOverflow(void)
5803 {
5804     UErrorCode status = U_ZERO_ERROR;
5805     UCollator * ucol = ucol_open("root", &status);
5806     if ( U_SUCCESS(status) ) {
5807         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5808         if ( U_SUCCESS(status) ) {
5809             const LongUpperStrItem * itemPtr;
5810             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5811             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5812                 int32_t sortKeyLen;
5813                 if (itemPtr > longUpperStrItems) {
5814                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5815                 }
5816                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5817                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5818                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5819                     break;
5820                 }
5821                 if ( itemPtr > longUpperStrItems ) {
5822                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5823                     if (compareResult >= 0) {
5824                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5825                     }
5826                 }
5827             }
5828         } else {
5829             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5830         }
5831         ucol_close(ucol);
5832     } else {
5833         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5834     }
5835 }
5836 
5837 /* Test for #10595 */
5838 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5839 #define KEY_PART_SIZE 16
5840 
TestNextSortKeyPartJaIdentical(void)5841 static void TestNextSortKeyPartJaIdentical(void)
5842 {
5843     UErrorCode status = U_ZERO_ERROR;
5844     UCollator *coll;
5845     uint8_t keyPart[KEY_PART_SIZE];
5846     UCharIterator iter;
5847     uint32_t state[2] = {0, 0};
5848     int32_t keyPartLen;
5849 
5850     coll = ucol_open("ja", &status);
5851     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5852     if (U_FAILURE(status)) {
5853         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5854         return;
5855     }
5856 
5857     uiter_setString(&iter, testJapaneseName, 5);
5858     keyPartLen = KEY_PART_SIZE;
5859     while (keyPartLen == KEY_PART_SIZE) {
5860         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5861         if (U_FAILURE(status)) {
5862             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5863             break;
5864         }
5865     }
5866 
5867     ucol_close(coll);
5868 }
5869 
5870 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5871 
addMiscCollTest(TestNode ** root)5872 void addMiscCollTest(TestNode** root)
5873 {
5874     TEST(TestRuleOptions);
5875     TEST(TestBeforePrefixFailure);
5876     TEST(TestContractionClosure);
5877     TEST(TestPrefixCompose);
5878     TEST(TestStrCollIdenticalPrefix);
5879     TEST(TestPrefix);
5880     TEST(TestNewJapanese);
5881     /*TEST(TestLimitations);*/
5882     TEST(TestNonChars);
5883     TEST(TestExtremeCompression);
5884     TEST(TestSurrogates);
5885     TEST(TestVariableTopSetting);
5886     TEST(TestMaxVariable);
5887     TEST(TestBocsuCoverage);
5888     TEST(TestCyrillicTailoring);
5889     TEST(TestCase);
5890     TEST(IncompleteCntTest);
5891     TEST(BlackBirdTest);
5892     TEST(FunkyATest);
5893     TEST(BillFairmanTest);
5894     TEST(TestChMove);
5895     TEST(TestImplicitTailoring);
5896     TEST(TestFCDProblem);
5897     TEST(TestEmptyRule);
5898     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5899     TEST(TestJ815);
5900     /*TEST(TestJ831);*/ /* we changed lv locale */
5901     TEST(TestBefore);
5902     TEST(TestHangulTailoring);
5903     TEST(TestUCARules);
5904     TEST(TestIncrementalNormalize);
5905     TEST(TestComposeDecompose);
5906     TEST(TestCompressOverlap);
5907     TEST(TestContraction);
5908     TEST(TestExpansion);
5909     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5910     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5911     TEST(TestOptimize);
5912     TEST(TestSuppressContractions);
5913     TEST(Alexis2);
5914     TEST(TestHebrewUCA);
5915     TEST(TestPartialSortKeyTermination);
5916     TEST(TestSettings);
5917     TEST(TestEquals);
5918     TEST(TestJ2726);
5919     TEST(NullRule);
5920     TEST(TestNumericCollation);
5921     TEST(TestTibetanConformance);
5922     TEST(TestPinyinProblem);
5923     TEST(TestSeparateTrees);
5924     TEST(TestBeforePinyin);
5925     TEST(TestBeforeTightening);
5926     /*TEST(TestMoreBefore);*/
5927     TEST(TestTailorNULL);
5928     TEST(TestUpperFirstQuaternary);
5929     TEST(TestJ4960);
5930     TEST(TestJ5223);
5931     TEST(TestJ5232);
5932     TEST(TestJ5367);
5933     TEST(TestHiragana);
5934     TEST(TestSortKeyConsistency);
5935     TEST(TestVI5913);  /* VI, RO tailored rules */
5936     TEST(TestCroatianSortKey);
5937     TEST(TestTailor6179);
5938     TEST(TestUCAPrecontext);
5939     TEST(TestOutOfBuffer5468);
5940     TEST(TestSameStrengthList);
5941 
5942     TEST(TestSameStrengthListQuoted);
5943     TEST(TestSameStrengthListSupplemental);
5944     TEST(TestSameStrengthListQwerty);
5945     TEST(TestSameStrengthListQuotedQwerty);
5946     TEST(TestSameStrengthListRanges);
5947     TEST(TestSameStrengthListSupplementalRanges);
5948     TEST(TestSpecialCharacters);
5949     TEST(TestPrivateUseCharacters);
5950     TEST(TestPrivateUseCharactersInList);
5951     TEST(TestPrivateUseCharactersInRange);
5952     TEST(TestInvalidListsAndRanges);
5953     TEST(TestImportRulesDeWithPhonebook);
5954     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5955     /* TEST(TestImportRulesCJKWithUnihan); */
5956     TEST(TestImport);
5957     TEST(TestImportWithType);
5958 
5959     TEST(TestBeforeRuleWithScriptReordering);
5960     TEST(TestNonLeadBytesDuringCollationReordering);
5961     TEST(TestReorderingAPI);
5962     TEST(TestReorderingAPIWithRuleCreatedCollator);
5963     TEST(TestEquivalentReorderingScripts);
5964     TEST(TestGreekFirstReorder);
5965     TEST(TestGreekLastReorder);
5966     TEST(TestNonScriptReorder);
5967     TEST(TestHaniReorder);
5968     TEST(TestHaniReorderWithOtherRules);
5969     TEST(TestMultipleReorder);
5970     TEST(TestReorderingAcrossCloning);
5971     TEST(TestReorderWithNumericCollation);
5972 
5973     TEST(TestCaseLevelBufferOverflow);
5974     TEST(TestNextSortKeyPartJaIdentical);
5975 }
5976 
5977 #endif /* #if !UCONFIG_NO_COLLATION */
5978