• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 2001-2010, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
14  * to fit.
15  */
16 
17 #include <stdio.h>
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_COLLATION
22 
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "uparse.h"
41 #include "putilimp.h"
42 
43 
/* Element count of a true array (do not use on a pointer). */
#define LEN(a) (sizeof(a) / sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-size UChar test tables in this file. */
#define MAX_TOKEN_LEN 16
47 
48 typedef UCollationResult tst_strcoll(void *collator, const int object,
49                         const UChar *source, const int sLen,
50                         const UChar *target, const int tLen);
51 
52 
53 
/*
 * Strings for the first half of IncompleteCntTest: every entry is expected
 * to compare UCOL_LESS than every later entry.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};
68 
/*
 * Strings for the second half of IncompleteCntTest: every entry is expected
 * to compare UCOL_LESS than every later entry.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
80 
IncompleteCntTest(void)81 static void IncompleteCntTest(void)
82 {
83   UErrorCode status = U_ZERO_ERROR;
84   UChar temp[90];
85   UChar t1[90];
86   UChar t2[90];
87 
88   UCollator *coll =  NULL;
89   uint32_t i = 0, j = 0;
90   uint32_t size = 0;
91 
92   u_uastrcpy(temp, " & Z < ABC < Q < B");
93 
94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95 
96   if(U_SUCCESS(status)) {
97     size = sizeof(cnt1)/sizeof(cnt1[0]);
98     for(i = 0; i < size-1; i++) {
99       for(j = i+1; j < size; j++) {
100         UCollationElements *iter;
101         u_uastrcpy(t1, cnt1[i]);
102         u_uastrcpy(t2, cnt1[j]);
103         doTest(coll, t1, t2, UCOL_LESS);
104         /* synwee : added collation element iterator test */
105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106         if (U_FAILURE(status)) {
107           log_err("Creation of iterator failed\n");
108           break;
109         }
110         backAndForth(iter);
111         ucol_closeElements(iter);
112       }
113     }
114   }
115 
116   ucol_close(coll);
117 
118 
119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121 
122   if(U_SUCCESS(status)) {
123     size = sizeof(cnt2)/sizeof(cnt2[0]);
124     for(i = 0; i < size-1; i++) {
125       for(j = i+1; j < size; j++) {
126         UCollationElements *iter;
127         u_uastrcpy(t1, cnt2[i]);
128         u_uastrcpy(t2, cnt2[j]);
129         doTest(coll, t1, t2, UCOL_LESS);
130 
131         /* synwee : added collation element iterator test */
132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133         if (U_FAILURE(status)) {
134           log_err("Creation of iterator failed\n");
135           break;
136         }
137         backAndForth(iter);
138         ucol_closeElements(iter);
139       }
140     }
141   }
142 
143   ucol_close(coll);
144 
145 
146 }
147 
/*
 * Strings used by BlackBirdTest with alternate handling set to UCOL_SHIFTED.
 * At quaternary strength each entry is expected to be UCOL_LESS than every
 * later one; at tertiary strength neighbours are compared using shiftedTert.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
159 
160 const static UCollationResult shiftedTert[] = {
161   UCOL_EQUAL,
162   UCOL_EQUAL,
163   UCOL_EQUAL,
164   UCOL_LESS,
165   UCOL_EQUAL,
166   UCOL_EQUAL,
167   UCOL_LESS,
168   UCOL_EQUAL,
169   UCOL_EQUAL
170 };
171 
/*
 * Strings used by BlackBirdTest with alternate handling set to
 * UCOL_NON_IGNORABLE: each entry is expected to be UCOL_LESS than every
 * later one.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
183 
BlackBirdTest(void)184 static void BlackBirdTest(void) {
185   UErrorCode status = U_ZERO_ERROR;
186   UChar t1[90];
187   UChar t2[90];
188 
189   uint32_t i = 0, j = 0;
190   uint32_t size = 0;
191   UCollator *coll = ucol_open("en_US", &status);
192 
193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195 
196   if(U_SUCCESS(status)) {
197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198     for(i = 0; i < size-1; i++) {
199       for(j = i+1; j < size; j++) {
200         u_uastrcpy(t1, nonignorable[i]);
201         u_uastrcpy(t2, nonignorable[j]);
202         doTest(coll, t1, t2, UCOL_LESS);
203       }
204     }
205   }
206 
207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209 
210   if(U_SUCCESS(status)) {
211     size = sizeof(shifted)/sizeof(shifted[0]);
212     for(i = 0; i < size-1; i++) {
213       for(j = i+1; j < size; j++) {
214         u_uastrcpy(t1, shifted[i]);
215         u_uastrcpy(t2, shifted[j]);
216         doTest(coll, t1, t2, UCOL_LESS);
217       }
218     }
219   }
220 
221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222   if(U_SUCCESS(status)) {
223     size = sizeof(shifted)/sizeof(shifted[0]);
224     for(i = 1; i < size; i++) {
225       u_uastrcpy(t1, shifted[i-1]);
226       u_uastrcpy(t2, shifted[i]);
227       doTest(coll, t1, t2, shiftedTert[i]);
228     }
229   }
230 
231   ucol_close(coll);
232 }
233 
234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237     {0x0041/*'A'*/, 0x0300, 0x0000},
238     {0x00C0, 0x0301, 0x0000},
239     /* this would work with forced normalization */
240     {0x00C0, 0x0316, 0x0000}
241 };
242 
243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246     {0x00C0, 0},
247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248     /* this would work with forced normalization */
249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250 };
251 
252 const static UCollationResult results[] = {
253     UCOL_GREATER,
254     UCOL_EQUAL,
255     UCOL_EQUAL,
256     UCOL_GREATER,
257     UCOL_EQUAL
258 };
259 
FunkyATest(void)260 static void FunkyATest(void)
261 {
262 
263     int32_t i;
264     UErrorCode status = U_ZERO_ERROR;
265     UCollator  *myCollation;
266     myCollation = ucol_open("en_US", &status);
267     if(U_FAILURE(status)){
268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269         return;
270     }
271     log_verbose("Testing some A letters, for some reason\n");
272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273     ucol_setStrength(myCollation, UCOL_TERTIARY);
274     for (i = 0; i < 4 ; i++)
275     {
276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277     }
278     ucol_close(myCollation);
279 }
280 
281 UColAttributeValue caseFirst[] = {
282     UCOL_OFF,
283     UCOL_LOWER_FIRST,
284     UCOL_UPPER_FIRST
285 };
286 
287 
288 UColAttributeValue alternateHandling[] = {
289     UCOL_NON_IGNORABLE,
290     UCOL_SHIFTED
291 };
292 
293 UColAttributeValue caseLevel[] = {
294     UCOL_OFF,
295     UCOL_ON
296 };
297 
298 UColAttributeValue strengths[] = {
299     UCOL_PRIMARY,
300     UCOL_SECONDARY,
301     UCOL_TERTIARY,
302     UCOL_QUATERNARY,
303     UCOL_IDENTICAL
304 };
305 
306 #if 0
/* Printable names, index-for-index with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};
314 
/* Printable names, index-for-index with caseFirst[]. */
static const char * caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };
320 
321 
/* Printable names, index-for-index with alternateHandling[]. */
static const char * alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };
326 
/* Printable names, index-for-index with caseLevel[]. */
static const char * caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };
331 
332 /* not used currently - does not test only prints */
333 static void PrintMarkDavis(void)
334 {
335   UErrorCode status = U_ZERO_ERROR;
336   UChar m[256];
337   uint8_t sortkey[256];
338   UCollator *coll = ucol_open("en_US", &status);
339   uint32_t h,i,j,k, sortkeysize;
340   uint32_t sizem = 0;
341   char buffer[512];
342   uint32_t len = 512;
343 
344   log_verbose("PrintMarkDavis");
345 
346   u_uastrcpy(m, "Mark Davis");
347   sizem = u_strlen(m);
348 
349 
350   m[1] = 0xe4;
351 
352   for(i = 0; i<sizem; i++) {
353     fprintf(stderr, "\\u%04X ", m[i]);
354   }
355   fprintf(stderr, "\n");
356 
357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
360 
361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
364 
365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
368 
369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
374         }
375 
376       }
377 
378     }
379 
380   }
381 }
382 #endif
383 
BillFairmanTest(void)384 static void BillFairmanTest(void) {
385 /*
386 ** check for actual locale via ICU resource bundles
387 **
388 ** lp points to the original locale ("fr_FR_....")
389 */
390 
391     UResourceBundle *lr,*cr;
392     UErrorCode              lec = U_ZERO_ERROR;
393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
394 
395     log_verbose("BillFairmanTest\n");
396 
397     lr = ures_open(NULL,lp,&lec);
398     if (lr) {
399         cr = ures_getByKey(lr,"collations",0,&lec);
400         if (cr) {
401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402             if (lp) {
403                 if (U_SUCCESS(lec)) {
404                     if(strcmp(lp, "fr") != 0) {
405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                     }
407                 }
408             }
409             ures_close(cr);
410         }
411         ures_close(lr);
412     }
413 }
414 
/*
 * Expects p < q, and expects the same result after prefixing p with U+31A3
 * and q with U+310D.  p and q must each fit, with the prefix and the
 * terminator, in 256 UChars.
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar prefixedP[256] = { 0 };
    UChar prefixedQ[256] = { 0 };

    /* Plain comparison. */
    doTest(col, p, q, UCOL_LESS);

    /* Same comparison with one extra leading character on each side. */
    prefixedP[0] = 0x31a3;
    prefixedQ[0] = 0x310d;
    u_strcpy(&prefixedP[1], p);
    u_strcpy(&prefixedQ[1], q);
    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
445 
/*
 * Expects p < q in three forms:
 *   - as given;
 *   - with 'S' (0x53) prepended to p and 's' (0x73) prepended to q: still LESS;
 *   - with 'b' (0x62) appended to p and 'a' (0x61) appended to q: GREATER.
 * p and q must each fit, with one extra character and the terminator,
 * in 256 UChars.
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar lhs[256] = { 0 };
    UChar rhs[256] = { 0 };
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* Leading characters differing only in case. */
    lhs[0] = 0x0053;
    rhs[0] = 0x0073;
    u_strcpy(&lhs[1], p);
    u_strcpy(&rhs[1], q);
    doTest(col, lhs, rhs, UCOL_LESS);

    /* Trailing 'b' versus 'a'. */
    u_strcpy(lhs, p);
    lhs[pLen] = 0x62;
    lhs[pLen + 1] = 0;

    u_strcpy(rhs, q);
    rhs[qLen] = 0x61;
    rhs[qLen + 1] = 0;

    doTest(col, lhs, rhs, UCOL_GREATER);
}
482 
/*
 * Expects p < q in three forms:
 *   - as given;
 *   - with a space (0x20) prepended to p and '-' (0x2D) prepended to q:
 *     still LESS;
 *   - with U+00E0 appended to p and 'a' (0x61) appended to q: GREATER.
 * p and q must each fit, with one extra character and the terminator,
 * in 256 UChars.
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar lhs[256] = { 0 };
    UChar rhs[256] = { 0 };
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* Leading space versus hyphen. */
    lhs[0] = 0x0020;
    rhs[0] = 0x002D;
    u_strcpy(&lhs[1], p);
    u_strcpy(&rhs[1], q);
    doTest(col, lhs, rhs, UCOL_LESS);

    /* Trailing U+00E0 versus 'a'. */
    u_strcpy(lhs, p);
    lhs[pLen] = 0xE0;
    lhs[pLen + 1] = 0;

    u_strcpy(rhs, q);
    rhs[qLen] = 0x61;
    rhs[qLen + 1] = 0;

    doTest(col, lhs, rhs, UCOL_GREATER);
}
517 
/* Expects p and q to compare UCOL_EQUAL under col. */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    const UCollationResult expected = UCOL_EQUAL;

    doTest(col, p, q, expected);
}
529 
/*
 * Re-parses the rule string of `coll` token by token and checks each pair of
 * consecutive tokens with testEquality/testPrimary/testSecondary/testTertiary,
 * chosen by the relation strength the parser reports for the token.
 *
 * coll   - collator whose rules (ucol_getRules) are walked
 * status - in/out ICU error code; nothing is done if it already indicates
 *          failure or the rule string is empty.  Set to
 *          U_MEMORY_ALLOCATION_ERROR if the working copy of the rules cannot
 *          be allocated.
 *
 * Each token must fit in the 256-UChar working buffers.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
  const UChar *rules = NULL;
  int32_t ruleLen = 0;
  uint32_t strength = 0;
  uint32_t chOffset = 0; uint32_t chLen = 0;
  uint32_t exOffset = 0; uint32_t exLen = 0;
  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
  uint32_t firstEx = 0;
  uint32_t firstLen = 0;
  UBool top_ = TRUE;
  uint16_t specs = 0;
  UBool startOfRules = TRUE;
  UBool lastReset = FALSE;
  UBool before = FALSE;
  uint32_t beforeStrength = 0;
  UColTokenParser src;
  UColOptionSet opts;

  UChar first[256];
  UChar second[256];
  UChar tempB[256];
  uint32_t tempLen;
  UChar *rulesCopy = NULL;
  UParseError parseError;

  uprv_memset(&src, 0, sizeof(UColTokenParser));

  src.opts = &opts;

  rules = ucol_getRules(coll, &ruleLen);
  if(U_SUCCESS(*status) && ruleLen > 0) {
    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    if(rulesCopy == NULL) {
      /* Previously the NULL pointer went straight into uprv_memcpy. */
      *status = U_MEMORY_ALLOCATION_ERROR;
      return;
    }
    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy+ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    *first = *second = 0;

    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) {
      strength = src.parsedToken.strength;
      chOffset = src.parsedToken.charsOffset;
      chLen = src.parsedToken.charsLen;
      exOffset = src.parsedToken.extensionOffset;
      exLen = src.parsedToken.extensionLen;
      prefixOffset = src.parsedToken.prefixOffset;
      prefixLen = src.parsedToken.prefixLen;
      specs = src.parsedToken.flags;

      startOfRules = FALSE;
      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
        second[0] = 0;
      } else {
        /* `second` is the text of the current token. */
        u_strncpy(second,src.source+chOffset, chLen);
        second[chLen] = 0;

        /* Append the current extension to `first` unless the previous token had one. */
        if(exLen > 0 && firstEx == 0) {
          u_strncat(first, src.source+exOffset, exLen);
          first[firstLen+exLen] = 0;
        }

        /* A prefix on the token right after a reset is put in front of `first`. */
        if(lastReset == TRUE && prefixLen != 0) {
          /* NOTE(review): source and destination overlap in this first copy;
             u_memmove may be what is wanted here - confirm before changing. */
          u_strncpy(first+prefixLen, first, firstLen);
          u_strncpy(first, src.source+prefixOffset, prefixLen);
          first[firstLen+prefixLen] = 0;
          firstLen = firstLen+prefixLen;
        }

        if(before == TRUE) { /* swap first and second */
          u_strcpy(tempB, first);
          u_strcpy(first, second);
          u_strcpy(second, tempB);

          tempLen = firstLen;
          firstLen = chLen;
          chLen = tempLen;

          tempLen = firstEx;
          firstEx = exLen;
          exLen = tempLen;
          if(beforeStrength < strength) {
            strength = beforeStrength;
          }
        }
      }
      lastReset = FALSE;

      switch(strength){
      case UCOL_IDENTICAL:
          testEquality(coll,first,second);
          break;
      case UCOL_PRIMARY:
          testPrimary(coll,first,second);
          break;
      case UCOL_SECONDARY:
          testSecondary(coll,first,second);
          break;
      case UCOL_TERTIARY:
          testTertiary(coll,first,second);
          break;
      case UCOL_TOK_RESET:
        lastReset = TRUE;
        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
        if(before) {
          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
        }
        break;
      default:
          break;
      }

      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
        before = FALSE;
      } else {
        /* The current token becomes the left-hand side of the next comparison. */
        firstLen = chLen;
        firstEx = exLen;
        u_strcpy(first, second);
      }
    }
    /* Free through src.source, not rulesCopy: the parser may have reallocated it. */
    uprv_free(src.source);
  }
}
657 
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)658 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
659   UCollator *UCA = (UCollator *)collator;
660   return ucol_strcoll(UCA, source, sLen, target, tLen);
661 }
662 
663 /*
664 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
665 #ifdef U_WINDOWS
666   LCID lcid = (LCID)collator;
667   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
668 #else
669   return 0;
670 #endif
671 }
672 */
673 
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
675                                      UChar s1, UChar s2,
676                                      const UChar *s, const uint32_t sLen,
677                                      const UChar *t, const uint32_t tLen) {
678   UChar source[256] = {0};
679   UChar target[256] = {0};
680 
681   source[0] = s1;
682   u_strcpy(source+1, s);
683   target[0] = s2;
684   u_strcpy(target+1, t);
685 
686   return func(collator, opts, source, sLen+1, target, tLen+1);
687 }
688 
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
690                                    UChar s1, UChar s2,
691                                    const UChar *s, const uint32_t sLen,
692                                    const UChar *t, const uint32_t tLen) {
693   UChar source[256] = {0};
694   UChar target[256] = {0};
695 
696   u_strcpy(source, s);
697   source[sLen] = s1;
698   u_strcpy(target, t);
699   target[tLen] = s2;
700 
701   return func(collator, opts, source, sLen+1, target, tLen+1);
702 }
703 
/*
 * Estimates at which strength s and t differ, given that comparing them
 * with `func` produced `result`.
 *
 * The strings are re-compared with extra characters placed before
 * (swampEarlier) or after (swampLater) them; which re-comparisons keep or
 * flip the original result decides the answer.  Returns UCOL_PRIMARY,
 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL;
 * UCOL_IDENTICAL is returned at once when `result` is UCOL_EQUAL and also
 * when no probe matches.
 */
static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
  const UChar fSecondary = 0x310d;
  const UChar sSecondary = 0x31a3;
  const UChar fTertiary = 0x310f;
  const UChar sTertiary = 0x31b7;
  UCollationResult oposite;

  if(result == UCOL_EQUAL) {
    return UCOL_IDENTICAL;
  }
  oposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    return UCOL_PRIMARY;
  }

  if(swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result
     && swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
    return UCOL_SECONDARY;
  }

  if(swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result
     && swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
    return UCOL_TERTIARY;
  }

  if(swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == oposite
     && swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite) {
    return UCOL_QUATERNARY;
  }

  return UCOL_IDENTICAL;
}
739 
/*
 * Writes a textual relation into `buffer` and returns `buffer`:
 *   "=="                   when res is UCOL_EQUAL or strength is 0xdeadbeef;
 *   strength+1 '>' chars   when res is UCOL_GREATER;
 *   strength+1 '<' chars   otherwise.
 * `buffer` must have room for strength+2 chars (at least 3).
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
  uint32_t count = strength + 1;
  uint32_t pos = 0;
  char symbol;

  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    buffer[0] = '=';
    buffer[1] = '=';
    buffer[2] = '\0';
    return buffer;
  }

  symbol = (res == UCOL_GREATER) ? '>' : '<';
  while(pos < count) {
    buffer[pos++] = symbol;
  }
  buffer[count] = '\0';

  return buffer;
}
761 
762 
763 
/*
 * Appends `src` to the NUL-terminated string in `dest`, whose total capacity
 * is `destSize` bytes.  Text that does not fit is dropped; `dest` always
 * stays NUL-terminated.
 */
static void logFailureAppend(char *dest, size_t destSize, const char *src) {
  size_t used = strlen(dest);

  if(used + 1 < destSize) {
    strncat(dest, src, destSize - used - 1);
  }
}

/*
 * Reports one difference between an expected and an observed ordering.
 *
 * platform, test        - labels placed in front of the observed and the
 *                         expected relation respectively
 * source/sLen           - left-hand string and its length in code units
 * target/tLen           - right-hand string and its length in code units
 * realRes, realStrength - observed result and strength
 * expRes, expStrength   - expected result and strength
 * error                 - if TRUE an error is logged; otherwise the function
 *                         returns at once unless the verbosity option is set
 *
 * The detailed "DIFF: ..." line is sent to log_verbose.  All string building
 * is bounded by the local buffer sizes, so long inputs are truncated in the
 * message instead of overrunning the stack buffers.
 */
static void logFailure (const char *platform, const char *test,
                        const UChar *source, const uint32_t sLen,
                        const UChar *target, const uint32_t tLen,
                        UCollationResult realRes, uint32_t realStrength,
                        UCollationResult expRes, uint32_t expStrength, UBool error) {

  uint32_t i = 0;

  /* b holds one formatted code unit: "%04X" or "(%c)". */
  char sEsc[256], s[256], tEsc[256], t[256], b[16], output[512], relation[256];

  *sEsc = *tEsc = *s = *t = 0;
  if(error == TRUE) {
    log_err("Difference between expected and generated order. Run test with -v for more info\n");
  } else if(getTestOption(VERBOSITY_OPTION) == 0) {
    return;
  }

  /* sEsc: "\uXXXX" per code unit, with "(c)" added for ASCII; s: "XXXX " per code unit. */
  for(i = 0; i<sLen; i++) {
    sprintf(b, "%04X", (unsigned int)source[i]);
    logFailureAppend(sEsc, sizeof(sEsc), "\\u");
    logFailureAppend(sEsc, sizeof(sEsc), b);
    logFailureAppend(s, sizeof(s), b);
    logFailureAppend(s, sizeof(s), " ");
    if(source[i] < 0x80) {
      sprintf(b, "(%c)", source[i]);
      logFailureAppend(sEsc, sizeof(sEsc), b);
    }
  }
  for(i = 0; i<tLen; i++) {
    sprintf(b, "%04X", (unsigned int)target[i]);
    logFailureAppend(tEsc, sizeof(tEsc), "\\u");
    logFailureAppend(tEsc, sizeof(tEsc), b);
    logFailureAppend(t, sizeof(t), b);
    logFailureAppend(t, sizeof(t), " ");
    if(target[i] < 0x80) {
      sprintf(b, "(%c)", target[i]);
      logFailureAppend(tEsc, sizeof(tEsc), b);
    }
  }

  strcpy(output, "DIFF: ");

  logFailureAppend(output, sizeof(output), s);
  logFailureAppend(output, sizeof(output), " : ");
  logFailureAppend(output, sizeof(output), t);

  /* Expected relation, labelled with `test`. */
  logFailureAppend(output, sizeof(output), test);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(expRes, expStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  logFailureAppend(output, sizeof(output), " ");

  /* Observed relation, labelled with `platform`. */
  logFailureAppend(output, sizeof(output), platform);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(realRes, realStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  log_verbose("%s\n", output);

}
852 
853 /*
854 static void printOutRules(const UChar *rules) {
855   uint32_t len = u_strlen(rules);
856   uint32_t i = 0;
857   char toPrint;
858   uint32_t line = 0;
859 
860   fprintf(stdout, "Rules:");
861 
862   for(i = 0; i<len; i++) {
863     if(rules[i]<0x7f && rules[i]>=0x20) {
864       toPrint = (char)rules[i];
865       if(toPrint == '&') {
866         line = 1;
867         fprintf(stdout, "\n&");
868       } else if(toPrint == ';') {
869         fprintf(stdout, "<<");
870         line+=2;
871       } else if(toPrint == ',') {
872         fprintf(stdout, "<<<");
873         line+=3;
874       } else {
875         fprintf(stdout, "%c", toPrint);
876         line++;
877       }
878     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
879       fprintf(stdout, "\\u%04X", rules[i]);
880       line+=6;
881     }
882     if(line>72) {
883       fprintf(stdout, "\n");
884       line = 0;
885     }
886   }
887 
888   log_verbose("\n");
889 
890 }
891 */
892 
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)893 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
894   uint32_t diffs = 0;
895   UCollationResult realResult;
896   uint32_t realStrength;
897 
898   uint32_t sLen = u_strlen(first);
899   uint32_t tLen = u_strlen(second);
900 
901   realResult = func(collator, opts, first, sLen, second, tLen);
902   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
903 
904   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
905     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
906     diffs++;
907   } else if(realResult != UCOL_LESS || realStrength != strength) {
908     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
909     diffs++;
910   }
911   return diffs;
912 }
913 
914 
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)915 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
916   const UChar *rules = NULL, *current = NULL;
917   int32_t ruleLen = 0;
918   uint32_t strength = 0;
919   uint32_t chOffset = 0; uint32_t chLen = 0;
920   uint32_t exOffset = 0; uint32_t exLen = 0;
921   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
922 /*  uint32_t rExpsLen = 0; */
923   uint32_t firstLen = 0, secondLen = 0;
924   UBool varT = FALSE; UBool top_ = TRUE;
925   uint16_t specs = 0;
926   UBool startOfRules = TRUE;
927   UColTokenParser src;
928   UColOptionSet opts;
929 
930   UChar first[256];
931   UChar second[256];
932   UChar *rulesCopy = NULL;
933 
934   uint32_t UCAdiff = 0;
935   uint32_t Windiff = 1;
936   UParseError parseError;
937 
938   uprv_memset(&src, 0, sizeof(UColTokenParser));
939   src.opts = &opts;
940 
941   rules = ucol_getRules(coll, &ruleLen);
942 
943   /*printOutRules(rules);*/
944 
945   if(U_SUCCESS(*status) && ruleLen > 0) {
946     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
947     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
948     src.current = src.source = rulesCopy;
949     src.end = rulesCopy+ruleLen;
950     src.extraCurrent = src.end;
951     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
952     *first = *second = 0;
953 
954     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
955        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
956     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
957       strength = src.parsedToken.strength;
958       chOffset = src.parsedToken.charsOffset;
959       chLen = src.parsedToken.charsLen;
960       exOffset = src.parsedToken.extensionOffset;
961       exLen = src.parsedToken.extensionLen;
962       prefixOffset = src.parsedToken.prefixOffset;
963       prefixLen = src.parsedToken.prefixLen;
964       specs = src.parsedToken.flags;
965 
966       startOfRules = FALSE;
967       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
968       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
969 
970       u_strncpy(second,src.source+chOffset, chLen);
971       second[chLen] = 0;
972       secondLen = chLen;
973 
974       if(exLen > 0) {
975         u_strncat(first, src.source+exOffset, exLen);
976         first[firstLen+exLen] = 0;
977         firstLen += exLen;
978       }
979 
980       if(strength != UCOL_TOK_RESET) {
981         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
982           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
983           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
984         }
985       }
986 
987 
988       firstLen = chLen;
989       u_strcpy(first, second);
990 
991     }
992     if(UCAdiff != 0 && Windiff != 0) {
993       log_verbose("\n");
994     }
995     if(UCAdiff == 0) {
996       log_verbose("No immediate difference with %s!\n", refName);
997     }
998     if(Windiff == 0) {
999       log_verbose("No immediate difference with Win32!\n");
1000     }
1001     uprv_free(src.source);
1002   }
1003 }
1004 
/*
 * Orders two collation elements, each given as a lead CE plus its
 * continuation CE, one level at a time: primary weights first, then
 * secondary weights, then tertiary weights.
 *
 * s1, s2  lead and continuation CE of the first element
 * t1, t2  lead and continuation CE of the second element
 *
 * Returns 0 when both pairs are identical, -1 when the first element sorts
 * before the second, and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t lhs = 0, rhs = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* primary: upper 16 bits of the lead CE, then those of the continuation */
  lhs = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  rhs = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  if(lhs != rhs) {
    return (lhs < rhs) ? -1 : 1;
  }

  /* secondary: bits 8..15 of the lead CE, then those of the continuation */
  lhs = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  rhs = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  if(lhs != rhs) {
    return (lhs < rhs) ? -1 : 1;
  }

  /*
   * tertiary: low byte of the lead CE, then that of the continuation.
   * The pairs are known to differ, so anything that is not "less" is
   * reported as "greater".
   */
  lhs = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  rhs = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (lhs < rhs) ? -1 : 1;
}
1041 
/*
 * CE boundaries of one indirect position: the CE pair at which the position
 * starts and the CE pair that limits it.
 */
typedef struct {
  uint32_t startCE;      /* lead CE of the start of the position */
  uint32_t startContCE;  /* continuation CE belonging to startCE */
  uint32_t limitCE;      /* lead CE of the limit; 0 when no limit was given */
  uint32_t limitContCE;  /* continuation CE belonging to limitCE; 0 when no limit was given */
} indirectBoundaries;
1048 
1049 /* these values are used for finding CE values for indirect positioning. */
1050 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
1051 /* values. It only works for resets and you cannot tailor indirect names */
1052 /* An indirect name can define either an anchor point or a range. An     */
1053 /* anchor point behaves in exactly the same way as a code point in reset */
1054 /* would, except that it cannot be tailored. A range (we currently only  */
1055 /* know for the [top] range will explicitly set the upper bound for      */
1056 /* generated CEs, thus allowing for better control over how many CEs can */
1057 /* be squeezed between in the range without performance penalty.         */
1058 /* In that respect, we use [top] for tailoring of locales that use CJK   */
1059 /* characters. Other indirect values are currently a pure convenience,   */
1060 /* they can be used to assure that the CEs will be always positioned in  */
1061 /* the same place relative to a point with known properties (e.g. first  */
1062 /* primary ignorable). */
1063 static indirectBoundaries ucolIndirectBoundaries[15];
1064 static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1066     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1067     /* to initalize here. */
1068     ucolIndirectBoundaries[indexR].startCE = start[0];
1069     ucolIndirectBoundaries[indexR].startContCE = start[1];
1070     if(end) {
1071         ucolIndirectBoundaries[indexR].limitCE = end[0];
1072         ucolIndirectBoundaries[indexR].limitContCE = end[1];
1073     } else {
1074         ucolIndirectBoundaries[indexR].limitCE = 0;
1075         ucolIndirectBoundaries[indexR].limitContCE = 0;
1076     }
1077 }
1078 
/*
 * Checks the collation elements (CEs) that coll produces against coll's own
 * tailoring rules.
 *
 * The rules are copied into a scratch buffer and parsed token by token with
 * ucol_tok_parseNextToken().  For every token the first CE of its characters
 * and the CE after it (kept only when it is a continuation) are fetched
 * from coll.  Then:
 *  - a reset token records that CE pair as the base; for tokens flagged
 *    UCOL_TOK_TOP the pair is taken from ucolIndirectBoundaries instead;
 *  - for any other token, whenever its strength value is lower than the
 *    lowest one seen since the reset, the upper bound (nextCE/nextContCE)
 *    is refreshed through ucol_inv_getNextCE(), or set to the first
 *    implicit CE when the base is the [top] value.  The token's CE pair
 *    must then equal the base (nothing but identical relations since the
 *    reset), equal the previous token's pair (identical strength), or be
 *    ordered by compareCEs() between the previous pair and the upper bound.
 * Violations are reported through log_err().
 *
 * coll    collator whose rules and CEs are checked
 * status  ICU error code, in/out; if the root collator cannot be opened or
 *         the locale of coll cannot be determined, the problem is logged
 *         and the function returns without testing
 *
 * Every exit path releases the rules copy, the root collator and the
 * collIterate that this function created.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL;
    int32_t ruleLen = 0;

    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE;
    uint32_t lastContCE;

    int32_t result = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t oldOffset = 0;

    uint16_t specs = 0;
    UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate *c = uprv_new_collIterate(status);
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
    const char *colLoc;
    UCollator *UCA = ucol_open("root", status);

    if (U_FAILURE(*status)) {
        log_err("Could not open root collator %s\n", u_errorName(*status));
        uprv_delete_collIterate(c);
        return;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err("Could not get collator name: %s\n", u_errorName(*status));
        ucol_close(UCA);
        uprv_delete_collIterate(c);
        return;
    }

    uprv_memset(&src, 0, sizeof(UColTokenParser));

    /* The UCA constants live inside the root collator's image, at offset UCAConsts. */
    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);

    src.invUCA = ucol_initInverseUCA(status);

    /* The boundary table is shared by all calls; fill it in on first use only. */
    if(indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
        indirectBoundariesSet = TRUE;
    }


    if(U_SUCCESS(*status) && ruleLen > 0) {
        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            log_err("%s: could not allocate memory for a copy of the rules\n", colLoc);
            ucol_close(UCA);
            uprv_delete_collIterate(c);
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy.
           From here on the buffer is therefore only reached (and freed) through src.source. */
        while (ucol_tok_parseNextToken(&src, startOfRules, &parseError,status) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);

            currCE = ucol_getNextCE(coll, c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
                currCE = ucol_getNextCE(coll, c, status);
            }

            /* The following CE only matters when it continues the first one. */
            currContCE = ucol_getNextCE(coll, c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }

            /* we need to repack CEs here */

            if(strength == UCOL_TOK_RESET) {
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(top_ == TRUE) {
                    int32_t tokenIndex = src.parsedToken.indirectIndex;

                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
                } else {
                    nextCE = baseCE = currCE;
                    nextContCE = baseContCE = currContCE;
                }
                maxStrength = UCOL_IDENTICAL;
            } else {
                if(strength < maxStrength) {
                    maxStrength = strength;
                    if(baseCE == UCOL_RESET_TOP_VALUE) {
                        log_verbose("Resetting to [top]\n");
                        nextCE = UCOL_NEXT_TOP_VALUE;
                        nextContCE = UCOL_NEXT_TOP_CONT;
                    } else {
                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                    }
                    if(result < 0) {
                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
                        } else {
                            log_err("%s: couldn't find the CE\n", colLoc);
                        }
                        /* Stop checking this collator, but leave through the
                           common cleanup below instead of returning with the
                           rules copy, UCA and c still allocated. */
                        break;
                    }
                }

                /* Bits 6-7 of the lead CE and bit 6 of the continuation are
                   cleared before comparing.  NOTE(review): presumably these
                   are case/flag bits - confirm against ucol_imp.h. */
                currCE &= 0xFFFFFF3F;
                currContCE &= 0xFFFFFFBF;

                if(maxStrength == UCOL_IDENTICAL) {
                    if(baseCE != currCE || baseContCE != currContCE) {
                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                    }
                } else {
                    if(strength == UCOL_IDENTICAL) {
                        if(lastCE != currCE || lastContCE != currContCE) {
                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                        }
                    } else {
                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
                            log_err("%s: current CE is not less than base CE\n", colLoc);
                        }
                        if(!before) {
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        } else {
                            /* Only the first token after a [before] reset is
                               checked in the reversed direction. */
                            before = FALSE;
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        }
                    }
                }

            }

            oldOffset = chOffset;
            lastCE = currCE & 0xFFFFFF3F;
            lastContCE = currContCE & 0xFFFFFFBF;
        }
        uprv_free(src.source);
    }
    ucol_close(UCA);
    uprv_delete_collIterate(c);
}
1293 
#if 0
/* Disabled: these locales are now picked from the index resource bundle. */
static const char* localesToTest[] = {
    "ar", "bg", "ca", "cs", "da", "el", "en_BE", "en_US_POSIX",
    "es", "et", "fi", "fr", "hi", "hr", "hu", "is",
    "iw", "ja", "ko", "lt", "lv", "mk", "mt", "nb",
    "nn", "nn_NO", "pl", "ro", "ru", "sh", "sk", "sl",
    "sq", "sr", "sv", "th", "tr", "uk", "vi", "zh",
    "zh_TW"
};
#endif
1308 
/*
 * Stand-alone tailoring rules that RamsRulesTest runs in addition to the
 * per-locale collators.  The strings are stored in escaped form and are
 * passed through u_unescape() before use.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

    /* disabled rule: "& Z < p, P" */

    /*
     * Cui Mins rules.  The comment under each rule keeps the alternative
     * spelling that has always been carried, disabled, next to it.
     */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
};
1322 
1323 
TestCollations(void)1324 static void TestCollations(void) {
1325     int32_t noOfLoc = uloc_countAvailable();
1326     int32_t i = 0, j = 0;
1327 
1328     UErrorCode status = U_ZERO_ERROR;
1329     char cName[256];
1330     UChar name[256];
1331     int32_t nameSize;
1332 
1333 
1334     const char *locName = NULL;
1335     UCollator *coll = NULL;
1336     UCollator *UCA = ucol_open("", &status);
1337     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1338     if (U_FAILURE(status)) {
1339         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1340         return;
1341     }
1342     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1343 
1344     for(i = 0; i<noOfLoc; i++) {
1345         status = U_ZERO_ERROR;
1346         locName = uloc_getAvailable(i);
1347         if(uprv_strcmp("ja", locName) == 0) {
1348             log_verbose("Don't know how to test prefixes\n");
1349             continue;
1350         }
1351         if(hasCollationElements(locName)) {
1352             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1353             for(j = 0; j<nameSize; j++) {
1354                 cName[j] = (char)name[j];
1355             }
1356             cName[nameSize] = 0;
1357             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1358             coll = ucol_open(locName, &status);
1359             if(U_SUCCESS(status)) {
1360                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1361                 ucol_close(coll);
1362             } else {
1363                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1364                 status = U_ZERO_ERROR;
1365             }
1366         }
1367     }
1368     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1369     ucol_close(UCA);
1370 }
1371 
RamsRulesTest(void)1372 static void RamsRulesTest(void) {
1373     UErrorCode status = U_ZERO_ERROR;
1374     int32_t i = 0;
1375     UCollator *coll = NULL;
1376     UChar rule[2048];
1377     uint32_t ruleLen;
1378     int32_t noOfLoc = uloc_countAvailable();
1379     const char *locName = NULL;
1380 
1381     log_verbose("RamsRulesTest\n");
1382 
1383     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1384         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1385         return;
1386     }
1387 
1388     for(i = 0; i<noOfLoc; i++) {
1389         locName = uloc_getAvailable(i);
1390         if(hasCollationElements(locName)) {
1391             if (uprv_strcmp("ja", locName)==0) {
1392                 log_verbose("Don't know how to test Japanese because of prefixes\n");
1393                 continue;
1394             }
1395             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1396                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1397                 continue;
1398             }
1399             if (uprv_strcmp("bn", locName)==0 ||
1400                 uprv_strcmp("en_US_POSIX", locName)==0 ||
1401                 uprv_strcmp("km", locName)==0 ||
1402                 uprv_strcmp("km_KH", locName)==0 ||
1403                 uprv_strcmp("my", locName)==0 ||
1404                 uprv_strcmp("si", locName)==0 ||
1405                 uprv_strcmp("si_LK", locName)==0 ||
1406                 uprv_strcmp("zh", locName)==0 ||
1407                 uprv_strcmp("zh_Hant", locName)==0
1408             ) {
1409                 log_verbose("Don't know how to test %s. "
1410                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1411                 continue;
1412             }
1413             log_verbose("Testing locale %s\n", locName);
1414             status = U_ZERO_ERROR;
1415             coll = ucol_open(locName, &status);
1416             if(U_SUCCESS(status)) {
1417               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1418                 if(coll->image->jamoSpecial == TRUE) {
1419                   log_err("%s has special JAMOs\n", locName);
1420                 }
1421                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1422                 testCollator(coll, &status);
1423                 testCEs(coll, &status);
1424               } else {
1425                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1426               }
1427               ucol_close(coll);
1428             } else {
1429               log_err("Could not open %s: %s\n", locName, u_errorName(status));
1430             }
1431         }
1432     }
1433 
1434     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1435         log_verbose("Testing rule: %s\n", rulesToTest[i]);
1436         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1437         status = U_ZERO_ERROR;
1438         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1439         if(U_SUCCESS(status)) {
1440             testCollator(coll, &status);
1441             testCEs(coll, &status);
1442             ucol_close(coll);
1443         } else {
1444           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1445         }
1446     }
1447 
1448 }
1449 
IsTailoredTest(void)1450 static void IsTailoredTest(void) {
1451     UErrorCode status = U_ZERO_ERROR;
1452     uint32_t i = 0;
1453     UCollator *coll = NULL;
1454     UChar rule[2048];
1455     UChar tailored[2048];
1456     UChar notTailored[2048];
1457     uint32_t ruleLen, tailoredLen, notTailoredLen;
1458 
1459     log_verbose("IsTailoredTest\n");
1460 
1461     u_uastrcpy(rule, "&Z < A, B, C;c < d");
1462     ruleLen = u_strlen(rule);
1463 
1464     u_uastrcpy(tailored, "ABCcd");
1465     tailoredLen = u_strlen(tailored);
1466 
1467     u_uastrcpy(notTailored, "ZabD");
1468     notTailoredLen = u_strlen(notTailored);
1469 
1470     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1471     if(U_SUCCESS(status)) {
1472         for(i = 0; i<tailoredLen; i++) {
1473             if(!ucol_isTailored(coll, tailored[i], &status)) {
1474                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1475             }
1476         }
1477         for(i = 0; i<notTailoredLen; i++) {
1478             if(ucol_isTailored(coll, notTailored[i], &status)) {
1479                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1480             }
1481         }
1482         ucol_close(coll);
1483     }
1484     else {
1485         log_err_status(status, "Can't tailor rules\n");
1486     }
1487     /* Code coverage */
1488     status = U_ZERO_ERROR;
1489     coll = ucol_open("ja", &status);
1490     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1491         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1492     }
1493     ucol_close(coll);
1494 }
1495 
1496 
/*
 * Test strings for TestChMove, in escaped form.  TestChMove expects every
 * entry to compare as less than every entry that follows it when collated
 * with the "cs" collator.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  /* c with U+030C */
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, then r with U+030C */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s, then s with U+030C */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z, then z with U+030C */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1516 
TestChMove(void)1517 static void TestChMove(void) {
1518     UChar t1[256] = {0};
1519     UChar t2[256] = {0};
1520 
1521     uint32_t i = 0, j = 0;
1522     uint32_t size = 0;
1523     UErrorCode status = U_ZERO_ERROR;
1524 
1525     UCollator *coll = ucol_open("cs", &status);
1526 
1527     if(U_SUCCESS(status)) {
1528         size = sizeof(chTest)/sizeof(chTest[0]);
1529         for(i = 0; i < size-1; i++) {
1530             for(j = i+1; j < size; j++) {
1531                 u_unescape(chTest[i], t1, 256);
1532                 u_unescape(chTest[j], t2, 256);
1533                 doTest(coll, t1, t2, UCOL_LESS);
1534             }
1535         }
1536     }
1537     else {
1538         log_data_err("Can't open collator");
1539     }
1540     ucol_close(coll);
1541 }
1542 
1543 
1544 
1545 
/*
 * Escaped test strings bracketed by the ideographs U+4E00 and U+4E01.
 * Within this part of the file they are referenced only from disabled code
 * in TestImplicitTailoring.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1554 
1555 
/*
 * Feeds a handful of rules that tailor around the ideographs U+4E00 and
 * U+4E01 to genericRulesStarter(), each together with a list of escaped
 * test strings and the number of strings in that list (presumably the
 * expected sort order under the rule - see genericRulesStarter).
 *
 * The second case uses the same rule and strings as an older hand-written
 * loop over impTest that used to live here as commented-out code.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;      /* escaped tailoring rule */
    const char *data[10];   /* escaped test strings for that rule */
    const uint32_t len;     /* number of entries used in data */
  } cases[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };
  const int32_t caseCount = (int32_t)(sizeof(cases)/sizeof(cases[0]));
  int32_t idx = 0;

  while(idx < caseCount) {
      genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
      idx++;
  }
}
1605 
TestFCDProblem(void)1606 static void TestFCDProblem(void) {
1607   UChar t1[256] = {0};
1608   UChar t2[256] = {0};
1609 
1610   const char *s1 = "\\u0430\\u0306\\u0325";
1611   const char *s2 = "\\u04D1\\u0325";
1612 
1613   UErrorCode status = U_ZERO_ERROR;
1614   UCollator *coll = ucol_open("", &status);
1615   u_unescape(s1, t1, 256);
1616   u_unescape(s2, t2, 256);
1617 
1618   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1619   doTest(coll, t1, t2, UCOL_EQUAL);
1620 
1621   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1622   doTest(coll, t1, t2, UCOL_EQUAL);
1623 
1624   ucol_close(coll);
1625 }
1626 
1627 /*
1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1629 We're only using NFC/NFD in this test.
1630 */
1631 #define NORM_BUFFER_TEST_LEN 18
1632 typedef struct {
1633   UChar32 u;
1634   UChar NFC[NORM_BUFFER_TEST_LEN];
1635   UChar NFD[NORM_BUFFER_TEST_LEN];
1636 } tester;
1637 
TestComposeDecompose(void)1638 static void TestComposeDecompose(void) {
1639     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1640     static const UChar UNICODESET_STR[] = {
1641         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1642         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1643         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1644     };
1645     int32_t noOfLoc;
1646     int32_t i = 0, j = 0;
1647 
1648     UErrorCode status = U_ZERO_ERROR;
1649     const char *locName = NULL;
1650     uint32_t nfcSize;
1651     uint32_t nfdSize;
1652     tester **t;
1653     uint32_t noCases = 0;
1654     UCollator *coll = NULL;
1655     UChar32 u = 0;
1656     UChar comp[NORM_BUFFER_TEST_LEN];
1657     uint32_t len = 0;
1658     UCollationElements *iter;
1659     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1660     int32_t charsToTestSize;
1661 
1662     noOfLoc = uloc_countAvailable();
1663 
1664     coll = ucol_open("", &status);
1665     if (U_FAILURE(status)) {
1666         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1667         return;
1668     }
1669     charsToTestSize = uset_size(charsToTest);
1670     if (charsToTestSize <= 0) {
1671         log_err("Set was zero. Missing data?\n");
1672         return;
1673     }
1674     t = malloc(charsToTestSize * sizeof(tester *));
1675     t[0] = (tester *)malloc(sizeof(tester));
1676     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1677 
1678     for(u = 0; u < charsToTestSize; u++) {
1679         UChar32 ch = uset_charAt(charsToTest, u);
1680         len = 0;
1681         UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1682         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1683         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1684 
1685         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1686           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1687             t[noCases]->u = ch;
1688             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1689                 u_strncpy(t[noCases]->NFC, comp, len);
1690                 t[noCases]->NFC[len] = 0;
1691             }
1692             noCases++;
1693             t[noCases] = (tester *)malloc(sizeof(tester));
1694             uprv_memset(t[noCases], 0, sizeof(tester));
1695         }
1696     }
1697     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1698     uset_close(charsToTest);
1699     charsToTest = NULL;
1700 
1701     for(u=0; u<(UChar32)noCases; u++) {
1702         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1703             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1704             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1705         }
1706     }
1707     /*
1708     for(u = 0; u < charsToTestSize; u++) {
1709       if(!(u&0xFFFF)) {
1710         log_verbose("%08X ", u);
1711       }
1712       uprv_memset(t[noCases], 0, sizeof(tester));
1713       t[noCases]->u = u;
1714       len = 0;
1715       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1716       comp[len] = 0;
1717       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1718       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1719       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1720       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1721     }
1722     */
1723 
1724     ucol_close(coll);
1725 
1726     log_verbose("Testing locales, number of cases = %i\n", noCases);
1727     for(i = 0; i<noOfLoc; i++) {
1728         status = U_ZERO_ERROR;
1729         locName = uloc_getAvailable(i);
1730         if(hasCollationElements(locName)) {
1731             char cName[256];
1732             UChar name[256];
1733             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1734 
1735             for(j = 0; j<nameSize; j++) {
1736                 cName[j] = (char)name[j];
1737             }
1738             cName[nameSize] = 0;
1739             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1740 
1741             coll = ucol_open(locName, &status);
1742             ucol_setStrength(coll, UCOL_IDENTICAL);
1743             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1744 
1745             for(u=0; u<(UChar32)noCases; u++) {
1746                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1747                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1748                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1749                     log_verbose("Testing NFC\n");
1750                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1751                     backAndForth(iter);
1752                     log_verbose("Testing NFD\n");
1753                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1754                     backAndForth(iter);
1755                 }
1756             }
1757             ucol_closeElements(iter);
1758             ucol_close(coll);
1759         }
1760     }
1761     for(u = 0; u <= (UChar32)noCases; u++) {
1762         free(t[u]);
1763     }
1764     free(t);
1765 }
1766 
TestEmptyRule(void)1767 static void TestEmptyRule(void) {
1768   UErrorCode status = U_ZERO_ERROR;
1769   UChar rulez[] = { 0 };
1770   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1771 
1772   ucol_close(coll);
1773 }
1774 
TestUCARules(void)1775 static void TestUCARules(void) {
1776   UErrorCode status = U_ZERO_ERROR;
1777   UChar b[256];
1778   UChar *rules = b;
1779   uint32_t ruleLen = 0;
1780   UCollator *UCAfromRules = NULL;
1781   UCollator *coll = ucol_open("", &status);
1782   if(status == U_FILE_ACCESS_ERROR) {
1783     log_data_err("Is your data around?\n");
1784     return;
1785   } else if(U_FAILURE(status)) {
1786     log_err("Error opening collator\n");
1787     return;
1788   }
1789   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1790 
1791   log_verbose("TestUCARules\n");
1792   if(ruleLen > 256) {
1793     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1794     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1795   }
1796   log_verbose("Rules length is %d\n", ruleLen);
1797   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1798   if(U_SUCCESS(status)) {
1799     ucol_close(UCAfromRules);
1800   } else {
1801     log_verbose("Unable to create a collator from UCARules!\n");
1802   }
1803 /*
1804   u_unescape(blah, b, 256);
1805   ucol_getSortKey(coll, b, 1, res, 256);
1806 */
1807   ucol_close(coll);
1808   if(rules != b) {
1809     free(rules);
1810   }
1811 }
1812 
1813 
1814 /* Pinyin tonal order */
1815 /*
1816     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1817           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1818     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1819     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1820     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1821     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1822       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1823 .. (\u00fc)
1824 
1825 However, in testing we got the following order:
1826     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1827           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1828     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1829 .. (\u0113)
1830     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1831     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1832     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1833 .. (\u01d8)
1834       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1835 */
1836 
/**
 * Exercises the [before 1] rule syntax with the Pinyin tonal vowels.
 *
 * The tailoring and the list of test strings are handed to
 * genericRulesStarter(); the strings are given as \u escapes.
 */
static void TestBefore(void) {
  static const char *tonalVowels[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  /* One reset per base vowel, followed by the u-diaeresis group. */
  static const char tailoring[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tailoring, tonalVowels,
                      sizeof(tonalVowels)/sizeof(tonalVowels[0]));
}
1855 
#if 0
/* Disabled: superseded by TestBeforePinyin. */
static void TestJ784(void) {
  /* Strings handed to genericLocaleStarter() for the "zh" locale. */
  static const char *pinyinVowels[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", pinyinVowels,
                       sizeof(pinyinVowels)/sizeof(pinyinVowels[0]));
}
#endif
1871 
#if 0
/* Disabled: superseded by the changes to the lv locale. */
static void TestJ831(void) {
  /* Strings handed to genericLocaleStarter() for the "lv" locale. */
  static const char *latvianOrder[] = {
    "I",
    "i",
    "Y",
    "y"
  };

  genericLocaleStarter("lv", latvianOrder,
                       sizeof(latvianOrder)/sizeof(latvianOrder[0]));
}
#endif
1884 
/**
 * Runs one list of strings (lower/upper-case pairs around the ae ligature)
 * through two collators: first the "fr" locale, then a rule-based collator
 * built from a [backwards 2] tailoring that defines the ligatures as
 * expansions.
 */
static void TestJ815(void) {
  static const char *ligatureOrder[] = {
    "aa",
      "Aa",
      "ab",
      "Ab",
      "ad",
      "Ad",
      "ae",
      "Ae",
      "\\u00e6",
      "\\u00c6",
      "af",
      "Af",
      "b",
      "B"
  };
  static const char backwardsRules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", ligatureOrder,
                       sizeof(ligatureOrder)/sizeof(ligatureOrder[0]));
  genericRulesStarter(backwardsRules, ligatureOrder,
                      sizeof(ligatureOrder)/sizeof(ligatureOrder[0]));
}
1905 
1906 
1907 /*
1908 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1909 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1910 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1911 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1912 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1913 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1914 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1915 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1917 */
TestRedundantRules(void)1918 static void TestRedundantRules(void) {
1919   int32_t i;
1920 
1921   static const struct {
1922       const char *rules;
1923       const char *expectedRules;
1924       const char *testdata[8];
1925       uint32_t testdatalen;
1926   } tests[] = {
1927     /* this test conflicts with positioning of CODAN placeholder */
1928        /*{
1929         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1930         "&\\u2089<<<x",
1931         {"\\u2089", "x"}, 2
1932        }, */
1933     /* this test conflicts with the [before x] syntax tightening */
1934       /*{
1935         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1936         "&\\u0252<<<x",
1937         {"\\u0252", "x"}, 2
1938       }, */
1939     /* this test conflicts with the [before x] syntax tightening */
1940       /*{
1941          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1942          "& a <<< x < b <<< c << d <<< e",
1943         {"a", "x", "b", "c", "d", "e"}, 6
1944       }, */
1945       {
1946         "& a < b < c < d& [before 1] c < m",
1947         "& a < b < m < c < d",
1948         {"a", "b", "m", "c", "d"}, 5
1949       },
1950       {
1951         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1952         "& a < b <<< c << d <<< x <<< e",
1953         {"a", "b", "c", "d", "x", "e"}, 6
1954       },
1955     /* this test conflicts with the [before x] syntax tightening */
1956       /* {
1957         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1958         "& a < b <<< c <<< x << d <<< e",
1959         {"a", "b", "c", "x", "d", "e"},, 6
1960       }, */
1961       {
1962         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1963         "& a < b <<< c << d <<< e <<< f < x < g",
1964         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1965       },
1966       {
1967         "& a <<< b << c < d& a < m",
1968         "& a <<< b << c < m < d",
1969         {"a", "b", "c", "m", "d"}, 5
1970       },
1971       {
1972         "&a<b<<b\\u0301 &z<b",
1973         "&a<b\\u0301 &z<b",
1974         {"a", "b\\u0301", "z", "b"}, 4
1975       },
1976       {
1977         "&z<m<<<q<<<m",
1978         "&z<q<<<m",
1979         {"z", "q", "m"},3
1980       },
1981       {
1982         "&z<<<m<q<<<m",
1983         "&z<q<<<m",
1984         {"z", "q", "m"}, 3
1985       },
1986       {
1987         "& a < b < c < d& r < c",
1988         "& a < b < d& r < c",
1989         {"a", "b", "d"}, 3
1990       },
1991       {
1992         "& a < b < c < d& r < c",
1993         "& a < b < d& r < c",
1994         {"r", "c"}, 2
1995       },
1996       {
1997         "& a < b < c < d& c < m",
1998         "& a < b < c < m < d",
1999         {"a", "b", "c", "m", "d"}, 5
2000       },
2001       {
2002         "& a < b < c < d& a < m",
2003         "& a < m < b < c < d",
2004         {"a", "m", "b", "c", "d"}, 5
2005       }
2006   };
2007 
2008 
2009   UCollator *credundant = NULL;
2010   UCollator *cresulting = NULL;
2011   UErrorCode status = U_ZERO_ERROR;
2012   UChar rlz[2048] = { 0 };
2013   uint32_t rlen = 0;
2014 
2015   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2016     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2017     rlen = u_unescape(tests[i].rules, rlz, 2048);
2018 
2019     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2020     if(status == U_FILE_ACCESS_ERROR) {
2021       log_data_err("Is your data around?\n");
2022       return;
2023     } else if(U_FAILURE(status)) {
2024       log_err("Error opening collator\n");
2025       return;
2026     }
2027 
2028     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2029     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2030 
2031     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2032 
2033     ucol_close(credundant);
2034     ucol_close(cresulting);
2035 
2036     log_verbose("testing using data\n");
2037 
2038     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2039   }
2040 
2041 }
2042 
TestExpansionSyntax(void)2043 static void TestExpansionSyntax(void) {
2044   int32_t i;
2045 
2046   const static char *rules[] = {
2047     "&AE <<< a << b <<< c &d <<< f",
2048     "&AE <<< a <<< b << c << d < e < f <<< g",
2049     "&AE <<< B <<< C / D <<< F"
2050   };
2051 
2052   const static char *expectedRules[] = {
2053     "&A <<< a / E << b / E <<< c /E  &d <<< f",
2054     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2055     "&A <<< B / E <<< C / ED <<< F / E"
2056   };
2057 
2058   const static char *testdata[][8] = {
2059     {"AE", "a", "b", "c"},
2060     {"AE", "a", "b", "c", "d", "e", "f", "g"},
2061     {"AE", "B", "C"} /* / ED <<< F / E"},*/
2062   };
2063 
2064   const static uint32_t testdatalen[] = {
2065       4,
2066       8,
2067       3
2068   };
2069 
2070 
2071 
2072   UCollator *credundant = NULL;
2073   UCollator *cresulting = NULL;
2074   UErrorCode status = U_ZERO_ERROR;
2075   UChar rlz[2048] = { 0 };
2076   uint32_t rlen = 0;
2077 
2078   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2079     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2080     rlen = u_unescape(rules[i], rlz, 2048);
2081 
2082     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2083     if(status == U_FILE_ACCESS_ERROR) {
2084       log_data_err("Is your data around?\n");
2085       return;
2086     } else if(U_FAILURE(status)) {
2087       log_err("Error opening collator\n");
2088       return;
2089     }
2090     rlen = u_unescape(expectedRules[i], rlz, 2048);
2091     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2092 
2093     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2094     /* as a hard error test, but only in information mode */
2095     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2096 
2097     ucol_close(credundant);
2098     ucol_close(cresulting);
2099 
2100     log_verbose("testing using data\n");
2101 
2102     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2103   }
2104 }
2105 
TestCase(void)2106 static void TestCase(void)
2107 {
2108     const static UChar gRules[MAX_TOKEN_LEN] =
2109     /*" & 0 < 1,\u2461<a,A"*/
2110     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2111 
2112     const static UChar testCase[][MAX_TOKEN_LEN] =
2113     {
2114         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2115         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2116         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2117         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2118     };
2119 
2120     const static UCollationResult caseTestResults[][9] =
2121     {
2122         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2123         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2124         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2125         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2126     };
2127 
2128     const static UColAttributeValue caseTestAttributes[][2] =
2129     {
2130         { UCOL_LOWER_FIRST, UCOL_OFF},
2131         { UCOL_UPPER_FIRST, UCOL_OFF},
2132         { UCOL_LOWER_FIRST, UCOL_ON},
2133         { UCOL_UPPER_FIRST, UCOL_ON}
2134     };
2135     int32_t i,j,k;
2136     UErrorCode status = U_ZERO_ERROR;
2137     UCollationElements *iter;
2138     UCollator  *myCollation;
2139     myCollation = ucol_open("en_US", &status);
2140 
2141     if(U_FAILURE(status)){
2142         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2143         return;
2144     }
2145     log_verbose("Testing different case settings\n");
2146     ucol_setStrength(myCollation, UCOL_TERTIARY);
2147 
2148     for(k = 0; k<4; k++) {
2149       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2150       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2151       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2152       for (i = 0; i < 3 ; i++) {
2153         for(j = i+1; j<4; j++) {
2154           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2155         }
2156       }
2157     }
2158     ucol_close(myCollation);
2159 
2160     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2161     if(U_FAILURE(status)){
2162         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2163         return;
2164     }
2165     log_verbose("Testing different case settings with custom rules\n");
2166     ucol_setStrength(myCollation, UCOL_TERTIARY);
2167 
2168     for(k = 0; k<4; k++) {
2169       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2170       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2171       for (i = 0; i < 3 ; i++) {
2172         for(j = i+1; j<4; j++) {
2173           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2174           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2175           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2176           backAndForth(iter);
2177           ucol_closeElements(iter);
2178           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2179           backAndForth(iter);
2180           ucol_closeElements(iter);
2181         }
2182       }
2183     }
2184     ucol_close(myCollation);
2185     {
2186       const static char *lowerFirst[] = {
2187         "h",
2188         "H",
2189         "ch",
2190         "Ch",
2191         "CH",
2192         "cha",
2193         "chA",
2194         "Cha",
2195         "ChA",
2196         "CHa",
2197         "CHA",
2198         "i",
2199         "I"
2200       };
2201 
2202       const static char *upperFirst[] = {
2203         "H",
2204         "h",
2205         "CH",
2206         "Ch",
2207         "ch",
2208         "CHA",
2209         "CHa",
2210         "ChA",
2211         "Cha",
2212         "chA",
2213         "cha",
2214         "I",
2215         "i"
2216       };
2217       log_verbose("mixed case test\n");
2218       log_verbose("lower first, case level off\n");
2219       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2220       log_verbose("upper first, case level off\n");
2221       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2222       log_verbose("lower first, case level on\n");
2223       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2224       log_verbose("upper first, case level on\n");
2225       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2226     }
2227 
2228 }
2229 
TestIncrementalNormalize(void)2230 static void TestIncrementalNormalize(void) {
2231 
2232     /*UChar baseA     =0x61;*/
2233     UChar baseA     =0x41;
2234 /*    UChar baseB     = 0x42;*/
2235     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2236     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2237     /*
2238         0x316 is combining grave accent below, cc=220
2239         0x321 is combining palatalized hook below, cc=202
2240         0x300 is combining grave accent, cc=230
2241     */
2242 
2243 #define MAXSLEN 2000
2244     /*int          maxSLen   = 64000;*/
2245     int          sLen;
2246     int          i;
2247 
2248     UCollator        *coll;
2249     UErrorCode       status = U_ZERO_ERROR;
2250     UCollationResult result;
2251 
2252     int32_t myQ = getTestOption(QUICK_OPTION);
2253 
2254     if(getTestOption(QUICK_OPTION) < 0) {
2255         setTestOption(QUICK_OPTION, 1);
2256     }
2257 
2258     {
2259         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2260         /*          most buffers along the way.*/
2261         UChar            strA[MAXSLEN+1];
2262         UChar            strB[MAXSLEN+1];
2263 
2264         coll = ucol_open("en_US", &status);
2265         if(status == U_FILE_ACCESS_ERROR) {
2266           log_data_err("Is your data around?\n");
2267           return;
2268         } else if(U_FAILURE(status)) {
2269           log_err("Error opening collator\n");
2270           return;
2271         }
2272         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2273 
2274         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2275         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2276         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2277         for (sLen = 500; sLen<501; sLen++) {
2278         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2279             strA[0] = baseA;
2280             strB[0] = baseA;
2281             for (i=1; i<=sLen-1; i++) {
2282                 strA[i] = ccMix[i % 3];
2283                 strB[sLen-i] = ccMix[i % 3];
2284             }
2285             strA[sLen]   = 0;
2286             strB[sLen]   = 0;
2287 
2288             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2289             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2290             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2291             doTest(coll, strA, strB, UCOL_EQUAL);
2292         }
2293     }
2294 
2295     setTestOption(QUICK_OPTION, myQ);
2296 
2297 
2298     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2299     /*         of the string.  Checks a couple of edge cases.*/
2300 
2301     {
2302         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2303         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2304         ucol_setStrength(coll, UCOL_TERTIARY);
2305         doTest(coll, strA, strB, UCOL_EQUAL);
2306     }
2307 
2308     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2309 
2310     {
2311       /* New UCA  3.1.1.
2312        * test below used a code point from Desseret, which sorts differently
2313        * than d800 dc00
2314        */
2315         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2316         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2317         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2318         ucol_setStrength(coll, UCOL_TERTIARY);
2319         doTest(coll, strA, strB, UCOL_GREATER);
2320     }
2321 
2322     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2323 
2324     {
2325         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2326         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2327         char  sortKeyA[50];
2328         char  sortKeyAz[50];
2329         char  sortKeyB[50];
2330         char  sortKeyBz[50];
2331         int   r;
2332 
2333         /* there used to be -3 here. Hmmmm.... */
2334         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2335         result = ucol_strcoll(coll, strA, 3, strB, 3);
2336         if (result != UCOL_GREATER) {
2337             log_err("ERROR 1 in test 4\n");
2338         }
2339         result = ucol_strcoll(coll, strA, -1, strB, -1);
2340         if (result != UCOL_EQUAL) {
2341             log_err("ERROR 2 in test 4\n");
2342         }
2343 
2344         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2345         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2346         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2347         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2348 
2349         r = strcmp(sortKeyA, sortKeyAz);
2350         if (r <= 0) {
2351             log_err("Error 3 in test 4\n");
2352         }
2353         r = strcmp(sortKeyA, sortKeyB);
2354         if (r <= 0) {
2355             log_err("Error 4 in test 4\n");
2356         }
2357         r = strcmp(sortKeyAz, sortKeyBz);
2358         if (r != 0) {
2359             log_err("Error 5 in test 4\n");
2360         }
2361 
2362         ucol_setStrength(coll, UCOL_IDENTICAL);
2363         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2364         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2365         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2366         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2367 
2368         r = strcmp(sortKeyA, sortKeyAz);
2369         if (r <= 0) {
2370             log_err("Error 6 in test 4\n");
2371         }
2372         r = strcmp(sortKeyA, sortKeyB);
2373         if (r <= 0) {
2374             log_err("Error 7 in test 4\n");
2375         }
2376         r = strcmp(sortKeyAz, sortKeyBz);
2377         if (r != 0) {
2378             log_err("Error 8 in test 4\n");
2379         }
2380         ucol_setStrength(coll, UCOL_TERTIARY);
2381     }
2382 
2383 
2384     /*  Test 5:  Null characters in non-normal source strings.*/
2385 
2386     {
2387         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2388         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2389         char  sortKeyA[50];
2390         char  sortKeyAz[50];
2391         char  sortKeyB[50];
2392         char  sortKeyBz[50];
2393         int   r;
2394 
2395         result = ucol_strcoll(coll, strA, 6, strB, 6);
2396         if (result != UCOL_GREATER) {
2397             log_err("ERROR 1 in test 5\n");
2398         }
2399         result = ucol_strcoll(coll, strA, -1, strB, -1);
2400         if (result != UCOL_EQUAL) {
2401             log_err("ERROR 2 in test 5\n");
2402         }
2403 
2404         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2405         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2406         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2407         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2408 
2409         r = strcmp(sortKeyA, sortKeyAz);
2410         if (r <= 0) {
2411             log_err("Error 3 in test 5\n");
2412         }
2413         r = strcmp(sortKeyA, sortKeyB);
2414         if (r <= 0) {
2415             log_err("Error 4 in test 5\n");
2416         }
2417         r = strcmp(sortKeyAz, sortKeyBz);
2418         if (r != 0) {
2419             log_err("Error 5 in test 5\n");
2420         }
2421 
2422         ucol_setStrength(coll, UCOL_IDENTICAL);
2423         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2424         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2425         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2426         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2427 
2428         r = strcmp(sortKeyA, sortKeyAz);
2429         if (r <= 0) {
2430             log_err("Error 6 in test 5\n");
2431         }
2432         r = strcmp(sortKeyA, sortKeyB);
2433         if (r <= 0) {
2434             log_err("Error 7 in test 5\n");
2435         }
2436         r = strcmp(sortKeyAz, sortKeyBz);
2437         if (r != 0) {
2438             log_err("Error 8 in test 5\n");
2439         }
2440         ucol_setStrength(coll, UCOL_TERTIARY);
2441     }
2442 
2443 
2444     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2445 
2446     {
2447         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2448         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2449 
2450         result = ucol_strcoll(coll, strA, 5, strB, 5);
2451         if (result != UCOL_LESS) {
2452             log_err("Error 1 in test 6\n");
2453         }
2454         result = ucol_strcoll(coll, strA, -1, strB, -1);
2455         if (result != UCOL_EQUAL) {
2456             log_err("Error 2 in test 6\n");
2457         }
2458     }
2459 
2460     ucol_close(coll);
2461 }
2462 
2463 
2464 
#if 0
/*
 * Disabled test: for each string in caseBitData, unescapes it and compares
 * the value returned by ucol_uprv_getCaseBits() for the "" locale collator
 * against the matching entry in results.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator; it was previously leaked. */
  ucol_close(UCA);
}
#endif
2492 
TestHangulTailoring(void)2493 static void TestHangulTailoring(void) {
2494     static const char *koreanData[] = {
2495         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2496             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2497             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2498             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2499             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2500             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2501     };
2502 
2503     const char *rules =
2504         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2505         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2506         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2507         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2508         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2509         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2510 
2511 
2512   UErrorCode status = U_ZERO_ERROR;
2513   UChar rlz[2048] = { 0 };
2514   uint32_t rlen = u_unescape(rules, rlz, 2048);
2515 
2516   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2517   if(status == U_FILE_ACCESS_ERROR) {
2518     log_data_err("Is your data around?\n");
2519     return;
2520   } else if(U_FAILURE(status)) {
2521     log_err("Error opening collator\n");
2522     return;
2523   }
2524 
2525   log_verbose("Using start of korean rules\n");
2526 
2527   if(U_SUCCESS(status)) {
2528     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2529   } else {
2530     log_err("Unable to open collator with rules %s\n", rules);
2531   }
2532 
2533   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2534   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2535   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2536 
2537   ucol_close(coll);
2538 
2539   log_verbose("Using ko__LOTUS locale\n");
2540   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541 }
2542 
TestCompressOverlap(void)2543 static void TestCompressOverlap(void) {
2544     UChar       secstr[150];
2545     UChar       tertstr[150];
2546     UErrorCode  status = U_ZERO_ERROR;
2547     UCollator  *coll;
2548     char        result[200];
2549     uint32_t    resultlen;
2550     int         count = 0;
2551     char       *tempptr;
2552 
2553     coll = ucol_open("", &status);
2554 
2555     if (U_FAILURE(status)) {
2556         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2557         return;
2558     }
2559     while (count < 149) {
2560         secstr[count] = 0x0020; /* [06, 05, 05] */
2561         tertstr[count] = 0x0020;
2562         count ++;
2563     }
2564 
2565     /* top down compression ----------------------------------- */
2566     secstr[count] = 0x0332; /* [, 87, 05] */
2567     tertstr[count] = 0x3000; /* [06, 05, 07] */
2568 
2569     /* no compression secstr should have 150 secondary bytes, tertstr should
2570     have 150 tertiary bytes.
2571     with correct overlapping compression, secstr should have 4 secondary
2572     bytes, tertstr should have > 2 tertiary bytes */
2573     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2574     tempptr = uprv_strchr(result, 1) + 1;
2575     while (*(tempptr + 1) != 1) {
2576         /* the last secondary collation element is not checked since it is not
2577         part of the compression */
2578         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2579             log_err("Secondary compression overlapped\n");
2580         }
2581         tempptr ++;
2582     }
2583 
2584     /* tertiary top/bottom/common for en_US is similar to the secondary
2585     top/bottom/common */
2586     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2587     tempptr = uprv_strrchr(result, 1) + 1;
2588     while (*(tempptr + 1) != 0) {
2589         /* the last secondary collation element is not checked since it is not
2590         part of the compression */
2591         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2592             log_err("Tertiary compression overlapped\n");
2593         }
2594         tempptr ++;
2595     }
2596 
2597     /* bottom up compression ------------------------------------- */
2598     secstr[count] = 0;
2599     tertstr[count] = 0;
2600     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2601     tempptr = uprv_strchr(result, 1) + 1;
2602     while (*(tempptr + 1) != 1) {
2603         /* the last secondary collation element is not checked since it is not
2604         part of the compression */
2605         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2606             log_err("Secondary compression overlapped\n");
2607         }
2608         tempptr ++;
2609     }
2610 
2611     /* tertiary top/bottom/common for en_US is similar to the secondary
2612     top/bottom/common */
2613     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2614     tempptr = uprv_strrchr(result, 1) + 1;
2615     while (*(tempptr + 1) != 0) {
2616         /* the last secondary collation element is not checked since it is not
2617         part of the compression */
2618         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2619             log_err("Tertiary compression overlapped\n");
2620         }
2621         tempptr ++;
2622     }
2623 
2624     ucol_close(coll);
2625 }
2626 
/*
 * Runs the generic ordering helpers over three Cyrillic strings (U+0410
 * followed by a letter, U+0410 U+0306 followed by a letter, and U+04D0
 * followed by a letter): once with the "root" locale and then once for each
 * of a series of tailoring rules.
 */
static void TestCyrillicTailoring(void) {
  static const char *test[] = {
    "\\u0410b",
    "\\u0410\\u0306a",
    "\\u04d0A"
  };
  /* Tailorings are applied in exactly this order. */
  static const char * const tailorings[] = {
    "&\\u0410 = \\u0410",
    "&Z < \\u0410",
    "&\\u0410 = \\u0410 < \\u04d0",
    "&Z < \\u0410 < \\u04d0",
    "&\\u0410 = \\u0410 < \\u0410\\u0301",
    "&Z < \\u0410 < \\u0410\\u0301"
  };
  const uint32_t testCount = (uint32_t)(sizeof(test) / sizeof(test[0]));
  const uint32_t tailoringCount = (uint32_t)(sizeof(tailorings) / sizeof(tailorings[0]));
  uint32_t t;

  /* Russian overrides contractions, so the "ru" locale is no longer a valid
     subject for this test; only the root locale is checked. */
  genericLocaleStarter("root", test, testCount);

  for (t = 0; t < tailoringCount; ++t) {
    genericRulesStarter(tailorings[t], test, testCount);
  }
}
2645 
/*
 * Runs the generic rules helper twice with a rule that suppresses
 * contractions for U+0400..U+047F, once for each of two three-string lists.
 */
static void TestSuppressContractions(void) {
  static const char suppressCyrillic[] = "[suppressContractions [\\u0400-\\u047f]]";

  /* Latin letter first, followed by Cyrillic A with or without an accent. */
  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  /* Cyrillic A (optionally accented) first, followed by a Latin letter. */
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressCyrillic, testNoCont, 3);
  genericRulesStarter(suppressCyrillic, testNoCont2, 3);
}
2662 
TestContraction(void)2663 static void TestContraction(void) {
2664     const static char *testrules[] = {
2665         "&A = AB / B",
2666         "&A = A\\u0306/\\u0306",
2667         "&c = ch / h"
2668     };
2669     const static UChar testdata[][2] = {
2670         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2671         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2672         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2673     };
2674     const static UChar testdata2[][2] = {
2675         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2676         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2677         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2678     };
2679     const static char *testrules3[] = {
2680         "&z < xyz &xyzw << B",
2681         "&z < xyz &xyz << B / w",
2682         "&z < ch &achm << B",
2683         "&z < ch &a << B / chm",
2684         "&\\ud800\\udc00w << B",
2685         "&\\ud800\\udc00 << B / w",
2686         "&a\\ud800\\udc00m << B",
2687         "&a << B / \\ud800\\udc00m",
2688     };
2689 
2690     UErrorCode  status   = U_ZERO_ERROR;
2691     UCollator  *coll;
2692     UChar       rule[256] = {0};
2693     uint32_t    rlen     = 0;
2694     int         i;
2695 
2696     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2697         UCollationElements *iter1;
2698         int j = 0;
2699         log_verbose("Rule %s for testing\n", testrules[i]);
2700         rlen = u_unescape(testrules[i], rule, 32);
2701         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2702         if (U_FAILURE(status)) {
2703             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2704             return;
2705         }
2706         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2707         if (U_FAILURE(status)) {
2708             log_err("Collation iterator creation failed\n");
2709             return;
2710         }
2711         while (j < 2) {
2712             UCollationElements *iter2 = ucol_openElements(coll,
2713                                                          &(testdata[i][j]),
2714                                                          1, &status);
2715             uint32_t ce;
2716             if (U_FAILURE(status)) {
2717                 log_err("Collation iterator creation failed\n");
2718                 return;
2719             }
2720             ce = ucol_next(iter2, &status);
2721             while (ce != UCOL_NULLORDER) {
2722                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2723                     log_err("Collation elements in contraction split does not match\n");
2724                     return;
2725                 }
2726                 ce = ucol_next(iter2, &status);
2727             }
2728             j ++;
2729             ucol_closeElements(iter2);
2730         }
2731         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2732             log_err("Collation elements not exhausted\n");
2733             return;
2734         }
2735         ucol_closeElements(iter1);
2736         ucol_close(coll);
2737     }
2738 
2739     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2740     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2742         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2743                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2744                 testdata2[1][1]);
2745         return;
2746     }
2747     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2748         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2749                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2750                 testdata2[2][1]);
2751         return;
2752     }
2753     ucol_close(coll);
2754 
2755     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2756         UCollator          *coll1,
2757                            *coll2;
2758         UCollationElements *iter1,
2759                            *iter2;
2760         UChar               ch = 0x0042 /* 'B' */;
2761         uint32_t            ce;
2762         rlen = u_unescape(testrules3[i], rule, 32);
2763         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2764         rlen = u_unescape(testrules3[i + 1], rule, 32);
2765         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2766         if (U_FAILURE(status)) {
2767             log_err("Collator creation failed %s\n", testrules[i]);
2768             return;
2769         }
2770         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2771         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2772         if (U_FAILURE(status)) {
2773             log_err("Collation iterator creation failed\n");
2774             return;
2775         }
2776         ce = ucol_next(iter1, &status);
2777         if (U_FAILURE(status)) {
2778             log_err("Retrieving ces failed\n");
2779             return;
2780         }
2781         while (ce != UCOL_NULLORDER) {
2782             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2783                 log_err("CEs does not match\n");
2784                 return;
2785             }
2786             ce = ucol_next(iter1, &status);
2787             if (U_FAILURE(status)) {
2788                 log_err("Retrieving ces failed\n");
2789                 return;
2790             }
2791         }
2792         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2793             log_err("CEs not exhausted\n");
2794             return;
2795         }
2796         ucol_closeElements(iter1);
2797         ucol_closeElements(iter2);
2798         ucol_close(coll1);
2799         ucol_close(coll2);
2800     }
2801 }
2802 
TestExpansion(void)2803 static void TestExpansion(void) {
2804     const static char *testrules[] = {
2805         "&J << K / B & K << M",
2806         "&J << K / B << M"
2807     };
2808     const static UChar testdata[][3] = {
2809         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2810         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2811         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2812         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2813         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2814         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2815     };
2816 
2817     UErrorCode  status   = U_ZERO_ERROR;
2818     UCollator  *coll;
2819     UChar       rule[256] = {0};
2820     uint32_t    rlen     = 0;
2821     int         i;
2822 
2823     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2824         int j = 0;
2825         log_verbose("Rule %s for testing\n", testrules[i]);
2826         rlen = u_unescape(testrules[i], rule, 32);
2827         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2828         if (U_FAILURE(status)) {
2829             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2830             return;
2831         }
2832 
2833         for (j = 0; j < 5; j ++) {
2834             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2835         }
2836         ucol_close(coll);
2837     }
2838 }
2839 
2840 #if 0
/* This test exercises the current limitations of the engine. */
/* It always fails, so it is disabled by default. */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each inner block below defines one tailoring rule plus expected orderings
 * and passes them to genericRulesStarter() or, when collator attributes are
 * involved, to genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orderings are tried, first with decomposition on, then off */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the previous block; only
     the verbose message differs - confirm whether different data was meant. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[k] is paired with valOn[k] / valOff[k] */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    /* only the upper-first setting is exercised; the UCOL_OFF runs are commented out */
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
2930 #endif
2931 
TestBocsuCoverage(void)2932 static void TestBocsuCoverage(void) {
2933   UErrorCode status = U_ZERO_ERROR;
2934   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2935   UChar       test[256] = {0};
2936   uint32_t    tlen     = u_unescape(testString, test, 32);
2937   uint8_t key[256]     = {0};
2938   uint32_t klen         = 0;
2939 
2940   UCollator *coll = ucol_open("", &status);
2941   if(U_SUCCESS(status)) {
2942   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2943 
2944   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2945 
2946   ucol_close(coll);
2947   } else {
2948     log_data_err("Couldn't open UCA\n");
2949   }
2950 }
2951 
TestVariableTopSetting(void)2952 static void TestVariableTopSetting(void) {
2953   UErrorCode status = U_ZERO_ERROR;
2954   const UChar *current = NULL;
2955   uint32_t varTopOriginal = 0, varTop1, varTop2;
2956   UCollator *coll = ucol_open("", &status);
2957   if(U_SUCCESS(status)) {
2958 
2959   uint32_t strength = 0;
2960   uint16_t specs = 0;
2961   uint32_t chOffset = 0;
2962   uint32_t chLen = 0;
2963   uint32_t exOffset = 0;
2964   uint32_t exLen = 0;
2965   uint32_t oldChOffset = 0;
2966   uint32_t oldChLen = 0;
2967   uint32_t oldExOffset = 0;
2968   uint32_t oldExLen = 0;
2969   uint32_t prefixOffset = 0;
2970   uint32_t prefixLen = 0;
2971 
2972   UBool startOfRules = TRUE;
2973   UColTokenParser src;
2974   UColOptionSet opts;
2975 
2976   UChar *rulesCopy = NULL;
2977   uint32_t rulesLen;
2978 
2979   UCollationResult result;
2980 
2981   UChar first[256] = { 0 };
2982   UChar second[256] = { 0 };
2983   UParseError parseError;
2984   int32_t myQ = getTestOption(QUICK_OPTION);
2985 
2986   uprv_memset(&src, 0, sizeof(UColTokenParser));
2987 
2988   src.opts = &opts;
2989 
2990   if(getTestOption(QUICK_OPTION) <= 0) {
2991     setTestOption(QUICK_OPTION, 1);
2992   }
2993 
2994   /* this test will fail when normalization is turned on */
2995   /* therefore we always turn off exhaustive mode for it */
2996   { /* QUICK > 0*/
2997     log_verbose("Slide variable top over UCARules\n");
2998     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2999     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3000     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3001 
3002     if(U_SUCCESS(status) && rulesLen > 0) {
3003       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3004       src.current = src.source = rulesCopy;
3005       src.end = rulesCopy+rulesLen;
3006       src.extraCurrent = src.end;
3007       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3008 
3009 	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3010 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3011       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3012         strength = src.parsedToken.strength;
3013         chOffset = src.parsedToken.charsOffset;
3014         chLen = src.parsedToken.charsLen;
3015         exOffset = src.parsedToken.extensionOffset;
3016         exLen = src.parsedToken.extensionLen;
3017         prefixOffset = src.parsedToken.prefixOffset;
3018         prefixLen = src.parsedToken.prefixLen;
3019         specs = src.parsedToken.flags;
3020 
3021         startOfRules = FALSE;
3022         {
3023           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3024         }
3025         if(strength == UCOL_PRIMARY) {
3026           status = U_ZERO_ERROR;
3027           varTopOriginal = ucol_getVariableTop(coll, &status);
3028           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3029           if(U_FAILURE(status)) {
3030             char buffer[256];
3031             char *buf = buffer;
3032             uint32_t i = 0, j;
3033             uint32_t CE = UCOL_NO_MORE_CES;
3034 
3035             /* before we start screaming, let's see if there is a problem with the rules */
3036             UErrorCode collIterateStatus = U_ZERO_ERROR;
3037             collIterate *s = uprv_new_collIterate(&collIterateStatus);
3038             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3039 
3040             CE = ucol_getNextCE(coll, s, &status);
3041 
3042             for(i = 0; i < oldChLen; i++) {
3043               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3044               buf += j;
3045             }
3046             if(status == U_PRIMARY_TOO_LONG_ERROR) {
3047               log_verbose("= Expected failure for %s =", buffer);
3048             } else {
3049               if(uprv_collIterateAtEnd(s)) {
3050                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3051                   oldChOffset, u_errorName(status), buffer);
3052               } else {
3053                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3054                   buffer);
3055               }
3056             }
3057             uprv_delete_collIterate(s);
3058           }
3059           varTop2 = ucol_getVariableTop(coll, &status);
3060           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3061             log_err("cannot retrieve set varTop value!\n");
3062             continue;
3063           }
3064 
3065           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3066 
3067             u_strncpy(first, src.source+oldChOffset, oldChLen);
3068             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3069             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3070             first[2*oldChLen+chLen] = 0;
3071 
3072             if(oldExLen == 0) {
3073               u_strncpy(second, src.source+chOffset, chLen);
3074               second[chLen] = 0;
3075             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3076               u_strncpy(second, src.source+oldExOffset, oldExLen);
3077               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3078               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3079               second[2*oldExLen+chLen] = 0;
3080             }
3081             result = ucol_strcoll(coll, first, -1, second, -1);
3082             if(result == UCOL_EQUAL) {
3083               doTest(coll, first, second, UCOL_EQUAL);
3084             } else {
3085               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3086             }
3087           }
3088         }
3089         if(strength != UCOL_TOK_RESET) {
3090           oldChOffset = chOffset;
3091           oldChLen = chLen;
3092           oldExOffset = exOffset;
3093           oldExLen = exLen;
3094         }
3095       }
3096       status = U_ZERO_ERROR;
3097     }
3098     else {
3099       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3100       return;
3101     }
3102     if (U_FAILURE(status)) {
3103         log_err("Error parsing rules %s\n", u_errorName(status));
3104         return;
3105     }
3106     status = U_ZERO_ERROR;
3107   }
3108 
3109   setTestOption(QUICK_OPTION, myQ);
3110 
3111   log_verbose("Testing setting variable top to contractions\n");
3112   {
3113     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3114     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3115     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3116     while(*conts != 0) {
3117       if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3118         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3119       } else {
3120         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3121       }
3122       if(U_FAILURE(status)) {
3123         if(status == U_PRIMARY_TOO_LONG_ERROR) {
3124           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3125            * therefore it is not an error when it complains about them. */
3126           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3127                       *conts, *(conts+1), *(conts+2));
3128         } else {
3129           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3130                   *conts, *(conts+1), *(conts+2), u_errorName(status));
3131         }
3132         status = U_ZERO_ERROR;
3133       }
3134       conts+=3;
3135     }
3136 
3137     status = U_ZERO_ERROR;
3138 
3139     first[0] = 0x0040;
3140     first[1] = 0x0050;
3141     first[2] = 0x0000;
3142 
3143     ucol_setVariableTop(coll, first, -1, &status);
3144 
3145     if(U_SUCCESS(status)) {
3146       log_err("Invalid contraction succeded in setting variable top!\n");
3147     }
3148 
3149   }
3150 
3151   log_verbose("Test restoring variable top\n");
3152 
3153   status = U_ZERO_ERROR;
3154   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3155   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3156     log_err("Couldn't restore old variable top\n");
3157   }
3158 
3159   log_verbose("Testing calling with error set\n");
3160 
3161   status = U_INTERNAL_PROGRAM_ERROR;
3162   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3163   varTop2 = ucol_getVariableTop(coll, &status);
3164   ucol_restoreVariableTop(coll, varTop2, &status);
3165   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3166   varTop2 = ucol_getVariableTop(NULL, &status);
3167   ucol_restoreVariableTop(NULL, varTop2, &status);
3168   if(status != U_INTERNAL_PROGRAM_ERROR) {
3169     log_err("Bad reaction to passed error!\n");
3170   }
3171   uprv_free(src.source);
3172   ucol_close(coll);
3173   } else {
3174     log_data_err("Couldn't open UCA collator\n");
3175   }
3176 
3177 }
3178 
TestNonChars(void)3179 static void TestNonChars(void) {
3180   static const char *test[] = {
3181       "\\u0000",  /* ignorable */
3182       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3183       "\\uFDD0", "\\uFDEF",
3184       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3185       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3186       "\\U0003FFFE", "\\U0003FFFF",
3187       "\\U0004FFFE", "\\U0004FFFF",
3188       "\\U0005FFFE", "\\U0005FFFF",
3189       "\\U0006FFFE", "\\U0006FFFF",
3190       "\\U0007FFFE", "\\U0007FFFF",
3191       "\\U0008FFFE", "\\U0008FFFF",
3192       "\\U0009FFFE", "\\U0009FFFF",
3193       "\\U000AFFFE", "\\U000AFFFF",
3194       "\\U000BFFFE", "\\U000BFFFF",
3195       "\\U000CFFFE", "\\U000CFFFF",
3196       "\\U000DFFFE", "\\U000DFFFF",
3197       "\\U000EFFFE", "\\U000EFFFF",
3198       "\\U000FFFFE", "\\U000FFFFF",
3199       "\\U0010FFFE", "\\U0010FFFF",
3200       "\\uFFFF"  /* special character with maximum primary weight */
3201   };
3202   UErrorCode status = U_ZERO_ERROR;
3203   UCollator *coll = ucol_open("en_US", &status);
3204 
3205   log_verbose("Test non characters\n");
3206 
3207   if(U_SUCCESS(status)) {
3208     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3209   } else {
3210     log_err_status(status, "Unable to open collator\n");
3211   }
3212 
3213   ucol_close(coll);
3214 }
3215 
/*
 * Runs the generic "en_US" ordering helper on four strings that consist of
 * a long run of 'a' followed by 'a', 'b', 'c' and 'd' respectively, for
 * every total length from 20 to 499.
 *
 * The four work buffers are heap allocated; the test is abandoned with an
 * error if an allocation fails, and the static pointer table is cleared
 * again when the buffers are released.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
    if(test[i] == NULL) {
      log_err("Unable to allocate memory for the test strings\n");
      /* release the buffers obtained so far */
      while(--i >= 0) {
        free(test[i]);
        test[i] = NULL;
      }
      return;
    }
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i<4; i++) {
      /* j-1 times 'a', then the distinguishing last letter */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, 4);
  }


  for(i = 0; i<4; i++) {
    free(test[i]);
    test[i] = NULL; /* the table is static: do not keep dangling pointers */
  }
}
3238 
3239 #if 0
/*
 * Disabled variant of TestExtremeCompression (compiled out by the
 * surrounding #if 0). Unlike the active version it fills the buffers for
 * every length first and calls genericLocaleStarter() only once afterwards.
 * NOTE(review): ucol_open() is given `status` rather than `&status`, and
 * `coll` is neither used nor closed - this would need attention before the
 * block could be re-enabled.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      /* fills j-2 bytes, so index j-2 is not written in this iteration */
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* second pass touches only test[0] (the loop bound is i<1) */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
3267 #endif
3268 
/*
 * Hands a tailoring that contains supplementary characters (written as
 * escaped surrogate pairs) and a list of 14 test strings to
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };
  /* all 14 entries of the table are used */
  const uint32_t testCount = (uint32_t)(sizeof(test) / sizeof(test[0]));

  genericRulesStarter(rule, test, testCount);
}
3289 
3290 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * Table-driven test: every entry pairs a tailoring that uses the prefix
 * ("|") syntax with a list of strings, and is passed to
 * genericRulesStarter() together with the stored element count.
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },

    /* NOTE(review): five strings are listed here but len is 4, so the
       last one ("zz") is not passed on - confirm whether that is intended. */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const uint32_t testCount = (uint32_t)(sizeof(tests) / sizeof(tests[0]));
  uint32_t t = 0;

  while (t < testCount) {
    genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
    ++t;
  }
}
3315 
/* This test uses data supplied by Masashiko Maedera to test the implementation */
/* of the JIS X 4061 collation order.                                           */
TestNewJapanese(void)3318 static void TestNewJapanese(void) {
3319 
3320   static const char * const test1[] = {
3321       "\\u30b7\\u30e3\\u30fc\\u30ec",
3322       "\\u30b7\\u30e3\\u30a4",
3323       "\\u30b7\\u30e4\\u30a3",
3324       "\\u30b7\\u30e3\\u30ec",
3325       "\\u3061\\u3087\\u3053",
3326       "\\u3061\\u3088\\u3053",
3327       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3328       "\\u3066\\u30fc\\u305f",
3329       "\\u30c6\\u30fc\\u30bf",
3330       "\\u30c6\\u30a7\\u30bf",
3331       "\\u3066\\u3048\\u305f",
3332       "\\u3067\\u30fc\\u305f",
3333       "\\u30c7\\u30fc\\u30bf",
3334       "\\u30c7\\u30a7\\u30bf",
3335       "\\u3067\\u3048\\u305f",
3336       "\\u3066\\u30fc\\u305f\\u30fc",
3337       "\\u30c6\\u30fc\\u30bf\\u30a1",
3338       "\\u30c6\\u30a7\\u30bf\\u30fc",
3339       "\\u3066\\u3047\\u305f\\u3041",
3340       "\\u3066\\u3048\\u305f\\u30fc",
3341       "\\u3067\\u30fc\\u305f\\u30fc",
3342       "\\u30c7\\u30fc\\u30bf\\u30a1",
3343       "\\u3067\\u30a7\\u305f\\u30a1",
3344       "\\u30c7\\u3047\\u30bf\\u3041",
3345       "\\u30c7\\u30a8\\u30bf\\u30a2",
3346       "\\u3072\\u3086",
3347       "\\u3073\\u3085\\u3042",
3348       "\\u3074\\u3085\\u3042",
3349       "\\u3073\\u3085\\u3042\\u30fc",
3350       "\\u30d3\\u30e5\\u30a2\\u30fc",
3351       "\\u3074\\u3085\\u3042\\u30fc",
3352       "\\u30d4\\u30e5\\u30a2\\u30fc",
3353       "\\u30d2\\u30e5\\u30a6",
3354       "\\u30d2\\u30e6\\u30a6",
3355       "\\u30d4\\u30e5\\u30a6\\u30a2",
3356       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3357       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3358       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3359       "\\u3072\\u3085\\u3093",
3360       "\\u3074\\u3085\\u3093",
3361       "\\u3075\\u30fc\\u308a",
3362       "\\u30d5\\u30fc\\u30ea",
3363       "\\u3075\\u3045\\u308a",
3364       "\\u3075\\u30a5\\u308a",
3365       "\\u3075\\u30a5\\u30ea",
3366       "\\u30d5\\u30a6\\u30ea",
3367       "\\u3076\\u30fc\\u308a",
3368       "\\u30d6\\u30fc\\u30ea",
3369       "\\u3076\\u3045\\u308a",
3370       "\\u30d6\\u30a5\\u308a",
3371       "\\u3077\\u3046\\u308a",
3372       "\\u30d7\\u30a6\\u30ea",
3373       "\\u3075\\u30fc\\u308a\\u30fc",
3374       "\\u30d5\\u30a5\\u30ea\\u30fc",
3375       "\\u3075\\u30a5\\u308a\\u30a3",
3376       "\\u30d5\\u3045\\u308a\\u3043",
3377       "\\u30d5\\u30a6\\u30ea\\u30fc",
3378       "\\u3075\\u3046\\u308a\\u3043",
3379       "\\u30d6\\u30a6\\u30ea\\u30a4",
3380       "\\u3077\\u30fc\\u308a\\u30fc",
3381       "\\u3077\\u30a5\\u308a\\u30a4",
3382       "\\u3077\\u3046\\u308a\\u30fc",
3383       "\\u30d7\\u30a6\\u30ea\\u30a4",
3384       "\\u30d5\\u30fd",
3385       "\\u3075\\u309e",
3386       "\\u3076\\u309d",
3387       "\\u3076\\u3075",
3388       "\\u3076\\u30d5",
3389       "\\u30d6\\u3075",
3390       "\\u30d6\\u30d5",
3391       "\\u3076\\u309e",
3392       "\\u3076\\u3077",
3393       "\\u30d6\\u3077",
3394       "\\u3077\\u309d",
3395       "\\u30d7\\u30fd",
3396       "\\u3077\\u3075",
3397 };
3398 
3399   static const char *test2[] = {
3400     "\\u306f\\u309d", /* H\\u309d */
3401     "\\u30cf\\u30fd", /* K\\u30fd */
3402     "\\u306f\\u306f", /* HH */
3403     "\\u306f\\u30cf", /* HK */
3404     "\\u30cf\\u30cf", /* KK */
3405     "\\u306f\\u309e", /* H\\u309e */
3406     "\\u30cf\\u30fe", /* K\\u30fe */
3407     "\\u306f\\u3070", /* HH\\u309b */
3408     "\\u30cf\\u30d0", /* KK\\u309b */
3409     "\\u306f\\u3071", /* HH\\u309c */
3410     "\\u30cf\\u3071", /* KH\\u309c */
3411     "\\u30cf\\u30d1", /* KK\\u309c */
3412     "\\u3070\\u309d", /* H\\u309b\\u309d */
3413     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3414     "\\u3070\\u306f", /* H\\u309bH */
3415     "\\u30d0\\u30cf", /* K\\u309bK */
3416     "\\u3070\\u309e", /* H\\u309b\\u309e */
3417     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3418     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3419     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3420     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3421     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3422     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3423     "\\u3071\\u309d", /* H\\u309c\\u309d */
3424     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3425     "\\u3071\\u306f", /* H\\u309cH */
3426     "\\u30d1\\u30cf", /* K\\u309cK */
3427     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3428     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3429     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3430     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3431     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3432   };
3433   /*
3434   static const char *test3[] = {
3435     "\\u221er\\u221e",
3436     "\\u221eR#",
3437     "\\u221et\\u221e",
3438     "#r\\u221e",
3439     "#R#",
3440     "#t%",
3441     "#T%",
3442     "8t\\u221e",
3443     "8T\\u221e",
3444     "8t#",
3445     "8T#",
3446     "8t%",
3447     "8T%",
3448     "8t8",
3449     "8T8",
3450     "\\u03c9r\\u221e",
3451     "\\u03a9R%",
3452     "rr\\u221e",
3453     "rR\\u221e",
3454     "Rr\\u221e",
3455     "RR\\u221e",
3456     "RT%",
3457     "rt8",
3458     "tr\\u221e",
3459     "tr8",
3460     "TR8",
3461     "tt8",
3462     "\\u30b7\\u30e3\\u30fc\\u30ec",
3463   };
3464   */
3465   static const UColAttribute att[] = { UCOL_STRENGTH };
3466   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3467 
3468   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3469   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3470 
3471   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3472   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3473   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3474   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3475   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3476 }
3477 
TestStrCollIdenticalPrefix(void)3478 static void TestStrCollIdenticalPrefix(void) {
3479   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3480   const char* test[] = {
3481     "ab\\ud9b0\\udc70",
3482     "ab\\ud9b0\\udc71"
3483   };
3484   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3485 }
3486 /* Contractions should have all their canonically equivalent */
3487 /* strings included */
TestContractionClosure(void)3488 static void TestContractionClosure(void) {
3489   static const struct {
3490     const char *rules;
3491     const char *data[10];
3492     const uint32_t len;
3493   } tests[] = {
3494     {   "&b=\\u00e4\\u00e4",
3495       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3496     {   "&b=\\u00C5",
3497       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3498   };
3499   uint32_t i;
3500 
3501 
3502   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3503     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3504   }
3505 }
3506 
/*
 * Rules that combine [before 3] with other tailorings of the same
 * characters, given in both orders. (The original note on this test reads
 * "this test also fails".) A diagnostic section that dumps collation
 * elements is kept below but compiled out.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;     /* escaped collation rule string */
    const char *data[10];  /* escaped test strings */
    const uint32_t len;    /* number of data[] entries in use */
  } beforeCases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(beforeCases)/sizeof(beforeCases[0]));
  uint32_t caseIdx = 0;

  while(caseIdx < caseCount) {
    genericRulesStarter(beforeCases[caseIdx].rules,
                        beforeCases[caseIdx].data,
                        beforeCases[caseIdx].len);
    ++caseIdx;
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3584 
TestPrefixCompose(void)3585 static void TestPrefixCompose(void) {
3586   const char* rule1 =
3587         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3588   /*
3589   const char* test[] = {
3590       "\\u30c6\\u30fc\\u30bf",
3591       "\\u30c6\\u30a7\\u30bf",
3592   };
3593   */
3594   {
3595     UErrorCode status = U_ZERO_ERROR;
3596     /*uint32_t i = 0;*/
3597     /*UCollationElements *it = NULL;*/
3598 /*    uint32_t CE;*/
3599     UChar string[256];
3600     uint32_t uStringLen;
3601     UCollator *coll = NULL;
3602 
3603     uStringLen = u_unescape(rule1, string, 256);
3604 
3605     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3606     ucol_close(coll);
3607   }
3608 
3609 
3610 }
3611 
3612 /*
3613 [last variable] last variable value
3614 [last primary ignorable] largest CE for primary ignorable
3615 [last secondary ignorable] largest CE for secondary ignorable
3616 [last tertiary ignorable] largest CE for tertiary ignorable
3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3618 */
3619 
/*
 * Runs rules that use the symbolic reset positions ([first tertiary
 * ignorable], [last variable], [top], ...) with and without [before n]
 * through genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /* The values here are hardcoded and are correct for the current UCA.
   * When the UCA changes, these values may have to change as well.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;     /* escaped collation rule string */
    const char *data[10];  /* escaped test strings */
    const uint32_t len;    /* number of data[] entries handed to the helper */
  } optionCases[] = {
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */

    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first secondary ignorable */

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last secondary ignorable */

    /* 'normal' befores */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): eight strings are listed below but len is 7, so the
     * final "u" is never handed to the helper -- confirm that this is
     * intended. */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  const uint32_t caseCount = (uint32_t)(sizeof(optionCases)/sizeof(optionCases[0]));
  uint32_t caseIdx = 0;

  while(caseIdx < caseCount) {
    genericRulesStarter(optionCases[caseIdx].rules,
                        optionCases[caseIdx].data,
                        optionCases[caseIdx].len);
    ++caseIdx;
  }
}
3734 
3735 
/*
 * Not really a test: it only tries out whether copying of UCA contents,
 * triggered by an [optimize ...] rule, fails. The functionality itself
 * cannot be checked here because collation results remain the same.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;     /* escaped collation rule string */
    const char *data[10];  /* escaped test strings */
    const uint32_t len;    /* number of data[] entries in use */
  } optimizeCases[] = {
    /* optimize a code point range; the two plain letters are the test data */
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  const uint32_t caseCount = (uint32_t)(sizeof(optimizeCases)/sizeof(optimizeCases[0]));
  uint32_t caseIdx = 0;

  while(caseIdx < caseCount) {
    genericRulesStarter(optimizeCases[caseIdx].rules,
                        optimizeCases[caseIdx].data,
                        optimizeCases[caseIdx].len);
    ++caseIdx;
  }
}
3757 
3758 /*
3759 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3760 weiv    ucol_strcollIter?
3761 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3762 weiv    these are the input strings?
3763 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3764 weiv    will check - could be a problem with utf-8 iterator
3765 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3766 weiv    hmmm
3767 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3768 weiv    that doesn't sound right
3769 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3770 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3771 cycheng@ca.ibm.c... yes
3772 weiv    and then do the comparison
3773 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3774 weiv    utf-16 strings look like a little endian ones in the example you sent me
3775 weiv    It could be a bug - let me try to test it out
3776 cycheng@ca.ibm.c... ok
3777 cycheng@ca.ibm.c... we can wait till the conf. call
3778 cycheng@ca.ibm.c... next weke
3779 weiv    that would be great
3780 weiv    hmmm
3781 weiv    I might be wrong
3782 weiv    let me play with it some more
3783 cycheng@ca.ibm.c... ok
3784 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3785 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3786 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3787 weiv    ok
3788 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3789 weiv    thanks
3790 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3791 */
#if 0
/*
 * Disabled reproduction of the report quoted above: the same two strings are
 * compared through a UTF-16BE iterator and through a UTF-8 iterator with
 * normalization switched on, and the two results are expected to agree.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* s2 = d800 0021 and s1 = fffc 0062, as big-endian UTF-16 bytes */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the same two strings as UTF-8 bytes */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;
  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);
  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3832 
3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3834 static void Alexis2(void) {
3835   UErrorCode status = U_ZERO_ERROR;
3836   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3837   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3838   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3839   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3840 
3841   UConverter *conv = NULL;
3842 
3843   UCharIterator U16BEItS, U16BEItT;
3844   UCharIterator U8ItS, U8ItT;
3845 
3846   UCollationResult resU16, resU16BE, resU8;
3847 
3848   static const char* const pairs[][2] = {
3849     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3850     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3851     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3852     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3853     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3854     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3855     { "\\u0020", "\\u0020\\u0000"}
3856 /*
3857 5F20 (my result here)
3858 5F204E008E3F
3859 5F20 (your result here)
3860 */
3861   };
3862 
3863   int32_t i = 0;
3864 
3865   UCollator *coll = ucol_open("", &status);
3866   if(status == U_FILE_ACCESS_ERROR) {
3867     log_data_err("Is your data around?\n");
3868     return;
3869   } else if(U_FAILURE(status)) {
3870     log_err("Error opening collator\n");
3871     return;
3872   }
3873   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3874   conv = ucnv_open("UTF16BE", &status);
3875   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3876     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3877     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3878 
3879     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3880 
3881     log_verbose("Result of strcoll is %i\n", resU16);
3882 
3883     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3884     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3885 
3886     /* use the original sizes, as the result from converter is in bytes */
3887     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3888     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3889 
3890     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3891 
3892     log_verbose("Result of U16BE is %i\n", resU16BE);
3893 
3894     if(resU16 != resU16BE) {
3895       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3896     }
3897 
3898     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3899     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3900 
3901     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3902     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3903 
3904     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3905 
3906     if(resU16 != resU8) {
3907       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3908     }
3909 
3910   }
3911 
3912   ucol_close(coll);
3913   ucnv_close(conv);
3914 }
3915 
TestHebrewUCA(void)3916 static void TestHebrewUCA(void) {
3917   UErrorCode status = U_ZERO_ERROR;
3918   static const char *first[] = {
3919     "d790d6b8d79cd795d6bcd7a9",
3920     "d790d79cd79ed7a7d799d799d7a1",
3921     "d790d6b4d79ed795d6bcd7a9",
3922   };
3923 
3924   char utf8String[3][256];
3925   UChar utf16String[3][256];
3926 
3927   int32_t i = 0, j = 0;
3928   int32_t sizeUTF8[3];
3929   int32_t sizeUTF16[3];
3930 
3931   UCollator *coll = ucol_open("", &status);
3932   if (U_FAILURE(status)) {
3933       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3934       return;
3935   }
3936   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3937 
3938   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3939     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3940     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3941     log_verbose("%i: ");
3942     for(j = 0; j < sizeUTF16[i]; j++) {
3943       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3944       log_verbose("%04X", utf16String[i][j]);
3945     }
3946     log_verbose("\n");
3947   }
3948   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3949     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3950       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3951     }
3952   }
3953 
3954   ucol_close(coll);
3955 
3956 }
3957 
TestPartialSortKeyTermination(void)3958 static void TestPartialSortKeyTermination(void) {
3959   static const char* cases[] = {
3960     "\\u1234\\u1234\\udc00",
3961     "\\udc00\\ud800\\ud800"
3962   };
3963 
3964   int32_t i = sizeof(UCollator);
3965 
3966   UErrorCode status = U_ZERO_ERROR;
3967 
3968   UCollator *coll = ucol_open("", &status);
3969 
3970   UCharIterator iter;
3971 
3972   UChar currCase[256];
3973   int32_t length = 0;
3974   int32_t pKeyLen = 0;
3975 
3976   uint8_t key[256];
3977 
3978   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3979     uint32_t state[2] = {0, 0};
3980     length = u_unescape(cases[i], currCase, 256);
3981     uiter_setString(&iter, currCase, length);
3982     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3983 
3984     log_verbose("Done\n");
3985 
3986   }
3987   ucol_close(coll);
3988 }
3989 
TestSettings(void)3990 static void TestSettings(void) {
3991   static const char* cases[] = {
3992     "apple",
3993       "Apple"
3994   };
3995 
3996   static const char* locales[] = {
3997     "",
3998       "en"
3999   };
4000 
4001   UErrorCode status = U_ZERO_ERROR;
4002 
4003   int32_t i = 0, j = 0;
4004 
4005   UChar source[256], target[256];
4006   int32_t sLen = 0, tLen = 0;
4007 
4008   UCollator *collateObject = NULL;
4009   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4010     collateObject = ucol_open(locales[i], &status);
4011     ucol_setStrength(collateObject, UCOL_PRIMARY);
4012     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4013     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4014       sLen = u_unescape(cases[j-1], source, 256);
4015       source[sLen] = 0;
4016       tLen = u_unescape(cases[j], target, 256);
4017       source[tLen] = 0;
4018       doTest(collateObject, source, target, UCOL_EQUAL);
4019     }
4020     ucol_close(collateObject);
4021   }
4022 }
4023 
/*
 * Runs the ucol_equals() checks for one pair of collators.
 *
 * First source and target must compare equal. If the requested and actual
 * locales of source are the same, a clone of source must also compare equal,
 * and must stop comparing equal once its French-collation attribute has
 * been flipped.
 *
 * @param locName  locale name; not used by the checks themselves
 * @param source   collator under test; it is not closed here
 * @param target   second collator to compare with; this function closes it
 * @return number of errors that were logged
 */
static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    /*const UChar *sourceRules = NULL;*/
    /*int32_t sourceRulesLen = 0;*/
    UColAttributeValue french = UCOL_OFF;
    int32_t cloneSize = 0;

    (void)locName;

    if(!ucol_equals(source, target)) {
        log_err("Same collators, different address not equal\n");
        errorNo++;
    }
    ucol_close(target);
    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
        /* currently, safeClone is implemented through getRules/openRules
        * so it is the same as the test below - I will comment that test out.
        */
        /* real thing */
        target = ucol_safeClone(source, NULL, &cloneSize, &status);
        if(U_FAILURE(status)) {
            log_err("Error creating clone\n");
            errorNo++;
            return errorNo;
        }
        if(!ucol_equals(source, target)) {
            log_err("Collator different from it's clone\n");
            errorNo++;
        }
        /* Flip the French-collation setting on the clone only. */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if(french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if(U_FAILURE(status)) {
            log_err("Error setting attributes\n");
            errorNo++;
            /* The clone is owned here; release it on this early exit too. */
            ucol_close(target);
            return errorNo;
        }
        if(ucol_equals(source, target)) {
            log_err("Collators same even when options changed\n");
            errorNo++;
        }
        ucol_close(target);
        /* commented out since safeClone uses exactly the same technique */
        /*
        sourceRules = ucol_getRules(source, &sourceRulesLen);
        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
        if(U_FAILURE(status)) {
        log_err("Error instantiating target from rules\n");
        errorNo++;
        return errorNo;
        }
        if(!ucol_equals(source, target)) {
        log_err("Collator different from collator that was created from the same rules\n");
        errorNo++;
        }
        ucol_close(target);
        */
    }
    return errorNo;
}
4086 
4087 
TestEquals(void)4088 static void TestEquals(void) {
4089     /* ucol_equals is not currently a public API. There is a chance that it will become
4090     * something like this, but currently it is only used by RuleBasedCollator::operator==
4091     */
4092     /* test whether the two collators instantiated from the same locale are equal */
4093     UErrorCode status = U_ZERO_ERROR;
4094     UParseError parseError;
4095     int32_t noOfLoc = uloc_countAvailable();
4096     const char *locName = NULL;
4097     UCollator *source = NULL, *target = NULL;
4098     int32_t i = 0;
4099 
4100     const char* rules[] = {
4101         "&l < lj <<< Lj <<< LJ",
4102         "&n < nj <<< Nj <<< NJ",
4103         "&ae <<< \\u00e4",
4104         "&AE <<< \\u00c4"
4105     };
4106     /*
4107     const char* badRules[] = {
4108     "&l <<< Lj",
4109     "&n < nj <<< nJ <<< NJ",
4110     "&a <<< \\u00e4",
4111     "&AE <<< \\u00c4 <<< x"
4112     };
4113     */
4114 
4115     UChar sourceRules[1024], targetRules[1024];
4116     int32_t sourceRulesSize = 0, targetRulesSize = 0;
4117     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4118 
4119     for(i = 0; i < rulesSize; i++) {
4120         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4121         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4122     }
4123 
4124     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4125     if(status == U_FILE_ACCESS_ERROR) {
4126         log_data_err("Is your data around?\n");
4127         return;
4128     } else if(U_FAILURE(status)) {
4129         log_err("Error opening collator\n");
4130         return;
4131     }
4132     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133     if(!ucol_equals(source, target)) {
4134         log_err("Equivalent collators not equal!\n");
4135     }
4136     ucol_close(source);
4137     ucol_close(target);
4138 
4139     source = ucol_open("root", &status);
4140     target = ucol_open("root", &status);
4141     log_verbose("Testing root\n");
4142     if(!ucol_equals(source, source)) {
4143         log_err("Same collator not equal\n");
4144     }
4145     if(TestEqualsForCollator(locName, source, target)) {
4146         log_err("Errors for root\n", locName);
4147     }
4148     ucol_close(source);
4149 
4150     for(i = 0; i<noOfLoc; i++) {
4151         status = U_ZERO_ERROR;
4152         locName = uloc_getAvailable(i);
4153         /*if(hasCollationElements(locName)) {*/
4154         log_verbose("Testing equality for locale %s\n", locName);
4155         source = ucol_open(locName, &status);
4156         target = ucol_open(locName, &status);
4157         if (U_FAILURE(status)) {
4158             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4159             continue;
4160         }
4161         if(TestEqualsForCollator(locName, source, target)) {
4162             log_err("Errors for locale %s\n", locName);
4163         }
4164         ucol_close(source);
4165         /*}*/
4166     }
4167 }
4168 
TestJ2726(void)4169 static void TestJ2726(void) {
4170     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4171     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4172     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4173     UErrorCode status = U_ZERO_ERROR;
4174     UCollator *coll = ucol_open("en", &status);
4175     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4176     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4177     doTest(coll, a, aSpace, UCOL_EQUAL);
4178     doTest(coll, aSpace, a, UCOL_EQUAL);
4179     doTest(coll, a, spaceA, UCOL_EQUAL);
4180     doTest(coll, spaceA, a, UCOL_EQUAL);
4181     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4182     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4183     ucol_close(coll);
4184 }
4185 
NullRule(void)4186 static void NullRule(void) {
4187     UChar r[3] = {0};
4188     UErrorCode status = U_ZERO_ERROR;
4189     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4190     if(U_SUCCESS(status)) {
4191         log_err("This should have been an error!\n");
4192         ucol_close(coll);
4193     } else {
4194         status = U_ZERO_ERROR;
4195     }
4196     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4197     if(U_FAILURE(status)) {
4198         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4199     } else {
4200         ucol_close(coll);
4201     }
4202 }
4203 
4204 /**
4205  * Test for CollationElementIterator previous and next for the whole set of
4206  * unicode characters with normalization on.
4207  */
TestNumericCollation(void)4208 static void TestNumericCollation(void)
4209 {
4210     UErrorCode status = U_ZERO_ERROR;
4211 
4212     const static char *basicTestStrings[]={
4213     "hello1",
4214     "hello2",
4215     "hello2002",
4216     "hello2003",
4217     "hello123456",
4218     "hello1234567",
4219     "hello10000000",
4220     "hello100000000",
4221     "hello1000000000",
4222     "hello10000000000",
4223     };
4224 
4225     const static char *preZeroTestStrings[]={
4226     "avery10000",
4227     "avery010000",
4228     "avery0010000",
4229     "avery00010000",
4230     "avery000010000",
4231     "avery0000010000",
4232     "avery00000010000",
4233     "avery000000010000",
4234     };
4235 
4236     const static char *thirtyTwoBitNumericStrings[]={
4237     "avery42949672960",
4238     "avery42949672961",
4239     "avery42949672962",
4240     "avery429496729610"
4241     };
4242 
4243      const static char *longNumericStrings[]={
4244      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4245         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4246         are treated as multiple collation elements. */
4247     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4248     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4249     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4250     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4251     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4252     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4253     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4254     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4255     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4256     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4257     };
4258 
4259     const static char *supplementaryDigits[] = {
4260       "\\uD835\\uDFCE", /* 0 */
4261       "\\uD835\\uDFCF", /* 1 */
4262       "\\uD835\\uDFD0", /* 2 */
4263       "\\uD835\\uDFD1", /* 3 */
4264       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4265       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4266       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4267       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4268       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4269       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4270     };
4271 
4272     const static char *foreignDigits[] = {
4273       "\\u0661",
4274         "\\u0662",
4275         "\\u0663",
4276       "\\u0661\\u0660",
4277       "\\u0661\\u0662",
4278       "\\u0661\\u0663",
4279       "\\u0662\\u0660",
4280       "\\u0662\\u0662",
4281       "\\u0662\\u0663",
4282       "\\u0663\\u0660",
4283       "\\u0663\\u0662",
4284       "\\u0663\\u0663"
4285     };
4286 
4287     const static char *evenZeroes[] = {
4288       "2000",
4289       "2001",
4290         "2002",
4291         "2003"
4292     };
4293 
4294     UColAttribute att = UCOL_NUMERIC_COLLATION;
4295     UColAttributeValue val = UCOL_ON;
4296 
4297     /* Open our collator. */
4298     UCollator* coll = ucol_open("root", &status);
4299     if (U_FAILURE(status)){
4300         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4301               myErrorName(status));
4302         return;
4303     }
4304     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4305     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4306     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4307     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4308     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4309     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4310 
4311     /* Setting up our collator to do digits. */
4312     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4313     if (U_FAILURE(status)){
4314         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4315               myErrorName(status));
4316         return;
4317     }
4318 
4319     /*
4320        Testing that prepended zeroes still yield the correct collation behavior.
4321        We expect that every element in our strings array will be equal.
4322     */
4323     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4324 
4325     ucol_close(coll);
4326 }
4327 
TestTibetanConformance(void)4328 static void TestTibetanConformance(void)
4329 {
4330     const char* test[] = {
4331         "\\u0FB2\\u0591\\u0F71\\u0061",
4332         "\\u0FB2\\u0F71\\u0061"
4333     };
4334 
4335     UErrorCode status = U_ZERO_ERROR;
4336     UCollator *coll = ucol_open("", &status);
4337     UChar source[100];
4338     UChar target[100];
4339     int result;
4340     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4341     if (U_SUCCESS(status)) {
4342         u_unescape(test[0], source, 100);
4343         u_unescape(test[1], target, 100);
4344         doTest(coll, source, target, UCOL_EQUAL);
4345         result = ucol_strcoll(coll, source, -1,   target, -1);
4346         log_verbose("result %d\n", result);
4347         if (UCOL_EQUAL != result) {
4348             log_err("Tibetan comparison error\n");
4349         }
4350     }
4351     ucol_close(coll);
4352 
4353     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4354 }
4355 
/**
 * Runs the two escaped strings below through genericLocaleStarter() for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4360 
/* Inclusive upper bound of the raw values walked by the implicit-CE tests. */
#define TST_UCOL_MAX_INPUT 0x220001
/* Bit masks applied to 32-bit implicit collation element values.
   Deliberately defined WITHOUT a trailing semicolon so they can appear
   anywhere inside an expression, not only as the last token of a statement. */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4365 
4366 
/**
 * Verbose-logs the raw value i next to the implicit value computed for it by
 * uprv_uca_getImplicitFromRaw(). Values outside [0, TST_UCOL_MAX_INPUT] are
 * ignored.
 */
static void showImplicit(UChar32 i) {
    if (i < 0 || i > TST_UCOL_MAX_INPUT) {
        return;
    }
    log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
}
4372 
TestImplicitGeneration(void)4373 static void TestImplicitGeneration(void) {
4374     UErrorCode status = U_ZERO_ERROR;
4375     UChar32 last = 0;
4376     UChar32 current;
4377     UChar32 i = 0, j = 0;
4378     UChar32 roundtrip = 0;
4379     UChar32 lastBottom = 0;
4380     UChar32 currentBottom = 0;
4381     UChar32 lastTop = 0;
4382     UChar32 currentTop = 0;
4383 
4384     UCollator *coll = ucol_open("root", &status);
4385     if(U_FAILURE(status)) {
4386         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4387         return;
4388     }
4389 
4390     uprv_uca_getRawFromImplicit(0xE20303E7);
4391 
4392     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4393         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4394 
4395         /* check that it round-trips AND that all intervening ones are illegal*/
4396         roundtrip = uprv_uca_getRawFromImplicit(current);
4397         if (roundtrip != i) {
4398             log_err("No roundtrip %08X\n", i);
4399         }
4400         if (last != 0) {
4401             for (j = last + 1; j < current; ++j) {
4402                 roundtrip = uprv_uca_getRawFromImplicit(j);
4403                 /* raise an error if it *doesn't* find an error*/
4404                 if (roundtrip != -1) {
4405                     log_err("Fails to recognize illegal %08X\n", j);
4406                 }
4407             }
4408         }
4409         /* now do other consistency checks*/
4410         lastBottom = last & bottomByte;
4411         currentBottom = current & bottomByte;
4412         lastTop = last & topByte;
4413         currentTop = current & topByte;
4414 
4415         /* print out some values for spot-checking*/
4416         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4417             showImplicit(i-3);
4418             showImplicit(i-2);
4419             showImplicit(i-1);
4420             showImplicit(i);
4421             showImplicit(i+1);
4422             showImplicit(i+2);
4423         }
4424         last = current;
4425 
4426         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4427             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4428         }
4429     }
4430     showImplicit(TST_UCOL_MAX_INPUT-2);
4431     showImplicit(TST_UCOL_MAX_INPUT-1);
4432     showImplicit(TST_UCOL_MAX_INPUT);
4433     ucol_close(coll);
4434 }
4435 
4436 /**
4437  * Iterate through the given iterator, checking to see that all the strings
4438  * in the expected array are present.
4439  * @param expected array of strings we expect to see, or NULL
4440  * @param expectedCount number of elements of expected, or 0
4441  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4442 static int32_t checkUEnumeration(const char* msg,
4443                                  UEnumeration* iter,
4444                                  const char** expected,
4445                                  int32_t expectedCount) {
4446     UErrorCode ec = U_ZERO_ERROR;
4447     int32_t i = 0, n, j, bit;
4448     int32_t seenMask = 0;
4449 
4450     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4451     n = uenum_count(iter, &ec);
4452     if (!assertSuccess("count", &ec)) return -1;
4453     log_verbose("%s = [", msg);
4454     for (;; ++i) {
4455         const char* s = uenum_next(iter, NULL, &ec);
4456         if (!assertSuccess("snext", &ec) || s == NULL) break;
4457         if (i != 0) log_verbose(",");
4458         log_verbose("%s", s);
4459         /* check expected list */
4460         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4461             if ((seenMask&bit) == 0 &&
4462                 uprv_strcmp(s, expected[j]) == 0) {
4463                 seenMask |= bit;
4464                 break;
4465             }
4466         }
4467     }
4468     log_verbose("] (%d)\n", i);
4469     assertTrue("count verified", i==n);
4470     /* did we see all expected strings? */
4471     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4472         if ((seenMask&bit)!=0) {
4473             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4474         } else {
4475             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4476         }
4477     }
4478     return n;
4479 }
4480 
4481 /**
4482  * Test new API added for separate collation tree.
4483  */
TestSeparateTrees(void)4484 static void TestSeparateTrees(void) {
4485     UErrorCode ec = U_ZERO_ERROR;
4486     UEnumeration *e = NULL;
4487     int32_t n = -1;
4488     UBool isAvailable;
4489     char loc[256];
4490 
4491     static const char* AVAIL[] = { "en", "de" };
4492 
4493     static const char* KW[] = { "collation" };
4494 
4495     static const char* KWVAL[] = { "phonebook", "stroke" };
4496 
4497 #if !UCONFIG_NO_SERVICE
4498     e = ucol_openAvailableLocales(&ec);
4499     if (e != NULL) {
4500         assertSuccess("ucol_openAvailableLocales", &ec);
4501         assertTrue("ucol_openAvailableLocales!=0", e!=0);
4502         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4503         /* Don't need to check n because we check list */
4504         uenum_close(e);
4505     } else {
4506         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4507     }
4508 #endif
4509 
4510     e = ucol_getKeywords(&ec);
4511     if (e != NULL) {
4512         assertSuccess("ucol_getKeywords", &ec);
4513         assertTrue("ucol_getKeywords!=0", e!=0);
4514         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4515         /* Don't need to check n because we check list */
4516         uenum_close(e);
4517     } else {
4518         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4519     }
4520 
4521     e = ucol_getKeywordValues(KW[0], &ec);
4522     if (e != NULL) {
4523         assertSuccess("ucol_getKeywordValues", &ec);
4524         assertTrue("ucol_getKeywordValues!=0", e!=0);
4525         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4526         /* Don't need to check n because we check list */
4527         uenum_close(e);
4528     } else {
4529         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4530     }
4531 
4532     /* Try setting a warning before calling ucol_getKeywordValues */
4533     ec = U_USING_FALLBACK_WARNING;
4534     e = ucol_getKeywordValues(KW[0], &ec);
4535     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4536         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4537         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4538         /* Don't need to check n because we check list */
4539         uenum_close(e);
4540     }
4541 
4542     /*
4543 U_DRAFT int32_t U_EXPORT2
4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4545                              const char* locale, UBool* isAvailable,
4546                              UErrorCode* status);
4547 }
4548 */
4549     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4550                                      &isAvailable, &ec);
4551     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4552         assertEquals("getFunctionalEquivalent(de)", "de", loc);
4553         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4554                    isAvailable == TRUE);
4555     }
4556 
4557     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4558                                      &isAvailable, &ec);
4559     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4561         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4562                    isAvailable == TRUE);
4563     }
4564 }
4565 
/* supersedes TestJ784 */
/**
 * Runs two string lists both through a collator built from the
 * "[before 2]" rules below and through the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    /* One rule string; the pieces are joined by literal concatenation. */
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* Each list: first with the explicit rules, then with the locale data. */
    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
4626 
TestBeforeTightening(void)4627 static void TestBeforeTightening(void) {
4628     static const struct {
4629         const char *rules;
4630         UErrorCode expectedStatus;
4631     } tests[] = {
4632         { "&[before 1]a<x", U_ZERO_ERROR },
4633         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4634         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4635         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4636         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4637         { "&[before 2]a<<x",U_ZERO_ERROR },
4638         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4639         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4640         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4641         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4642         { "&[before 3]a<<<x",U_ZERO_ERROR },
4643         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4644         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4645     };
4646 
4647     int32_t i = 0;
4648 
4649     UErrorCode status = U_ZERO_ERROR;
4650     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4651     uint32_t rlen = 0;
4652 
4653     UCollator *coll = NULL;
4654 
4655 
4656     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4657         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4658         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4659         if(status != tests[i].expectedStatus) {
4660             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4661                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4662         }
4663         ucol_close(coll);
4664         status = U_ZERO_ERROR;
4665     }
4666 
4667 }
4668 
4669 #if 0
4670 &m < a
4671 &[before 1] a < x <<< X << q <<< Q < z
4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4673 
4674 &m < a
4675 &[before 2] a << x <<< X << q <<< Q < z
4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4677 
4678 &m < a
4679 &[before 3] a <<< x <<< X << q <<< Q < z
4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4681 
4682 
4683 &m << a
4684 &[before 1] a < x <<< X << q <<< Q < z
4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4686 
4687 &m << a
4688 &[before 2] a << x <<< X << q <<< Q < z
4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4690 
4691 &m << a
4692 &[before 3] a <<< x <<< X << q <<< Q < z
4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4694 
4695 
4696 &m <<< a
4697 &[before 1] a < x <<< X << q <<< Q < z
4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4699 
4700 &m <<< a
4701 &[before 2] a << x <<< X << q <<< Q < z
4702 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4703 
4704 &m <<< a
4705 &[before 3] a <<< x <<< X << q <<< Q < z
4706 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4707 
4708 
4709 &[before 1] s < x <<< X << q <<< Q < z
4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4711 
4712 &[before 2] s << x <<< X << q <<< Q < z
4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4714 
4715 &[before 3] s <<< x <<< X << q <<< Q < z
4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4717 
4718 
4719 &[before 1] \u24DC < x <<< X << q <<< Q < z
4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4721 
4722 &[before 2] \u24DC << x <<< X << q <<< Q < z
4723 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4724 
4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4726 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4727 #endif
4728 
4729 
4730 #if 0
4731 /* requires features not yet supported */
4732 static void TestMoreBefore(void) {
4733     static const struct {
4734         const char* rules;
4735         const char* order[16];
4736         int32_t size;
4737     } tests[] = {
4738         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4739         { "m","M","x","X","q","Q","z","a","n" }, 9},
4740         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4741         { "m","M","x","X","q","Q","a","z","n" }, 9},
4742         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4743         { "m","M","x","X","a","q","Q","z","n" }, 9},
4744         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4745         { "x","X","q","Q","z","m","M","a","n" }, 9},
4746         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4747         { "m","M","x","X","q","Q","a","z","n" }, 9},
4748         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4749         { "m","M","x","X","a","q","Q","z","n" }, 9},
4750         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4751         { "x","X","q","Q","z","n","m","a","M" }, 9},
4752         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4753         { "x","X","q","Q","m","a","M","z","n" }, 9},
4754         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4755         { "m","x","X","a","M","q","Q","z","n" }, 9},
4756         { "&[before 1] s < x <<< X << q <<< Q < z",
4757         { "r","R","x","X","q","Q","z","s","n" }, 9},
4758         { "&[before 2] s << x <<< X << q <<< Q < z",
4759         { "r","R","x","X","q","Q","s","z","n" }, 9},
4760         { "&[before 3] s <<< x <<< X << q <<< Q < z",
4761         { "r","R","x","X","s","q","Q","z","n" }, 9},
4762         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4763         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4764         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4765         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4766         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4767         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4768     };
4769 
4770     int32_t i = 0;
4771 
4772     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4773         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
4774     }
4775 }
4776 #endif
4777 
TestTailorNULL(void)4778 static void TestTailorNULL( void ) {
4779     const static char* rule = "&a <<< '\\u0000'";
4780     UErrorCode status = U_ZERO_ERROR;
4781     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4782     uint32_t rlen = 0;
4783     UChar a = 1, null = 0;
4784     UCollationResult res = UCOL_EQUAL;
4785 
4786     UCollator *coll = NULL;
4787 
4788 
4789     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4790     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4791 
4792     if(U_FAILURE(status)) {
4793         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4794     } else {
4795         res = ucol_strcoll(coll, &a, 1, &null, 1);
4796 
4797         if(res != UCOL_LESS) {
4798             log_err("NULL was not tailored properly!\n");
4799         }
4800     }
4801 
4802     ucol_close(coll);
4803 }
4804 
4805 static void
TestUpperFirstQuaternary(void)4806 TestUpperFirstQuaternary(void)
4807 {
4808   const char* tests[] = { "B", "b", "Bb", "bB" };
4809   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4810   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4811   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4812 }
4813 
4814 static void
TestJ4960(void)4815 TestJ4960(void)
4816 {
4817   const char* tests[] = { "\\u00e2T", "aT" };
4818   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4819   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4820   const char* tests2[] = { "a", "A" };
4821   const char* rule = "&[first tertiary ignorable]=A=a";
4822   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4823   UColAttributeValue attVals2[] = { UCOL_ON };
4824   /* Test whether we correctly ignore primary ignorables on case level when */
4825   /* we have only primary & case level */
4826   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4827   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4828   /* and case level */
4829   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4830   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4831   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4832 }
4833 
4834 static void
TestJ5223(void)4835 TestJ5223(void)
4836 {
4837   static const char *test = "this is a test string";
4838   UChar ustr[256];
4839   int32_t ustr_length = u_unescape(test, ustr, 256);
4840   unsigned char sortkey[256];
4841   int32_t sortkey_length;
4842   UErrorCode status = U_ZERO_ERROR;
4843   static UCollator *coll = NULL;
4844   coll = ucol_open("root", &status);
4845   if(U_FAILURE(status)) {
4846     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4847     return;
4848   }
4849   ucol_setStrength(coll, UCOL_PRIMARY);
4850   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4851   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4852   if (U_FAILURE(status)) {
4853     log_err("Failed setting atributes\n");
4854     return;
4855   }
4856   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4857   if (sortkey_length > 256) return;
4858 
4859   /* we mark the position where the null byte should be written in advance */
4860   sortkey[sortkey_length-1] = 0xAA;
4861 
4862   /* we set the buffer size one byte higher than needed */
4863   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4864     sortkey_length+1);
4865 
4866   /* no error occurs (for me) */
4867   if (sortkey[sortkey_length-1] == 0xAA) {
4868     log_err("Hit bug at first try\n");
4869   }
4870 
4871   /* we mark the position where the null byte should be written again */
4872   sortkey[sortkey_length-1] = 0xAA;
4873 
4874   /* this time we set the buffer size to the exact amount needed */
4875   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4876     sortkey_length);
4877 
4878   /* now the trailing null byte is not written */
4879   if (sortkey[sortkey_length-1] == 0xAA) {
4880     log_err("Hit bug at second try\n");
4881   }
4882 
4883   ucol_close(coll);
4884 }
4885 
/**
 * Regression test for the Thai partial sort key problem (J5232): runs the
 * two escaped strings below through genericLocaleStarter() for locale "th".
 */
static void
TestJ5232(void)
{
    static const char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", test, LEN(test));
}
4897 
/**
 * Regression test for J5367: runs "a", "y" through genericRulesStarter()
 * with a rule set that combines a contraction reset (&Ny) with a
 * [first secondary ignorable] reset.
 */
static void
TestJ5367(void)
{
    static const char *test[] = { "a", "y" };
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(rules, test, LEN(test));
}
4905 
4906 static void
TestVI5913(void)4907 TestVI5913(void)
4908 {
4909     UErrorCode status = U_ZERO_ERROR;
4910     int32_t i, j;
4911     UCollator *coll =NULL;
4912     uint8_t  resColl[100], expColl[100];
4913     int32_t  rLen, tLen, ruleLen, sLen, kLen;
4914     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4915     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4916     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4917     static const UChar tData[][20]={
4918         {0x1EAC, 0},
4919         {0x0041, 0x0323, 0x0302, 0},
4920         {0x1EA0, 0x0302, 0},
4921         {0x00C2, 0x0323, 0},
4922         {0x1ED8, 0},  /* O with dot and circumflex */
4923         {0x1ECC, 0x0302, 0},
4924         {0x1EB7, 0},
4925         {0x1EA1, 0x0306, 0},
4926     };
4927     static const UChar tailorData[][20]={
4928         {0x1FA2, 0},  /* Omega with 3 combining marks */
4929         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4930         {0x1FF3, 0x0313, 0x0300, 0},
4931         {0x1F60, 0x0300, 0x0345, 0},
4932         {0x1F62, 0x0345, 0},
4933         {0x1FA0, 0x0300, 0},
4934     };
4935     static const UChar tailorData2[][20]={
4936         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4937         {0x0073, 0x0323, 0x030C, 0},
4938         {0x0073, 0x030C, 0x0323, 0},
4939     };
4940     static const UChar tailorData3[][20]={
4941         {0x007a, 0},  /*  z */
4942         {0x0061, 0x0065, 0},  /*  a + e */
4943         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4944         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4945         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4946         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4947         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4948         {0x00EA, 0},  /* e with circumflex  */
4949     };
4950 
4951     /* Test Vietnamese sort. */
4952     coll = ucol_open("vi", &status);
4953     if(U_FAILURE(status)) {
4954         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4955         return;
4956     }
4957     log_verbose("\n\nVI collation:");
4958     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4959         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4960     }
4961     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4962         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4963     }
4964     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4965         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4966     }
4967     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4968         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4969     }
4970 
4971     for (j=0; j<8; j++) {
4972         tLen = u_strlen(tData[j]);
4973         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4974         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4975         for(i = 0; i<rLen; i++) {
4976             log_verbose(" %02X", resColl[i]);
4977         }
4978     }
4979 
4980     ucol_close(coll);
4981 
4982     /* Test Romanian sort. */
4983     coll = ucol_open("ro", &status);
4984     log_verbose("\n\nRO collation:");
4985     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4986         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4987     }
4988     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4989         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4990     }
4991     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4992         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4993     }
4994 
4995     for (j=4; j<8; j++) {
4996         tLen = u_strlen(tData[j]);
4997         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4998         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4999         for(i = 0; i<rLen; i++) {
5000             log_verbose(" %02X", resColl[i]);
5001         }
5002     }
5003     ucol_close(coll);
5004 
5005     /* Test the precomposed Greek character with 3 combining marks. */
5006     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5007     ruleLen = u_strlen(rule);
5008     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5009     if (U_FAILURE(status)) {
5010         log_err("ucol_openRules failed with %s\n", u_errorName(status));
5011         return;
5012     }
5013     sLen = u_strlen(tailorData[0]);
5014     for (j=1; j<6; j++) {
5015         tLen = u_strlen(tailorData[j]);
5016         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5017             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5018         }
5019     }
5020     /* Test getSortKey. */
5021     tLen = u_strlen(tailorData[0]);
5022     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5023     for (j=0; j<6; j++) {
5024         tLen = u_strlen(tailorData[j]);
5025         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5026         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5027             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5028             for(i = 0; i<rLen; i++) {
5029                 log_err(" %02X", resColl[i]);
5030             }
5031         }
5032     }
5033     ucol_close(coll);
5034 
5035     log_verbose("\n\nTailoring test for s with caron:");
5036     ruleLen = u_strlen(rule2);
5037     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5038     tLen = u_strlen(tailorData2[0]);
5039     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5040     for (j=1; j<3; j++) {
5041         tLen = u_strlen(tailorData2[j]);
5042         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5043         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5044             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5045             for(i = 0; i<rLen; i++) {
5046                 log_err(" %02X", resColl[i]);
5047             }
5048         }
5049     }
5050     ucol_close(coll);
5051 
5052     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5053     ruleLen = u_strlen(rule3);
5054     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5055     tLen = u_strlen(tailorData3[3]);
5056     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5057     for (j=4; j<6; j++) {
5058         tLen = u_strlen(tailorData3[j]);
5059         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5060 
5061         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5062             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5063             for(i = 0; i<rLen; i++) {
5064                 log_err(" %02X", resColl[i]);
5065             }
5066         }
5067 
5068         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5069          for(i = 0; i<rLen; i++) {
5070              log_verbose(" %02X", resColl[i]);
5071          }
5072     }
5073     ucol_close(coll);
5074 }
5075 
5076 static void
TestTailor6179(void)5077 TestTailor6179(void)
5078 {
5079     UErrorCode status = U_ZERO_ERROR;
5080     int32_t i;
5081     UCollator *coll =NULL;
5082     uint8_t  resColl[100];
5083     int32_t  rLen, tLen, ruleLen;
5084     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5085     UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5086             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5087             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5088             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5089     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5090     UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5091             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5092             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5093             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5094             0x3C,0x3C,0x20,0x62,0};
5095 
5096     UChar tData1[][20]={
5097         {0x61, 0},
5098         {0x62, 0},
5099         { 0xFDD0,0x009E, 0}
5100     };
5101     UChar tData2[][20]={
5102             {0x61, 0},
5103             {0x62, 0},
5104             { 0xFDD0,0x009E, 0}
5105      };
5106 
5107     /*
5108      * These values from FractionalUCA.txt will change,
5109      * and need to be updated here.
5110      */
5111     uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5112     uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
5113     uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5114     uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5115 
5116     /* Test [Last Primary ignorable] */
5117 
5118     log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
5119     ruleLen = u_strlen(rule1);
5120     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5121     if (U_FAILURE(status)) {
5122         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5123         return;
5124     }
5125     tLen = u_strlen(tData1[0]);
5126     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5127     if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5128         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5129         for(i = 0; i<rLen; i++) {
5130             log_err(" %02X", resColl[i]);
5131         }
5132     }
5133     tLen = u_strlen(tData1[1]);
5134     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5135     if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5136         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5137         for(i = 0; i<rLen; i++) {
5138             log_err(" %02X", resColl[i]);
5139         }
5140     }
5141     ucol_close(coll);
5142 
5143 
5144     /* Test [Last Secondary ignorable] */
5145     log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
5146     ruleLen = u_strlen(rule1);
5147     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5148     if (U_FAILURE(status)) {
5149         log_err("Tailoring test: &[last primary ignorable] failed!");
5150         return;
5151     }
5152     tLen = u_strlen(tData2[0]);
5153     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5154     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5155     for(i = 0; i<rLen; i++) {
5156         log_verbose(" %02X", resColl[i]);
5157     }
5158     if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5159         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5160         for(i = 0; i<rLen; i++) {
5161             log_err(" %02X", resColl[i]);
5162         }
5163     }
5164     tLen = u_strlen(tData2[1]);
5165     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5166     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5167     for(i = 0; i<rLen; i++) {
5168         log_verbose(" %02X", resColl[i]);
5169     }
5170     if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5171         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5172         for(i = 0; i<rLen; i++) {
5173             log_err(" %02X", resColl[i]);
5174         }
5175     }
5176     ucol_close(coll);
5177 }
5178 
5179 static void
TestUCAPrecontext(void)5180 TestUCAPrecontext(void)
5181 {
5182     UErrorCode status = U_ZERO_ERROR;
5183     int32_t i, j;
5184     UCollator *coll =NULL;
5185     uint8_t  resColl[100], prevColl[100];
5186     int32_t  rLen, tLen, ruleLen;
5187     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5188     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5189     /* & l middle-dot << a  a is an expansion. */
5190 
5191     UChar tData1[][20]={
5192             { 0xb7, 0},  /* standalone middle dot(0xb7) */
5193             { 0x387, 0}, /* standalone middle dot(0x387) */
5194             { 0x61, 0},  /* a */
5195             { 0x6C, 0},  /* l */
5196             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5197             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5198             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5199             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5200             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5201             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5202             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5203      };
5204 
5205     log_verbose("\n\nEN collation:");
5206     coll = ucol_open("en", &status);
5207     if (U_FAILURE(status)) {
5208         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5209         return;
5210     }
5211     for (j=0; j<11; j++) {
5212         tLen = u_strlen(tData1[j]);
5213         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5214         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5215             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5216                     j, tData1[j]);
5217         }
5218         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5219         for(i = 0; i<rLen; i++) {
5220             log_verbose(" %02X", resColl[i]);
5221         }
5222         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5223      }
5224      ucol_close(coll);
5225 
5226 
5227      log_verbose("\n\nJA collation:");
5228      coll = ucol_open("ja", &status);
5229      if (U_FAILURE(status)) {
5230          log_err("Tailoring test: &z <<a|- failed!");
5231          return;
5232      }
5233      for (j=0; j<11; j++) {
5234          tLen = u_strlen(tData1[j]);
5235          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5236          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5237              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5238                      j, tData1[j]);
5239          }
5240          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5241          for(i = 0; i<rLen; i++) {
5242              log_verbose(" %02X", resColl[i]);
5243          }
5244          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5245       }
5246       ucol_close(coll);
5247 
5248 
5249       log_verbose("\n\nTailoring test: & middle dot < a ");
5250       ruleLen = u_strlen(rule1);
5251       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5252       if (U_FAILURE(status)) {
5253           log_err("Tailoring test: & middle dot < a failed!");
5254           return;
5255       }
5256       for (j=0; j<11; j++) {
5257           tLen = u_strlen(tData1[j]);
5258           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5259           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5260               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5261                       j, tData1[j]);
5262           }
5263           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5264           for(i = 0; i<rLen; i++) {
5265               log_verbose(" %02X", resColl[i]);
5266           }
5267           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5268        }
5269        ucol_close(coll);
5270 
5271 
5272        log_verbose("\n\nTailoring test: & l middle-dot << a ");
5273        ruleLen = u_strlen(rule2);
5274        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5275        if (U_FAILURE(status)) {
5276            log_err("Tailoring test: & l middle-dot << a failed!");
5277            return;
5278        }
5279        for (j=0; j<11; j++) {
5280            tLen = u_strlen(tData1[j]);
5281            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5282            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5283                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5284                        j, tData1[j]);
5285            }
5286            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5287                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5288                        j, tData1[j]);
5289            }
5290            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5291            for(i = 0; i<rLen; i++) {
5292                log_verbose(" %02X", resColl[i]);
5293            }
5294            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5295         }
5296         ucol_close(coll);
5297 }
5298 
5299 static void
TestOutOfBuffer5468(void)5300 TestOutOfBuffer5468(void)
5301 {
5302     static const char *test = "\\u4e00";
5303     UChar ustr[256];
5304     int32_t ustr_length = u_unescape(test, ustr, 256);
5305     unsigned char shortKeyBuf[1];
5306     int32_t sortkey_length;
5307     UErrorCode status = U_ZERO_ERROR;
5308     static UCollator *coll = NULL;
5309 
5310     coll = ucol_open("root", &status);
5311     if(U_FAILURE(status)) {
5312       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5313       return;
5314     }
5315     ucol_setStrength(coll, UCOL_PRIMARY);
5316     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5317     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5318     if (U_FAILURE(status)) {
5319       log_err("Failed setting atributes\n");
5320       return;
5321     }
5322 
5323     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5324     if (sortkey_length != 4) {
5325         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5326     }
5327     log_verbose("length of sortKey is %d", sortkey_length);
5328     ucol_close(coll);
5329 }
5330 
5331 #define TSKC_DATA_SIZE 5
5332 #define TSKC_BUF_SIZE  50
5333 static void
TestSortKeyConsistency(void)5334 TestSortKeyConsistency(void)
5335 {
5336     UErrorCode icuRC = U_ZERO_ERROR;
5337     UCollator* ucol;
5338     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5339 
5340     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5341     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5342     int32_t i, j, i2;
5343 
5344     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5345     if (U_FAILURE(icuRC))
5346     {
5347         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5348         return;
5349     }
5350 
5351     for (i = 0; i < TSKC_DATA_SIZE; i++)
5352     {
5353         UCharIterator uiter;
5354         uint32_t state[2] = { 0, 0 };
5355         int32_t dataLen = i+1;
5356         for (j=0; j<TSKC_BUF_SIZE; j++)
5357             bufFull[i][j] = bufPart[i][j] = 0;
5358 
5359         /* Full sort key */
5360         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5361 
5362         /* Partial sort key */
5363         uiter_setString(&uiter, data, dataLen);
5364         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5365         if (U_FAILURE(icuRC))
5366         {
5367             log_err("ucol_nextSortKeyPart failed\n");
5368             ucol_close(ucol);
5369             return;
5370         }
5371 
5372         for (i2=0; i2<i; i2++)
5373         {
5374             UBool fullMatch = TRUE;
5375             UBool partMatch = TRUE;
5376             for (j=0; j<TSKC_BUF_SIZE; j++)
5377             {
5378                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5379                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5380             }
5381             if (fullMatch != partMatch) {
5382                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5383                                   : "partial key was consistent, but full key changed\n");
5384                 ucol_close(ucol);
5385                 return;
5386             }
5387         }
5388     }
5389 
5390     /*=============================================*/
5391    ucol_close(ucol);
5392 }
5393 
5394 /* ticket: 6101 */
TestCroatianSortKey(void)5395 static void TestCroatianSortKey(void) {
5396     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5397     UErrorCode status = U_ZERO_ERROR;
5398     UCollator *ucol;
5399     UCharIterator iter;
5400 
5401     static const UChar text[] = { 0x0044, 0xD81A };
5402 
5403     size_t length = sizeof(text)/sizeof(*text);
5404 
5405     uint8_t textSortKey[32];
5406     size_t lenSortKey = 32;
5407     size_t actualSortKeyLen;
5408     uint32_t uStateInfo[2] = { 0, 0 };
5409 
5410     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5411     if (U_FAILURE(status)) {
5412         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5413         return;
5414     }
5415 
5416     uiter_setString(&iter, text, length);
5417 
5418     actualSortKeyLen = ucol_nextSortKeyPart(
5419         ucol, &iter, (uint32_t*)uStateInfo,
5420         textSortKey, lenSortKey, &status
5421         );
5422 
5423     if (actualSortKeyLen == lenSortKey) {
5424         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5425     }
5426 
5427     ucol_close(ucol);
5428 }
5429 
5430 /* ticket: 6140 */
5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5432  * they are both Hiragana and Katakana
5433  */
5434 #define SORTKEYLEN 50
TestHiragana(void)5435 static void TestHiragana(void) {
5436     UErrorCode status = U_ZERO_ERROR;
5437     UCollator* ucol;
5438     UCollationResult strcollresult;
5439     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5440     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5441     int32_t data1Len = sizeof(data1)/sizeof(*data1);
5442     int32_t data2Len = sizeof(data2)/sizeof(*data2);
5443     int32_t i, j;
5444     uint8_t sortKey1[SORTKEYLEN];
5445     uint8_t sortKey2[SORTKEYLEN];
5446 
5447     UCharIterator uiter1;
5448     UCharIterator uiter2;
5449     uint32_t state1[2] = { 0, 0 };
5450     uint32_t state2[2] = { 0, 0 };
5451     int32_t keySize1;
5452     int32_t keySize2;
5453 
5454     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5455             &status);
5456     if (U_FAILURE(status)) {
5457         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5458         return;
5459     }
5460 
5461     /* Start of full sort keys */
5462     /* Full sort key1 */
5463     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5464     /* Full sort key2 */
5465     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5466     if (keySize1 == keySize2) {
5467         for (i = 0; i < keySize1; i++) {
5468             if (sortKey1[i] != sortKey2[i]) {
5469                 log_err("Full sort keys are different. Should be equal.");
5470             }
5471         }
5472     } else {
5473         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5474     }
5475     /* End of full sort keys */
5476 
5477     /* Start of partial sort keys */
5478     /* Partial sort key1 */
5479     uiter_setString(&uiter1, data1, data1Len);
5480     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5481     /* Partial sort key2 */
5482     uiter_setString(&uiter2, data2, data2Len);
5483     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5484     if (U_SUCCESS(status) && keySize1 == keySize2) {
5485         for (j = 0; j < keySize1; j++) {
5486             if (sortKey1[j] != sortKey2[j]) {
5487                 log_err("Partial sort keys are different. Should be equal");
5488             }
5489         }
5490     } else {
5491         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5492     }
5493     /* End of partial sort keys */
5494 
5495     /* Start of strcoll */
5496     /* Use ucol_strcoll() to determine ordering */
5497     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5498     if (strcollresult != UCOL_EQUAL) {
5499         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5500     }
5501 
5502     ucol_close(ucol);
5503 }
5504 
5505 /* Convenient struct for running collation tests */
5506 typedef struct {
5507   const UChar source[MAX_TOKEN_LEN];  /* String on left */
5508   const UChar target[MAX_TOKEN_LEN];  /* String on right */
5509   UCollationResult result;            /* -1, 0 or +1, depending on collation */
5510 } OneTestCase;
5511 
5512 /*
5513  * Utility function to test one collation test case.
5514  * @param testcases Array of test cases.
5515  * @param n_testcases Size of the array testcases.
5516  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5517  * @param n_rules Size of the array str_rules.
5518  */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)5519 static void doTestOneTestCase(const OneTestCase testcases[],
5520                               int n_testcases,
5521                               const char* str_rules[],
5522                               int n_rules)
5523 {
5524   int rule_no, testcase_no;
5525   UChar rule[500];
5526   int32_t length = 0;
5527   UErrorCode status = U_ZERO_ERROR;
5528   UParseError parse_error;
5529   UCollator  *myCollation;
5530 
5531   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5532 
5533     length = u_unescape(str_rules[rule_no], rule, 500);
5534     if (length == 0) {
5535         log_err("ERROR: The rule cannot be unescaped: %s\n");
5536         return;
5537     }
5538     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5539     if(U_FAILURE(status)){
5540         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5541         return;
5542     }
5543     log_verbose("Testing the <<* syntax\n");
5544     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5545     ucol_setStrength(myCollation, UCOL_TERTIARY);
5546     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5547       doTest(myCollation,
5548              testcases[testcase_no].source,
5549              testcases[testcase_no].target,
5550              testcases[testcase_no].result
5551              );
5552     }
5553     ucol_close(myCollation);
5554   }
5555 }
5556 
5557 const static OneTestCase rangeTestcases[] = {
5558   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5559   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5560   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5561 
5562   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5563   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5564   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5565   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5566   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5567 
5568   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5569   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5570   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5571   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5572 
5573   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5574   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5575   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5576   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5577   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5578   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5579   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5580   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5581 };
5582 
5583 static int nRangeTestcases = LEN(rangeTestcases);
5584 
5585 const static OneTestCase rangeTestcasesSupplemental[] = {
5586   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5587   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5588   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5589   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5590   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5591   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5592   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5593 };
5594 
5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5596 
5597 const static OneTestCase rangeTestcasesQwerty[] = {
5598   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5599   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5600 
5601   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5602   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5603 
5604   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5605   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5606 
5607   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5608   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5609 
5610   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5611     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5612   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5613     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5614 };
5615 
5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5617 
TestSameStrengthList(void)5618 static void TestSameStrengthList(void)
5619 {
5620   const char* strRules[] = {
5621     /* Normal */
5622     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5623 
5624     /* Lists */
5625     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5626   };
5627   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5628 }
5629 
TestSameStrengthListQuoted(void)5630 static void TestSameStrengthListQuoted(void)
5631 {
5632   const char* strRules[] = {
5633     /* Lists with quoted characters */
5634     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5635     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5636 
5637     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5638     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5639 
5640     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5641     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5642   };
5643   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5644 }
5645 
TestSameStrengthListSupplemental(void)5646 static void TestSameStrengthListSupplemental(void)
5647 {
5648   const char* strRules[] = {
5649     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5650     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5651     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5652     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5653   };
5654   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5655 }
5656 
TestSameStrengthListQwerty(void)5657 static void TestSameStrengthListQwerty(void)
5658 {
5659   const char* strRules[] = {
5660     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5661     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5662     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5663     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5664     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5665 
5666     /* Quoted characters also will work if two quoted characters are not consecutive.  */
5667     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5668 
5669     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5670     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5671 
5672  };
5673   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5674 }
5675 
TestSameStrengthListQuotedQwerty(void)5676 static void TestSameStrengthListQuotedQwerty(void)
5677 {
5678   const char* strRules[] = {
5679     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5680     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5681     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5682 
5683     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5684     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5685    };
5686   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5687 }
5688 
TestSameStrengthListRanges(void)5689 static void TestSameStrengthListRanges(void)
5690 {
5691   const char* strRules[] = {
5692     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5693   };
5694   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5695 }
5696 
TestSameStrengthListSupplementalRanges(void)5697 static void TestSameStrengthListSupplementalRanges(void)
5698 {
5699   const char* strRules[] = {
5700     "&\\ufffe<*\\uffff-\\U00010002",
5701   };
5702   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5703 }
5704 
TestSpecialCharacters(void)5705 static void TestSpecialCharacters(void)
5706 {
5707   const char* strRules[] = {
5708     /* Normal */
5709     "&';'<'+'<','<'-'<'&'<'*'",
5710 
5711     /* List */
5712     "&';'<*'+,-&*'",
5713 
5714     /* Range */
5715     "&';'<*'+'-'-&*'",
5716   };
5717 
5718   const static OneTestCase specialCharacterStrings[] = {
5719     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5720     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5721     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5722     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5723   };
5724   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5725 }
5726 
TestPrivateUseCharacters(void)5727 static void TestPrivateUseCharacters(void)
5728 {
5729   const char* strRules[] = {
5730     /* Normal */
5731     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5732     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5733   };
5734 
5735   const static OneTestCase privateUseCharacterStrings[] = {
5736     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5737     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5738     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5739     { {0xe2da}, {0xe2db}, UCOL_LESS },
5740     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5741     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5742   };
5743   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5744 }
5745 
TestPrivateUseCharactersInList(void)5746 static void TestPrivateUseCharactersInList(void)
5747 {
5748   const char* strRules[] = {
5749     /* List */
5750     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5751     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5752     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5753   };
5754 
5755   const static OneTestCase privateUseCharacterStrings[] = {
5756     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5757     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5758     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5759     { {0xe2da}, {0xe2db}, UCOL_LESS },
5760     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5761     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5762   };
5763   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5764 }
5765 
TestPrivateUseCharactersInRange(void)5766 static void TestPrivateUseCharactersInRange(void)
5767 {
5768   const char* strRules[] = {
5769     /* Range */
5770     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5771     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5772     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5773   };
5774 
5775   const static OneTestCase privateUseCharacterStrings[] = {
5776     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5777     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5778     { {0xe2d9}, {0xe2da}, UCOL_LESS },
5779     { {0xe2da}, {0xe2db}, UCOL_LESS },
5780     { {0xe2db}, {0xe2dc}, UCOL_LESS },
5781     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5782   };
5783   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5784 }
5785 
TestInvalidListsAndRanges(void)5786 static void TestInvalidListsAndRanges(void)
5787 {
5788   const char* invalidRules[] = {
5789     /* Range not in starred expression */
5790     "&\\ufffe<\\uffff-\\U00010002",
5791 
5792     /* Range without start */
5793     "&a<*-c",
5794 
5795     /* Range without end */
5796     "&a<*b-",
5797 
5798     /* More than one hyphen */
5799     "&a<*b-g-l",
5800 
5801     /* Range in the wrong order */
5802     "&a<*k-b",
5803 
5804   };
5805 
5806   UChar rule[500];
5807   UErrorCode status = U_ZERO_ERROR;
5808   UParseError parse_error;
5809   int n_rules = LEN(invalidRules);
5810   int rule_no;
5811   int length;
5812   UCollator  *myCollation;
5813 
5814   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5815 
5816     length = u_unescape(invalidRules[rule_no], rule, 500);
5817     if (length == 0) {
5818         log_err("ERROR: The rule cannot be unescaped: %s\n");
5819         return;
5820     }
5821     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5822     if(!U_FAILURE(status)){
5823       log_err("ERROR: Could not cause a failure as expected: \n");
5824     }
5825     status = U_ZERO_ERROR;
5826   }
5827 }
5828 
5829 /*
5830  * This test ensures that characters placed before a character in a different script have the same lead byte
5831  * in their collation key before and after script reordering.
5832  */
TestBeforeRuleWithScriptReordering(void)5833 static void TestBeforeRuleWithScriptReordering(void)
5834 {
5835     UParseError error;
5836     UErrorCode status = U_ZERO_ERROR;
5837     UCollator  *myCollation;
5838     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5839     UChar rules[500];
5840     uint32_t rulesLength = 0;
5841     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5842     UCollationResult collResult;
5843 
5844     uint8_t baseKey[256];
5845     uint32_t baseKeyLength;
5846     uint8_t beforeKey[256];
5847     uint32_t beforeKeyLength;
5848 
5849     UChar base[] = { 0x03b1 }; /* base */
5850     int32_t baseLen = sizeof(base)/sizeof(*base);
5851 
5852     UChar before[] = { 0x0e01 }; /* ko kai */
5853     int32_t beforeLen = sizeof(before)/sizeof(*before);
5854 
5855     /*UChar *data[] = { before, base };
5856     genericRulesStarter(srules, data, 2);*/
5857 
5858     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5859 
5860 
5861     /* build collator */
5862     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5863 
5864     rulesLength = u_unescape(srules, rules, LEN(rules));
5865     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5866     if(U_FAILURE(status)) {
5867         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5868         return;
5869     }
5870 
5871     /* check collation results - before rule applied but not script reordering */
5872     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5873     if (collResult != UCOL_GREATER) {
5874         log_err("Collation result not correct before script reordering = %d\n", collResult);
5875     }
5876 
5877     /* check the lead byte of the collation keys before script reordering */
5878     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5879     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5880     if (baseKey[0] != beforeKey[0]) {
5881       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5882    }
5883 
5884     /* reorder the scripts */
5885     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5886     if(U_FAILURE(status)) {
5887         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5888         return;
5889     }
5890 
5891     /* check collation results - before rule applied and after script reordering */
5892     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5893     if (collResult != UCOL_GREATER) {
5894         log_err("Collation result not correct after script reordering = %d\n", collResult);
5895     }
5896 
5897     /* check the lead byte of the collation keys after script reordering */
5898     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5899     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5900     if (baseKey[0] != beforeKey[0]) {
5901         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5902     }
5903 
5904     ucol_close(myCollation);
5905 }
5906 
5907 /*
5908  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5909  */
TestNonLeadBytesDuringCollationReordering(void)5910 static void TestNonLeadBytesDuringCollationReordering(void)
5911 {
5912     UErrorCode status = U_ZERO_ERROR;
5913     UCollator  *myCollation;
5914     int32_t reorderCodes[1] = {USCRIPT_GREEK};
5915     UCollationResult collResult;
5916 
5917     uint8_t baseKey[256];
5918     uint32_t baseKeyLength;
5919     uint8_t reorderKey[256];
5920     uint32_t reorderKeyLength;
5921 
5922     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5923 
5924     int i;
5925 
5926 
5927     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5928 
5929     /* build collator tertiary */
5930     myCollation = ucol_open("", &status);
5931     ucol_setStrength(myCollation, UCOL_TERTIARY);
5932     if(U_FAILURE(status)) {
5933         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5934         return;
5935     }
5936     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5937 
5938     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5939     if(U_FAILURE(status)) {
5940         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5941         return;
5942     }
5943     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5944 
5945     if (baseKeyLength != reorderKeyLength) {
5946         log_err("Key lengths not the same during reordering.\n", collResult);
5947         return;
5948     }
5949 
5950     for (i = 1; i < baseKeyLength; i++) {
5951         if (baseKey[i] != reorderKey[i]) {
5952             log_err("Collation key bytes not the same at position %d.\n", i);
5953             return;
5954         }
5955     }
5956     ucol_close(myCollation);
5957 
5958     /* build collator quaternary */
5959     myCollation = ucol_open("", &status);
5960     ucol_setStrength(myCollation, UCOL_QUATERNARY);
5961     if(U_FAILURE(status)) {
5962         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5963         return;
5964     }
5965     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5966 
5967     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5968     if(U_FAILURE(status)) {
5969         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5970         return;
5971     }
5972     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5973 
5974     if (baseKeyLength != reorderKeyLength) {
5975         log_err("Key lengths not the same during reordering.\n", collResult);
5976         return;
5977     }
5978 
5979     for (i = 1; i < baseKeyLength; i++) {
5980         if (baseKey[i] != reorderKey[i]) {
5981             log_err("Collation key bytes not the same at position %d.\n", i);
5982             return;
5983         }
5984     }
5985     ucol_close(myCollation);
5986 }
5987 
5988 /*
5989  * Test reordering API.
5990  */
TestReorderingAPI(void)5991 static void TestReorderingAPI(void)
5992 {
5993     UErrorCode status = U_ZERO_ERROR;
5994     UCollator  *myCollation;
5995     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5996     UCollationResult collResult;
5997     int32_t retrievedReorderCodesLength;
5998     UChar greekString[] = { 0x03b1 };
5999     UChar punctuationString[] = { 0x203e };
6000 
6001     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6002 
6003     /* build collator tertiary */
6004     myCollation = ucol_open("", &status);
6005     ucol_setStrength(myCollation, UCOL_TERTIARY);
6006     if(U_FAILURE(status)) {
6007         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6008         return;
6009     }
6010 
6011     /* set the reorderding */
6012     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6013     if (U_FAILURE(status)) {
6014         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6015         return;
6016     }
6017 
6018     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6019     if (status != U_BUFFER_OVERFLOW_ERROR) {
6020         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6021         return;
6022     }
6023     status = U_ZERO_ERROR;
6024     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6025         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6026         return;
6027     }
6028     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6029     if (collResult != UCOL_LESS) {
6030         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6031         return;
6032     }
6033 
6034     /* clear the reordering */
6035     ucol_setReorderCodes(myCollation, NULL, 0, &status);
6036     if (U_FAILURE(status)) {
6037         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6038         return;
6039     }
6040 
6041     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6042     if (retrievedReorderCodesLength != 0) {
6043         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6044         return;
6045     }
6046 
6047     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6048     if (collResult != UCOL_GREATER) {
6049         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6050         return;
6051     }
6052 
6053     ucol_close(myCollation);
6054 }
6055 
6056 /*
6057  * Utility function to test one collation reordering test case.
6058  * @param testcases Array of test cases.
6059  * @param n_testcases Size of the array testcases.
6060  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
6061  * @param n_rules Size of the array str_rules.
6062  */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6064 {
6065     int testCaseNum;
6066     UErrorCode status = U_ZERO_ERROR;
6067     UCollator  *myCollation;
6068 
6069     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6070         myCollation = ucol_open("", &status);
6071         if (U_FAILURE(status)) {
6072             log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6073             return;
6074         }
6075         ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6076         if(U_FAILURE(status)) {
6077             log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6078             return;
6079         }
6080 
6081         for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6082             doTest(myCollation,
6083                 testCases[testCaseNum].source,
6084                 testCases[testCaseNum].target,
6085                 testCases[testCaseNum].result
6086             );
6087         }
6088         ucol_close(myCollation);
6089     }
6090 }
6091 
TestGreekFirstReorder(void)6092 static void TestGreekFirstReorder(void)
6093 {
6094     const char* strRules[] = {
6095         "[reorder Grek]"
6096     };
6097 
6098     const int32_t apiRules[] = {
6099         USCRIPT_GREEK
6100     };
6101 
6102     const static OneTestCase privateUseCharacterStrings[] = {
6103         { {0x0391}, {0x0391}, UCOL_EQUAL },
6104         { {0x0041}, {0x0391}, UCOL_GREATER },
6105         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6106         { {0x0060}, {0x0391}, UCOL_LESS },
6107         { {0x0391}, {0xe2dc}, UCOL_LESS },
6108         { {0x0391}, {0x0060}, UCOL_GREATER },
6109     };
6110 
6111     /* Test rules creation */
6112     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6113 
6114     /* Test collation reordering API */
6115     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6116 }
6117 
TestGreekLastReorder(void)6118 static void TestGreekLastReorder(void)
6119 {
6120     const char* strRules[] = {
6121         "[reorder Zzzz Grek]"
6122     };
6123 
6124     const int32_t apiRules[] = {
6125         USCRIPT_UNKNOWN, USCRIPT_GREEK
6126     };
6127 
6128     const static OneTestCase privateUseCharacterStrings[] = {
6129         { {0x0391}, {0x0391}, UCOL_EQUAL },
6130         { {0x0041}, {0x0391}, UCOL_LESS },
6131         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6132         { {0x0060}, {0x0391}, UCOL_LESS },
6133         { {0x0391}, {0xe2dc}, UCOL_GREATER },
6134     };
6135 
6136     /* Test rules creation */
6137     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6138 
6139     /* Test collation reordering API */
6140     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6141 }
6142 
TestNonScriptReorder(void)6143 static void TestNonScriptReorder(void)
6144 {
6145     const char* strRules[] = {
6146         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6147     };
6148 
6149     const int32_t apiRules[] = {
6150         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6151         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6152         UCOL_REORDER_CODE_CURRENCY
6153     };
6154 
6155     const static OneTestCase privateUseCharacterStrings[] = {
6156         { {0x0391}, {0x0041}, UCOL_LESS },
6157         { {0x0041}, {0x0391}, UCOL_GREATER },
6158         { {0x0060}, {0x0041}, UCOL_LESS },
6159         { {0x0060}, {0x0391}, UCOL_GREATER },
6160         { {0x0024}, {0x0041}, UCOL_GREATER },
6161     };
6162 
6163     /* Test rules creation */
6164     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6165 
6166     /* Test collation reordering API */
6167     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6168 }
6169 
TestHaniReorder(void)6170 static void TestHaniReorder(void)
6171 {
6172     const char* strRules[] = {
6173         "[reorder Hani]"
6174     };
6175     const int32_t apiRules[] = {
6176         USCRIPT_HAN
6177     };
6178 
6179     const static OneTestCase privateUseCharacterStrings[] = {
6180         { {0x4e00}, {0x0041}, UCOL_LESS },
6181         { {0x4e00}, {0x0060}, UCOL_GREATER },
6182         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6183         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6184         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6185         { {0xfa27}, {0x0041}, UCOL_LESS },
6186         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6187     };
6188 
6189     /* Test rules creation */
6190     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6191 
6192     /* Test collation reordering API */
6193     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6194 }
6195 
/*
 * Compares two zero-terminated byte arrays, byte by byte.
 * @param a First array; must end with a 0 byte.
 * @param b Second array; must end with a 0 byte.
 * @return 0 if both arrays are identical up to and including the terminator,
 *         -1 if the first differing byte of a is smaller, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  /* Walk the common prefix, stopping early at a's terminator. */
  while (*a != 0 && *a == *b) {
    ++a;
    ++b;
  }
  if (*a == *b) {
    /* Only reachable when both arrays ended at the same position. */
    return 0;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
6205 
TestImport(void)6206 static void TestImport(void)
6207 {
6208     UCollator* vicoll;
6209     UCollator* escoll;
6210     UCollator* viescoll;
6211     UCollator* importviescoll;
6212     UParseError error;
6213     UErrorCode status = U_ZERO_ERROR;
6214     UChar* virules;
6215     int32_t viruleslength;
6216     UChar* esrules;
6217     int32_t esruleslength;
6218     UChar* viesrules;
6219     int32_t viesruleslength;
6220     char srules[500] = "[import vi][import es]";
6221     UChar rules[500];
6222     uint32_t length = 0;
6223     int32_t itemCount;
6224     int32_t i, k;
6225     UChar32 start;
6226     UChar32 end;
6227     UChar str[500];
6228     int32_t strLength;
6229 
6230     uint8_t sk1[500];
6231     uint8_t sk2[500];
6232 
6233     UBool b;
6234     USet* tailoredSet;
6235     USet* importTailoredSet;
6236 
6237 
6238     vicoll = ucol_open("vi", &status);
6239     if(U_FAILURE(status)){
6240         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6241         return;
6242     }
6243 
6244     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6245     escoll = ucol_open("es", &status);
6246     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6247     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6248     viesrules[0] = 0;
6249     u_strcat(viesrules, virules);
6250     u_strcat(viesrules, esrules);
6251     viesruleslength = viruleslength + esruleslength;
6252     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6253 
6254     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6255     length = u_unescape(srules, rules, 500);
6256     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6257     if(U_FAILURE(status)){
6258         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6259         return;
6260     }
6261 
6262     tailoredSet = ucol_getTailoredSet(viescoll, &status);
6263     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6264 
6265     if(!uset_equals(tailoredSet, importTailoredSet)){
6266         log_err("Tailored sets not equal");
6267     }
6268 
6269     uset_close(importTailoredSet);
6270 
6271     itemCount = uset_getItemCount(tailoredSet);
6272 
6273     for( i = 0; i < itemCount; i++){
6274         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6275         if(strLength < 2){
6276             for (; start <= end; start++){
6277                 k = 0;
6278                 U16_APPEND(str, k, 500, start, b);
6279                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6280                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6281                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6282                     log_err("Sort key for %s not equal\n", str);
6283                     break;
6284                 }
6285             }
6286         }else{
6287             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6288             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6289             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6290                 log_err("ZZSort key for %s not equal\n", str);
6291                 break;
6292             }
6293 
6294         }
6295     }
6296 
6297     uset_close(tailoredSet);
6298 
6299     uprv_free(viesrules);
6300 
6301     ucol_close(vicoll);
6302     ucol_close(escoll);
6303     ucol_close(viescoll);
6304     ucol_close(importviescoll);
6305 }
6306 
TestImportWithType(void)6307 static void TestImportWithType(void)
6308 {
6309     UCollator* vicoll;
6310     UCollator* decoll;
6311     UCollator* videcoll;
6312     UCollator* importvidecoll;
6313     UParseError error;
6314     UErrorCode status = U_ZERO_ERROR;
6315     const UChar* virules;
6316     int32_t viruleslength;
6317     const UChar* derules;
6318     int32_t deruleslength;
6319     UChar* viderules;
6320     int32_t videruleslength;
6321     const char srules[500] = "[import vi][import de-u-co-phonebk]";
6322     UChar rules[500];
6323     uint32_t length = 0;
6324     int32_t itemCount;
6325     int32_t i, k;
6326     UChar32 start;
6327     UChar32 end;
6328     UChar str[500];
6329     int32_t strLength;
6330 
6331     uint8_t sk1[500];
6332     uint8_t sk2[500];
6333 
6334     USet* tailoredSet;
6335     USet* importTailoredSet;
6336 
6337     vicoll = ucol_open("vi", &status);
6338     if(U_FAILURE(status)){
6339         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6340         return;
6341     }
6342     virules = ucol_getRules(vicoll, &viruleslength);
6343     /* decoll = ucol_open("de@collation=phonebook", &status); */
6344     decoll = ucol_open("de-u-co-phonebk", &status);
6345     if(U_FAILURE(status)){
6346         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6347         return;
6348     }
6349 
6350 
6351     derules = ucol_getRules(decoll, &deruleslength);
6352     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6353     viderules[0] = 0;
6354     u_strcat(viderules, virules);
6355     u_strcat(viderules, derules);
6356     videruleslength = viruleslength + deruleslength;
6357     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6358 
6359     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6360     length = u_unescape(srules, rules, 500);
6361     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6362     if(U_FAILURE(status)){
6363         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6364         return;
6365     }
6366 
6367     tailoredSet = ucol_getTailoredSet(videcoll, &status);
6368     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6369 
6370     if(!uset_equals(tailoredSet, importTailoredSet)){
6371         log_err("Tailored sets not equal");
6372     }
6373 
6374     uset_close(importTailoredSet);
6375 
6376     itemCount = uset_getItemCount(tailoredSet);
6377 
6378     for( i = 0; i < itemCount; i++){
6379         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6380         if(strLength < 2){
6381             for (; start <= end; start++){
6382                 k = 0;
6383                 U16_APPEND_UNSAFE(str, k, start);
6384                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6385                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6386                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6387                     log_err("Sort key for %s not equal\n", str);
6388                     break;
6389                 }
6390             }
6391         }else{
6392             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6393             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6394             if(compare_uint8_t_arrays(sk1, sk2) != 0){
6395                 log_err("Sort key for %s not equal\n", str);
6396                 break;
6397             }
6398 
6399         }
6400     }
6401 
6402     uset_close(tailoredSet);
6403 
6404     uprv_free(viderules);
6405 
6406     ucol_close(videcoll);
6407     ucol_close(importvidecoll);
6408     ucol_close(vicoll);
6409     ucol_close(decoll);
6410 
6411 }
6412 
6413 
6414 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
6415 
addMiscCollTest(TestNode ** root)6416 void addMiscCollTest(TestNode** root)
6417 {
6418     TEST(TestRuleOptions);
6419     TEST(TestBeforePrefixFailure);
6420     TEST(TestContractionClosure);
6421     TEST(TestPrefixCompose);
6422     TEST(TestStrCollIdenticalPrefix);
6423     TEST(TestPrefix);
6424     TEST(TestNewJapanese);
6425     /*TEST(TestLimitations);*/
6426     TEST(TestNonChars);
6427     TEST(TestExtremeCompression);
6428     TEST(TestSurrogates);
6429     /* BEGIN android-removed
6430        To save space, Android does not include the collation tailoring rules.
6431        We skip the tailing tests for collations. */
6432     /* TEST(TestVariableTopSetting); */
6433     /* END android-removed */
6434     TEST(TestBocsuCoverage);
6435     TEST(TestCyrillicTailoring);
6436     TEST(TestCase);
6437     TEST(IncompleteCntTest);
6438     TEST(BlackBirdTest);
6439     TEST(FunkyATest);
6440     TEST(BillFairmanTest);
6441     TEST(RamsRulesTest);
6442     TEST(IsTailoredTest);
6443     TEST(TestCollations);
6444     TEST(TestChMove);
6445     TEST(TestImplicitTailoring);
6446     TEST(TestFCDProblem);
6447     TEST(TestEmptyRule);
6448     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
6449     TEST(TestJ815);
6450     /*TEST(TestJ831);*/ /* we changed lv locale */
6451     TEST(TestBefore);
6452     TEST(TestRedundantRules);
6453     TEST(TestExpansionSyntax);
6454     TEST(TestHangulTailoring);
6455     TEST(TestUCARules);
6456     TEST(TestIncrementalNormalize);
6457     TEST(TestComposeDecompose);
6458     TEST(TestCompressOverlap);
6459     TEST(TestContraction);
6460     TEST(TestExpansion);
6461     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
6462     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
6463     TEST(TestOptimize);
6464     TEST(TestSuppressContractions);
6465     TEST(Alexis2);
6466     TEST(TestHebrewUCA);
6467     TEST(TestPartialSortKeyTermination);
6468     TEST(TestSettings);
6469     TEST(TestEquals);
6470     TEST(TestJ2726);
6471     TEST(NullRule);
6472     TEST(TestNumericCollation);
6473     TEST(TestTibetanConformance);
6474     TEST(TestPinyinProblem);
6475     TEST(TestImplicitGeneration);
6476     TEST(TestSeparateTrees);
6477     TEST(TestBeforePinyin);
6478     TEST(TestBeforeTightening);
6479     /*TEST(TestMoreBefore);*/
6480     TEST(TestTailorNULL);
6481     TEST(TestUpperFirstQuaternary);
6482     TEST(TestJ4960);
6483     TEST(TestJ5223);
6484     TEST(TestJ5232);
6485     TEST(TestJ5367);
6486     TEST(TestHiragana);
6487     TEST(TestSortKeyConsistency);
6488     TEST(TestVI5913);  /* VI, RO tailored rules */
6489     TEST(TestCroatianSortKey);
6490     TEST(TestTailor6179);
6491     TEST(TestUCAPrecontext);
6492     TEST(TestOutOfBuffer5468);
6493     TEST(TestSameStrengthList);
6494 
6495     TEST(TestSameStrengthListQuoted);
6496     TEST(TestSameStrengthListSupplemental);
6497     TEST(TestSameStrengthListQwerty);
6498     TEST(TestSameStrengthListQuotedQwerty);
6499     TEST(TestSameStrengthListRanges);
6500     TEST(TestSameStrengthListSupplementalRanges);
6501     TEST(TestSpecialCharacters);
6502     TEST(TestPrivateUseCharacters);
6503     TEST(TestPrivateUseCharactersInList);
6504     TEST(TestPrivateUseCharactersInRange);
6505     TEST(TestInvalidListsAndRanges);
6506     TEST(TestImport);
6507     TEST(TestImportWithType);
6508 
6509     TEST(TestBeforeRuleWithScriptReordering);
6510     TEST(TestNonLeadBytesDuringCollationReordering);
6511     TEST(TestReorderingAPI);
6512     TEST(TestGreekFirstReorder);
6513     TEST(TestGreekLastReorder);
6514     TEST(TestNonScriptReorder);
6515     TEST(TestHaniReorder);
6516 }
6517 
6518 #endif /* #if !UCONFIG_NO_COLLATION */
6519