• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2001-2009, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /*******************************************************************************
7 *
8 * File cmsccoll.C
9 *
10 *******************************************************************************/
11 /**
12  * These are the tests specific to ICU 1.8 and above, that I didn't know where
13  * to fit.
14  */
15 
16 #include <stdio.h>
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_COLLATION
21 
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
25 #include "cintltst.h"
26 #include "ccolltst.h"
27 #include "callcoll.h"
28 #include "unicode/ustring.h"
29 #include "string.h"
30 #include "ucol_imp.h"
31 #include "ucol_tok.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "unicode/ures.h"
38 #include "uparse.h"
39 #include "putilimp.h"
40 
41 
/* Number of elements in a true array; the argument must not be a pointer. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the testSourceCases/testTargetCases tables. */
#define MAX_TOKEN_LEN 16
45 
46 typedef UCollationResult tst_strcoll(void *collator, const int object,
47                         const UChar *source, const int sLen,
48                         const UChar *target, const int tLen);
49 
50 
51 
/*
 * Strings for IncompleteCntTest, listed so that every entry is expected to
 * sort before all entries that follow it under the rule " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {

  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};

/*
 * Strings for IncompleteCntTest, listed so that every entry is expected to
 * sort before all entries that follow it under the rule
 * " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
78 
IncompleteCntTest(void)79 static void IncompleteCntTest(void)
80 {
81   UErrorCode status = U_ZERO_ERROR;
82   UChar temp[90];
83   UChar t1[90];
84   UChar t2[90];
85 
86   UCollator *coll =  NULL;
87   uint32_t i = 0, j = 0;
88   uint32_t size = 0;
89 
90   u_uastrcpy(temp, " & Z < ABC < Q < B");
91 
92   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
93 
94   if(U_SUCCESS(status)) {
95     size = sizeof(cnt1)/sizeof(cnt1[0]);
96     for(i = 0; i < size-1; i++) {
97       for(j = i+1; j < size; j++) {
98         UCollationElements *iter;
99         u_uastrcpy(t1, cnt1[i]);
100         u_uastrcpy(t2, cnt1[j]);
101         doTest(coll, t1, t2, UCOL_LESS);
102         /* synwee : added collation element iterator test */
103         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
104         if (U_FAILURE(status)) {
105           log_err("Creation of iterator failed\n");
106           break;
107         }
108         backAndForth(iter);
109         ucol_closeElements(iter);
110       }
111     }
112   }
113 
114   ucol_close(coll);
115 
116 
117   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
118   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
119 
120   if(U_SUCCESS(status)) {
121     size = sizeof(cnt2)/sizeof(cnt2[0]);
122     for(i = 0; i < size-1; i++) {
123       for(j = i+1; j < size; j++) {
124         UCollationElements *iter;
125         u_uastrcpy(t1, cnt2[i]);
126         u_uastrcpy(t2, cnt2[j]);
127         doTest(coll, t1, t2, UCOL_LESS);
128 
129         /* synwee : added collation element iterator test */
130         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
131         if (U_FAILURE(status)) {
132           log_err("Creation of iterator failed\n");
133           break;
134         }
135         backAndForth(iter);
136         ucol_closeElements(iter);
137       }
138     }
139   }
140 
141   ucol_close(coll);
142 
143 
144 }
145 
/*
 * "black bird" variants in the order BlackBirdTest expects with
 * UCOL_ALTERNATE_HANDLING set to UCOL_SHIFTED at quaternary strength:
 * every entry must compare UCOL_LESS against each later entry.
 */
static const char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};
157 
158 const static UCollationResult shiftedTert[] = {
159   UCOL_EQUAL,
160   UCOL_EQUAL,
161   UCOL_EQUAL,
162   UCOL_LESS,
163   UCOL_EQUAL,
164   UCOL_EQUAL,
165   UCOL_LESS,
166   UCOL_EQUAL,
167   UCOL_EQUAL
168 };
169 
/*
 * "black bird" variants in the order BlackBirdTest expects with
 * UCOL_ALTERNATE_HANDLING set to UCOL_NON_IGNORABLE: every entry must
 * compare UCOL_LESS against each later entry.
 */
static const char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
181 
BlackBirdTest(void)182 static void BlackBirdTest(void) {
183   UErrorCode status = U_ZERO_ERROR;
184   UChar t1[90];
185   UChar t2[90];
186 
187   uint32_t i = 0, j = 0;
188   uint32_t size = 0;
189   UCollator *coll = ucol_open("en_US", &status);
190 
191   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
192   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
193 
194   if(U_SUCCESS(status)) {
195     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
196     for(i = 0; i < size-1; i++) {
197       for(j = i+1; j < size; j++) {
198         u_uastrcpy(t1, nonignorable[i]);
199         u_uastrcpy(t2, nonignorable[j]);
200         doTest(coll, t1, t2, UCOL_LESS);
201       }
202     }
203   }
204 
205   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
206   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
207 
208   if(U_SUCCESS(status)) {
209     size = sizeof(shifted)/sizeof(shifted[0]);
210     for(i = 0; i < size-1; i++) {
211       for(j = i+1; j < size; j++) {
212         u_uastrcpy(t1, shifted[i]);
213         u_uastrcpy(t2, shifted[j]);
214         doTest(coll, t1, t2, UCOL_LESS);
215       }
216     }
217   }
218 
219   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
220   if(U_SUCCESS(status)) {
221     size = sizeof(shifted)/sizeof(shifted[0]);
222     for(i = 1; i < size; i++) {
223       u_uastrcpy(t1, shifted[i-1]);
224       u_uastrcpy(t2, shifted[i]);
225       doTest(coll, t1, t2, shiftedTert[i]);
226     }
227   }
228 
229   ucol_close(coll);
230 }
231 
232 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
233     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
234     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
235     {0x0041/*'A'*/, 0x0300, 0x0000},
236     {0x00C0, 0x0301, 0x0000},
237     /* this would work with forced normalization */
238     {0x00C0, 0x0316, 0x0000}
239 };
240 
241 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
242     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
244     {0x00C0, 0},
245     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246     /* this would work with forced normalization */
247     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
248 };
249 
250 const static UCollationResult results[] = {
251     UCOL_GREATER,
252     UCOL_EQUAL,
253     UCOL_EQUAL,
254     UCOL_GREATER,
255     UCOL_EQUAL
256 };
257 
FunkyATest(void)258 static void FunkyATest(void)
259 {
260 
261     int32_t i;
262     UErrorCode status = U_ZERO_ERROR;
263     UCollator  *myCollation;
264     myCollation = ucol_open("en_US", &status);
265     if(U_FAILURE(status)){
266         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
267         return;
268     }
269     log_verbose("Testing some A letters, for some reason\n");
270     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
271     ucol_setStrength(myCollation, UCOL_TERTIARY);
272     for (i = 0; i < 4 ; i++)
273     {
274         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
275     }
276     ucol_close(myCollation);
277 }
278 
279 UColAttributeValue caseFirst[] = {
280     UCOL_OFF,
281     UCOL_LOWER_FIRST,
282     UCOL_UPPER_FIRST
283 };
284 
285 
286 UColAttributeValue alternateHandling[] = {
287     UCOL_NON_IGNORABLE,
288     UCOL_SHIFTED
289 };
290 
291 UColAttributeValue caseLevel[] = {
292     UCOL_OFF,
293     UCOL_ON
294 };
295 
296 UColAttributeValue strengths[] = {
297     UCOL_PRIMARY,
298     UCOL_SECONDARY,
299     UCOL_TERTIARY,
300     UCOL_QUATERNARY,
301     UCOL_IDENTICAL
302 };
303 
304 #if 0
/* Printable names, index-parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

/* Printable names, index-parallel to caseFirst[]. */
static const char * caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };

/* Printable names, index-parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };

/* Printable names, index-parallel to caseLevel[]. */
static const char * caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };
329 
330 /* not used currently - does not test only prints */
331 static void PrintMarkDavis(void)
332 {
333   UErrorCode status = U_ZERO_ERROR;
334   UChar m[256];
335   uint8_t sortkey[256];
336   UCollator *coll = ucol_open("en_US", &status);
337   uint32_t h,i,j,k, sortkeysize;
338   uint32_t sizem = 0;
339   char buffer[512];
340   uint32_t len = 512;
341 
342   log_verbose("PrintMarkDavis");
343 
344   u_uastrcpy(m, "Mark Davis");
345   sizem = u_strlen(m);
346 
347 
348   m[1] = 0xe4;
349 
350   for(i = 0; i<sizem; i++) {
351     fprintf(stderr, "\\u%04X ", m[i]);
352   }
353   fprintf(stderr, "\n");
354 
355   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
356     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
357     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
358 
359     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
360       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
361       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
362 
363       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
364         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
365         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
366 
367         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
368           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
369           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
370           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
371           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
372         }
373 
374       }
375 
376     }
377 
378   }
379 }
380 #endif
381 
BillFairmanTest(void)382 static void BillFairmanTest(void) {
383 /*
384 ** check for actual locale via ICU resource bundles
385 **
386 ** lp points to the original locale ("fr_FR_....")
387 */
388 
389     UResourceBundle *lr,*cr;
390     UErrorCode              lec = U_ZERO_ERROR;
391     const char *lp = "fr_FR_you_ll_never_find_this_locale";
392 
393     log_verbose("BillFairmanTest\n");
394 
395     lr = ures_open(NULL,lp,&lec);
396     if (lr) {
397         cr = ures_getByKey(lr,"collations",0,&lec);
398         if (cr) {
399             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
400             if (lp) {
401                 if (U_SUCCESS(lec)) {
402                     if(strcmp(lp, "fr") != 0) {
403                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
404                     }
405                 }
406             }
407             ures_close(cr);
408         }
409         ures_close(lr);
410     }
411 }
412 
/*
 * Checks a primary-strength relation p < q twice: on the bare strings, and
 * again after prepending U+31A3 to p and U+310D to q.  The second check is
 * presumably meant to show that the difference between p and q outweighs
 * the difference between the two lead characters ("primary swamps 2nd" in
 * the original notes) -- confirm against the collation data.
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    /* lead character in slot 0, remaining slots zero-filled */
    UChar prefixedP[256] = { 0x31a3 };
    UChar prefixedQ[256] = { 0x310d };

    doTest(col, p, q, UCOL_LESS);

    u_strcpy(prefixedP + 1, p);
    u_strcpy(prefixedQ + 1, q);
    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
443 
/*
 * Checks a secondary-strength relation p < q three ways:
 *   - on the bare strings;
 *   - with 'S' prepended to p and 's' prepended to q, still expecting LESS
 *     ("secondary swamps 3rd" in the original notes);
 *   - with 'b' appended to p and 'a' appended to q, expecting GREATER
 *     ("secondary is swamped by 1").
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar lhs[256] = { 0x0053 };   /* 'S' */
    UChar rhs[256] = { 0x0073 };   /* 's' */
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* differing case in front of the strings */
    u_strcpy(lhs + 1, p);
    u_strcpy(rhs + 1, q);
    doTest(col, lhs, rhs, UCOL_LESS);

    /* differing base letters behind the strings */
    pLen = u_strlen(p);
    u_strcpy(lhs, p);
    lhs[pLen] = 0x62;
    lhs[pLen + 1] = 0;

    qLen = u_strlen(q);
    u_strcpy(rhs, q);
    rhs[qLen] = 0x61;
    rhs[qLen + 1] = 0;

    doTest(col, lhs, rhs, UCOL_GREATER);
}
480 
/*
 * Checks a tertiary-strength relation p < q three ways:
 *   - on the bare strings;
 *   - with U+0020 prepended to p and U+002D prepended to q, still expecting
 *     LESS ("tertiary swamps 4th" in the original notes);
 *   - with U+00E0 appended to p and U+0061 appended to q, expecting GREATER
 *     ("tertiary is swamped by 3rd").
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar lhs[256] = { 0x0020 };   /* space  */
    UChar rhs[256] = { 0x002D };   /* hyphen */
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* differing lead characters in front of the strings */
    u_strcpy(lhs + 1, p);
    u_strcpy(rhs + 1, q);
    doTest(col, lhs, rhs, UCOL_LESS);

    /* accented versus plain 'a' behind the strings */
    pLen = u_strlen(p);
    u_strcpy(lhs, p);
    lhs[pLen] = 0xE0;
    lhs[pLen + 1] = 0;

    qLen = u_strlen(q);
    u_strcpy(rhs, q);
    rhs[qLen] = 0x61;
    rhs[qLen + 1] = 0;

    doTest(col, lhs, rhs, UCOL_GREATER);
}
515 
/*
 * Checks that the collator treats p and q as equal.  Unlike the other
 * strength helpers, no decorated variants of the strings are tried.
 */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    const UCollationResult expected = UCOL_EQUAL;

    doTest(col, p, q, expected);
}
527 
/*
 * Walks the tailoring rules of coll token by token and, for every relation
 * found, checks with testPrimary/testSecondary/testTertiary/testEquality
 * that the collator orders the previous token string ("first") against the
 * current one ("second") at the parsed strength.
 *
 * Handling of special tokens:
 *   - a reset only records state (lastReset, and the [before n] strength);
 *   - a token flagged UCOL_TOK_TOP makes "second" empty;
 *   - an expansion is appended to "first" when the previous token had none;
 *   - a prefix on the token that follows a reset is prepended to "first";
 *   - after a [before] reset, first and second are swapped for one
 *     comparison and the weaker of the two strengths is used.
 *
 * Does nothing when *status is already a failure or the collator has no
 * rules.  Sets *status to U_MEMORY_ALLOCATION_ERROR if the working copy of
 * the rules cannot be allocated.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
  const UChar *rules = NULL;
  int32_t ruleLen = 0;
  uint32_t strength = 0;
  uint32_t chOffset = 0; uint32_t chLen = 0;
  uint32_t exOffset = 0; uint32_t exLen = 0;
  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
  uint32_t firstEx = 0;
  uint32_t firstLen = 0;
  UBool top_ = TRUE;
  uint16_t specs = 0;
  UBool startOfRules = TRUE;
  UBool lastReset = FALSE;
  UBool before = FALSE;
  uint32_t beforeStrength = 0;
  UColTokenParser src;
  UColOptionSet opts;

  UChar first[256];
  UChar second[256];
  UChar tempB[256];
  uint32_t tempLen;
  int32_t shiftLen;
  UChar *rulesCopy = NULL;
  UParseError parseError;

  src.opts = &opts;

  rules = ucol_getRules(coll, &ruleLen);
  if(U_SUCCESS(*status) && ruleLen > 0) {
    /* the parser works on a private copy with extra room behind the rules */
    rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    if(rulesCopy == NULL) {
      *status = U_MEMORY_ALLOCATION_ERROR;
      return;
    }
    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy+ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    *first = *second = 0;

    while (ucol_tok_parseNextToken(&src, startOfRules,&parseError, status) != NULL) {
      strength = src.parsedToken.strength;
      chOffset = src.parsedToken.charsOffset;
      chLen = src.parsedToken.charsLen;
      exOffset = src.parsedToken.extensionOffset;
      exLen = src.parsedToken.extensionLen;
      prefixOffset = src.parsedToken.prefixOffset;
      prefixLen = src.parsedToken.prefixLen;
      specs = src.parsedToken.flags;

      startOfRules = FALSE;
      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
        second[0] = 0;
      } else {
        u_strncpy(second,rulesCopy+chOffset, chLen);
        second[chLen] = 0;

        if(exLen > 0 && firstEx == 0) {
          u_strncat(first, rulesCopy+exOffset, exLen);
          first[firstLen+exLen] = 0;
        }

        if(lastReset == TRUE && prefixLen != 0) {
          /*
           * Make room for the prefix by shifting "first" to the right.  The
           * two ranges overlap, so a forward string copy would re-read units
           * it has already overwritten; u_memmove handles the overlap.  As
           * before, at most firstLen units are moved and the move stops at
           * the terminator if the string is shorter than that.
           */
          shiftLen = u_strlen(first);
          if((uint32_t)shiftLen > firstLen) {
            shiftLen = (int32_t)firstLen;
          }
          u_memmove(first+prefixLen, first, shiftLen);
          first[prefixLen+shiftLen] = 0;
          u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
          first[firstLen+prefixLen] = 0;
          firstLen = firstLen+prefixLen;
        }

        if(before == TRUE) { /* swap first and second */
          u_strcpy(tempB, first);
          u_strcpy(first, second);
          u_strcpy(second, tempB);

          tempLen = firstLen;
          firstLen = chLen;
          chLen = tempLen;

          tempLen = firstEx;
          firstEx = exLen;
          exLen = tempLen;
          if(beforeStrength < strength) {
            strength = beforeStrength;
          }
        }
      }
      lastReset = FALSE;

      switch(strength){
      case UCOL_IDENTICAL:
          testEquality(coll,first,second);
          break;
      case UCOL_PRIMARY:
          testPrimary(coll,first,second);
          break;
      case UCOL_SECONDARY:
          testSecondary(coll,first,second);
          break;
      case UCOL_TERTIARY:
          testTertiary(coll,first,second);
          break;
      case UCOL_TOK_RESET:
        lastReset = TRUE;
        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
        if(before) {
          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
        }
        break;
      default:
          break;
      }

      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
        before = FALSE;
      } else {
        /* the current token becomes the left-hand side of the next relation */
        firstLen = chLen;
        firstEx = exLen;
        u_strcpy(first, second);
      }
    }
    free(rulesCopy);
  }
}
651 
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)652 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
653   UCollator *UCA = (UCollator *)collator;
654   return ucol_strcoll(UCA, source, sLen, target, tLen);
655 }
656 
657 /*
658 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
659 #ifdef U_WINDOWS
660   LCID lcid = (LCID)collator;
661   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
662 #else
663   return 0;
664 #endif
665 }
666 */
667 
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)668 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
669                                      UChar s1, UChar s2,
670                                      const UChar *s, const uint32_t sLen,
671                                      const UChar *t, const uint32_t tLen) {
672   UChar source[256] = {0};
673   UChar target[256] = {0};
674 
675   source[0] = s1;
676   u_strcpy(source+1, s);
677   target[0] = s2;
678   u_strcpy(target+1, t);
679 
680   return func(collator, opts, source, sLen+1, target, tLen+1);
681 }
682 
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)683 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
684                                    UChar s1, UChar s2,
685                                    const UChar *s, const uint32_t sLen,
686                                    const UChar *t, const uint32_t tLen) {
687   UChar source[256] = {0};
688   UChar target[256] = {0};
689 
690   u_strcpy(source, s);
691   source[sLen] = s1;
692   u_strcpy(target, t);
693   target[tLen] = s2;
694 
695   return func(collator, opts, source, sLen+1, target, tLen+1);
696 }
697 
/*
 * Estimates at which strength s and t differ, given that func already
 * returned `result` for them.  Probe characters are put in front of or
 * behind both strings (swampEarlier / swampLater) and the outcome is
 * compared with `result`:
 *   - result is UCOL_EQUAL                               -> UCOL_IDENTICAL
 *   - still `result` with U+31A3 / U+310D in front       -> UCOL_PRIMARY
 *   - still `result` with U+31B7 and U+310F in front,
 *     in both arrangements                               -> UCOL_SECONDARY
 *   - still `result` with the same pair behind,
 *     in both arrangements                               -> UCOL_TERTIARY
 *   - the opposite result with the pair behind,
 *     in both arrangements                               -> UCOL_QUATERNARY
 *   - anything else                                      -> UCOL_IDENTICAL
 */
static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
  const UChar fSecondary = 0x310d;
  const UChar sSecondary = 0x31a3;
  const UChar fTertiary = 0x310f;
  const UChar sTertiary = 0x31b7;
  UCollationResult oposite;

  if(result == UCOL_EQUAL) {
    return UCOL_IDENTICAL;
  }
  oposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    return UCOL_PRIMARY;
  }

  if(swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result
     && swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
    return UCOL_SECONDARY;
  }

  if(swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result
     && swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result) {
    return UCOL_TERTIARY;
  }

  if(swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == oposite
     && swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite) {
    return UCOL_QUATERNARY;
  }

  return UCOL_IDENTICAL;
}
733 
/*
 * Writes a textual relation symbol into buffer and returns buffer:
 * "==" when res is UCOL_EQUAL or strength is the 0xdeadbeef marker,
 * otherwise strength+1 copies of '>' (UCOL_GREATER) or '<' (any other res).
 * The caller must supply a buffer of at least strength+2 characters.
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
  char mark;
  uint32_t pos;

  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    buffer[0] = '=';
    buffer[1] = '=';
    buffer[2] = '\0';
    return buffer;
  }

  mark = (char)((res == UCOL_GREATER) ? '>' : '<');
  for(pos = 0; pos < strength+1; pos++) {
    buffer[pos] = mark;
  }
  buffer[strength+1] = '\0';

  return buffer;
}
755 
756 
757 
/*
 * Appends src to the NUL-terminated string in dst without ever writing past
 * dst[capacity-1]; whatever does not fit is dropped.
 */
static void logFailureAppend(char *dst, size_t capacity, const char *src) {
  size_t used = strlen(dst);

  if(used + 1 < capacity) {
    strncat(dst, src, capacity - used - 1);
  }
}

/*
 * Reports a mismatch between the expected and the observed ordering of
 * source and target.
 *
 * When error is TRUE a log_err line is emitted first.  When error is not
 * TRUE and VERBOSITY is 0 the function returns without further output.
 * Otherwise one log_verbose line is written, of the form
 *   DIFF: <source hex> : <target hex><test>: <src><expected rel><tgt> <platform>: <src><actual rel><tgt>
 * where the strings are shown as \uXXXX escapes (followed by the character
 * itself in parentheses for code units below 0x80) and the relation symbols
 * come from getRelationSymbol().
 *
 * All pieces are assembled in fixed-size buffers; appends are bounded, so
 * long inputs produce a truncated line instead of overrunning a buffer.
 */
static void logFailure (const char *platform, const char *test,
                        const UChar *source, const uint32_t sLen,
                        const UChar *target, const uint32_t tLen,
                        UCollationResult realRes, uint32_t realStrength,
                        UCollationResult expRes, uint32_t expStrength, UBool error) {

  uint32_t i = 0;

  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];

  *sEsc = *tEsc = *s = *t = 0;
  if(error == TRUE) {
    log_err("Difference between expected and generated order. Run test with -v for more info\n");
  } else if(VERBOSITY == 0) {
    return;
  }

  /* s: space-separated hex; sEsc: \uXXXX escapes with ASCII shown in () */
  for(i = 0; i<sLen; i++) {
    sprintf(b, "%04X", source[i]);
    logFailureAppend(sEsc, sizeof(sEsc), "\\u");
    logFailureAppend(sEsc, sizeof(sEsc), b);
    logFailureAppend(s, sizeof(s), b);
    logFailureAppend(s, sizeof(s), " ");
    if(source[i] < 0x80) {
      sprintf(b, "(%c)", source[i]);
      logFailureAppend(sEsc, sizeof(sEsc), b);
    }
  }
  for(i = 0; i<tLen; i++) {
    sprintf(b, "%04X", target[i]);
    logFailureAppend(tEsc, sizeof(tEsc), "\\u");
    logFailureAppend(tEsc, sizeof(tEsc), b);
    logFailureAppend(t, sizeof(t), b);
    logFailureAppend(t, sizeof(t), " ");
    if(target[i] < 0x80) {
      sprintf(b, "(%c)", target[i]);
      logFailureAppend(tEsc, sizeof(tEsc), b);
    }
  }

  strcpy(output, "DIFF: ");

  logFailureAppend(output, sizeof(output), s);
  logFailureAppend(output, sizeof(output), " : ");
  logFailureAppend(output, sizeof(output), t);

  /* expected relation, labelled with the name of the test */
  logFailureAppend(output, sizeof(output), test);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(expRes, expStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  logFailureAppend(output, sizeof(output), " ");

  /* observed relation, labelled with the name of the platform */
  logFailureAppend(output, sizeof(output), platform);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(realRes, realStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  log_verbose("%s\n", output);

}
846 
847 /*
848 static void printOutRules(const UChar *rules) {
849   uint32_t len = u_strlen(rules);
850   uint32_t i = 0;
851   char toPrint;
852   uint32_t line = 0;
853 
854   fprintf(stdout, "Rules:");
855 
856   for(i = 0; i<len; i++) {
857     if(rules[i]<0x7f && rules[i]>=0x20) {
858       toPrint = (char)rules[i];
859       if(toPrint == '&') {
860         line = 1;
861         fprintf(stdout, "\n&");
862       } else if(toPrint == ';') {
863         fprintf(stdout, "<<");
864         line+=2;
865       } else if(toPrint == ',') {
866         fprintf(stdout, "<<<");
867         line+=3;
868       } else {
869         fprintf(stdout, "%c", toPrint);
870         line++;
871       }
872     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
873       fprintf(stdout, "\\u%04X", rules[i]);
874       line+=6;
875     }
876     if(line>72) {
877       fprintf(stdout, "\n");
878       line = 0;
879     }
880   }
881 
882   log_verbose("\n");
883 
884 }
885 */
886 
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)887 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
888   uint32_t diffs = 0;
889   UCollationResult realResult;
890   uint32_t realStrength;
891 
892   uint32_t sLen = u_strlen(first);
893   uint32_t tLen = u_strlen(second);
894 
895   realResult = func(collator, opts, first, sLen, second, tLen);
896   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
897 
898   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
899     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
900     diffs++;
901   } else if(realResult != UCOL_LESS || realStrength != strength) {
902     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
903     diffs++;
904   }
905   return diffs;
906 }
907 
908 
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)909 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
910   const UChar *rules = NULL, *current = NULL;
911   int32_t ruleLen = 0;
912   uint32_t strength = 0;
913   uint32_t chOffset = 0; uint32_t chLen = 0;
914   uint32_t exOffset = 0; uint32_t exLen = 0;
915   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
916 /*  uint32_t rExpsLen = 0; */
917   uint32_t firstLen = 0, secondLen = 0;
918   UBool varT = FALSE; UBool top_ = TRUE;
919   uint16_t specs = 0;
920   UBool startOfRules = TRUE;
921   UColTokenParser src;
922   UColOptionSet opts;
923 
924   UChar first[256];
925   UChar second[256];
926   UChar *rulesCopy = NULL;
927 
928   uint32_t UCAdiff = 0;
929   uint32_t Windiff = 1;
930   UParseError parseError;
931 
932   src.opts = &opts;
933 
934   rules = ucol_getRules(coll, &ruleLen);
935 
936   /*printOutRules(rules);*/
937 
938   if(U_SUCCESS(*status) && ruleLen > 0) {
939     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
940     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
941     src.current = src.source = rulesCopy;
942     src.end = rulesCopy+ruleLen;
943     src.extraCurrent = src.end;
944     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
945     *first = *second = 0;
946 
947     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
948       strength = src.parsedToken.strength;
949       chOffset = src.parsedToken.charsOffset;
950       chLen = src.parsedToken.charsLen;
951       exOffset = src.parsedToken.extensionOffset;
952       exLen = src.parsedToken.extensionLen;
953       prefixOffset = src.parsedToken.prefixOffset;
954       prefixLen = src.parsedToken.prefixLen;
955       specs = src.parsedToken.flags;
956 
957       startOfRules = FALSE;
958       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
959       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
960 
961       u_strncpy(second,rulesCopy+chOffset, chLen);
962       second[chLen] = 0;
963       secondLen = chLen;
964 
965       if(exLen > 0) {
966         u_strncat(first, rulesCopy+exOffset, exLen);
967         first[firstLen+exLen] = 0;
968         firstLen += exLen;
969       }
970 
971       if(strength != UCOL_TOK_RESET) {
972         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
973           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
974           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
975         }
976       }
977 
978 
979       firstLen = chLen;
980       u_strcpy(first, second);
981 
982     }
983     if(UCAdiff != 0 && Windiff != 0) {
984       log_verbose("\n");
985     }
986     if(UCAdiff == 0) {
987       log_verbose("No immediate difference with %s!\n", refName);
988     }
989     if(Windiff == 0) {
990       log_verbose("No immediate difference with Win32!\n");
991     }
992     free(rulesCopy);
993   }
994 }
995 
/*
 * Orders two collation elements, each given as a lead CE plus its
 * continuation CE, level by level:
 *   primary   - upper 16 bits of the lead, then upper 16 bits of the continuation
 *   secondary - bits 8..15 of the lead, then bits 8..15 of the continuation
 *   tertiary  - low byte of the lead, then low byte of the continuation
 *
 * s1/s2 are the lead/continuation of the first element, t1/t2 of the second.
 * Returns 0 when both pairs are bit-identical, -1 when the first element
 * sorts lower and 1 when it sorts higher.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t srcLevel[3];
  uint32_t tgtLevel[3];
  int32_t level;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* pack lead and continuation weights of each level into one comparable word */
  srcLevel[0] = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  tgtLevel[0] = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  srcLevel[1] = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  tgtLevel[1] = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  srcLevel[2] = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  tgtLevel[2] = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);

  /* primary and secondary: the first level that differs decides */
  for(level = 0; level < 2; level++) {
    if(srcLevel[level] < tgtLevel[level]) {
      return -1;
    }
    if(srcLevel[level] > tgtLevel[level]) {
      return 1;
    }
  }

  /* tertiary is the last resort; anything that is not lower counts as higher */
  return (srcLevel[2] < tgtLevel[2]) ? -1 : 1;
}
1032 
/*
 * CE boundaries recorded for one indirect (symbolic) reset position.
 * Each boundary is a pair: a lead CE and its continuation CE.
 * setIndirectBoundaries() fills the entries in and stores zeros in the
 * limit pair when no upper bound is supplied.
 */
typedef struct {
  uint32_t startCE;      /* lead CE the position starts at */
  uint32_t startContCE;  /* continuation CE paired with startCE */
  uint32_t limitCE;      /* lead CE of the upper bound, 0 when none was given */
  uint32_t limitContCE;  /* continuation CE paired with limitCE, 0 when none was given */
} indirectBoundaries;

/* These values are used for finding CE values for indirect positioning.  */
/* Indirect positioning is a mechanism that allows resets on symbolic     */
/* values. It only works for resets, and indirect names themselves cannot */
/* be tailored. An indirect name can define either an anchor point or a   */
/* range. An anchor point behaves in exactly the same way as a code point */
/* in a reset would, except that it cannot be tailored. A range (the only */
/* one currently known is the [top] range) explicitly sets the upper      */
/* bound for generated CEs, thus allowing better control over how many    */
/* CEs can be squeezed into the range without a performance penalty.      */
/* In that respect, [top] is used for tailoring locales that use CJK      */
/* characters. Other indirect values are currently a pure convenience:    */
/* they can be used to ensure that the CEs are always positioned in the   */
/* same place relative to a point with known properties (e.g. first       */
/* primary ignorable).                                                    */

/* Boundary table; testCEs() fills slots 0..14 on its first call. */
static indirectBoundaries ucolIndirectBoundaries[15];
/* Set to TRUE by testCEs() once the table above has been filled. */
static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1056 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1057     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1058     /* to initalize here. */
1059     ucolIndirectBoundaries[indexR].startCE = start[0];
1060     ucolIndirectBoundaries[indexR].startContCE = start[1];
1061     if(end) {
1062         ucolIndirectBoundaries[indexR].limitCE = end[0];
1063         ucolIndirectBoundaries[indexR].limitContCE = end[1];
1064     } else {
1065         ucolIndirectBoundaries[indexR].limitCE = 0;
1066         ucolIndirectBoundaries[indexR].limitContCE = 0;
1067     }
1068 }
1069 
/*
 * Re-parses the tailoring rules of coll token by token and checks the
 * collation elements (CEs) the collator produces for every token:
 *   - after a reset, the reset's CE pair becomes the base (for an indirect
 *     reset the pair is taken from ucolIndirectBoundaries);
 *   - a token whose effective strength is still identical must yield the
 *     base pair, and an identical token later in a chain must yield the
 *     previous token's pair;
 *   - any other token must not compare above the "next" CE computed for the
 *     reset and must not break the order of the generated sequence (the
 *     order check is reversed for the first token after a [before] reset).
 *
 * coll   - collator whose rules (ucol_getRules) are examined
 * status - ICU error code, in/out; the function returns early when the root
 *          collator cannot be opened or the locale name cannot be obtained
 *
 * Every exit path releases the root collator and the working copy of the
 * rules.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL;
    int32_t ruleLen = 0;

    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE;
    uint32_t lastContCE;

    int32_t result = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t oldOffset = 0;

    uint16_t specs = 0;
    UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate c;
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
    const char *colLoc;
    UCollator *UCA = ucol_open("root", status);

    if (U_FAILURE(*status)) {
        log_err("Could not open root collator %s\n", u_errorName(*status));
        return;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err("Could not get collator name: %s\n", u_errorName(*status));
        ucol_close(UCA); /* the root collator is already open at this point */
        return;
    }

    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);

    src.invUCA = ucol_initInverseUCA(status);

    /* The boundary table is shared by all calls; fill it only once. */
    if(indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
        indirectBoundariesSet = TRUE;
    }


    if(U_SUCCESS(*status) && ruleLen > 0) {
        /* The tokenizer gets a private copy with extra room after the rules. */
        rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            log_err("%s: could not allocate a working copy of the rules\n", colLoc);
            ucol_close(UCA);
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

        while (ucol_tok_parseNextToken(&src, startOfRules, &parseError,status) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

            uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);

            currCE = ucol_getNextCE(coll, &c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
                currCE = ucol_getNextCE(coll, &c, status);
            }

            /* the second CE only matters when it continues the first one */
            currContCE = ucol_getNextCE(coll, &c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }

            /* we need to repack CEs here */

            if(strength == UCOL_TOK_RESET) {
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(top_ == TRUE) {
                    int32_t indirectIndex = src.parsedToken.indirectIndex;

                    nextCE = baseCE = currCE = ucolIndirectBoundaries[indirectIndex].startCE;
                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[indirectIndex].startContCE;
                } else {
                    nextCE = baseCE = currCE;
                    nextContCE = baseContCE = currContCE;
                }
                maxStrength = UCOL_IDENTICAL;
            } else {
                if(strength < maxStrength) {
                    maxStrength = strength;
                    if(baseCE == UCOL_RESET_TOP_VALUE) {
                        log_verbose("Resetting to [top]\n");
                        nextCE = UCOL_NEXT_TOP_VALUE;
                        nextContCE = UCOL_NEXT_TOP_CONT;
                    } else {
                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                    }
                    if(result < 0) {
                        if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
                        } else {
                            log_err("%s: couldn't find the CE\n", colLoc);
                        }
                        /* Give up on this collator, but leave through the common
                         * exit below so rulesCopy and UCA are released. */
                        break;
                    }
                }

                currCE &= 0xFFFFFF3F;
                currContCE &= 0xFFFFFFBF;

                if(maxStrength == UCOL_IDENTICAL) {
                    if(baseCE != currCE || baseContCE != currContCE) {
                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                    }
                } else {
                    if(strength == UCOL_IDENTICAL) {
                        if(lastCE != currCE || lastContCE != currContCE) {
                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                        }
                    } else {
                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
                            log_err("%s: current CE is not less than base CE\n", colLoc);
                        }
                        if(!before) {
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        } else {
                            /* only the first token after a [before] reset is checked in reverse */
                            before = FALSE;
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        }
                    }
                }

            }

            oldOffset = chOffset;
            lastCE = currCE & 0xFFFFFF3F;
            lastContCE = currContCE & 0xFFFFFFBF;
        }
        free(rulesCopy);
    }
    ucol_close(UCA);
}
1276 
#if 0
/* these locales are now picked from index RB */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif

/*
 * Escaped tailoring rules that RamsRulesTest compiles with ucol_openRules and
 * passes to testCollator and testCEs. The table is read-only, so both the
 * strings and the pointers to them are const. The trailing comments keep the
 * alternative spellings that accompanied each rule.
 */
static const char* const rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
  /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1305 
1306 
TestCollations(void)1307 static void TestCollations(void) {
1308     int32_t noOfLoc = uloc_countAvailable();
1309     int32_t i = 0, j = 0;
1310 
1311     UErrorCode status = U_ZERO_ERROR;
1312     char cName[256];
1313     UChar name[256];
1314     int32_t nameSize;
1315 
1316 
1317     const char *locName = NULL;
1318     UCollator *coll = NULL;
1319     UCollator *UCA = ucol_open("", &status);
1320     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1321     if (U_FAILURE(status)) {
1322         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1323         return;
1324     }
1325     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1326 
1327     for(i = 0; i<noOfLoc; i++) {
1328         status = U_ZERO_ERROR;
1329         locName = uloc_getAvailable(i);
1330         if(uprv_strcmp("ja", locName) == 0) {
1331             log_verbose("Don't know how to test prefixes\n");
1332             continue;
1333         }
1334         if(hasCollationElements(locName)) {
1335             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1336             for(j = 0; j<nameSize; j++) {
1337                 cName[j] = (char)name[j];
1338             }
1339             cName[nameSize] = 0;
1340             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1341             coll = ucol_open(locName, &status);
1342             if(U_SUCCESS(status)) {
1343                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1344                 ucol_close(coll);
1345             } else {
1346                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1347                 status = U_ZERO_ERROR;
1348             }
1349         }
1350     }
1351     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1352     ucol_close(UCA);
1353 }
1354 
RamsRulesTest(void)1355 static void RamsRulesTest(void) {
1356     UErrorCode status = U_ZERO_ERROR;
1357     int32_t i = 0;
1358     UCollator *coll = NULL;
1359     UChar rule[2048];
1360     uint32_t ruleLen;
1361     int32_t noOfLoc = uloc_countAvailable();
1362     const char *locName = NULL;
1363 
1364     log_verbose("RamsRulesTest\n");
1365 
1366     for(i = 0; i<noOfLoc; i++) {
1367         status = U_ZERO_ERROR;
1368         locName = uloc_getAvailable(i);
1369         if(hasCollationElements(locName)) {
1370             if (uprv_strcmp("ja", locName)==0) {
1371                 log_verbose("Don't know how to test Japanese because of prefixes\n");
1372                 continue;
1373             }
1374             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1375                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1376                 continue;
1377             }
1378             if (uprv_strcmp("km", locName)==0 ||
1379                 uprv_strcmp("km_KH", locName)==0 ||
1380                 uprv_strcmp("si", locName)==0 ||
1381                 uprv_strcmp("si_LK", locName)==0 ||
1382                 uprv_strcmp("zh", locName)==0 ||
1383                 uprv_strcmp("zh_Hant", locName)==0 ) {
1384                     continue;  /* TODO: enable these locale tests after trac#6040 is fixed. */
1385             }
1386             log_verbose("Testing locale %s\n", locName);
1387             coll = ucol_open(locName, &status);
1388             if(U_SUCCESS(status)) {
1389                 if(coll->image->jamoSpecial == TRUE) {
1390                     log_err("%s has special JAMOs\n", locName);
1391                 }
1392                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1393                 testCollator(coll, &status);
1394                 testCEs(coll, &status);
1395                 ucol_close(coll);
1396             }
1397         }
1398     }
1399 
1400     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1401         log_verbose("Testing rule: %s\n", rulesToTest[i]);
1402         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1403         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1404         if(U_SUCCESS(status)) {
1405             testCollator(coll, &status);
1406             testCEs(coll, &status);
1407             ucol_close(coll);
1408         }
1409     }
1410 
1411 }
1412 
IsTailoredTest(void)1413 static void IsTailoredTest(void) {
1414     UErrorCode status = U_ZERO_ERROR;
1415     uint32_t i = 0;
1416     UCollator *coll = NULL;
1417     UChar rule[2048];
1418     UChar tailored[2048];
1419     UChar notTailored[2048];
1420     uint32_t ruleLen, tailoredLen, notTailoredLen;
1421 
1422     log_verbose("IsTailoredTest\n");
1423 
1424     u_uastrcpy(rule, "&Z < A, B, C;c < d");
1425     ruleLen = u_strlen(rule);
1426 
1427     u_uastrcpy(tailored, "ABCcd");
1428     tailoredLen = u_strlen(tailored);
1429 
1430     u_uastrcpy(notTailored, "ZabD");
1431     notTailoredLen = u_strlen(notTailored);
1432 
1433     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1434     if(U_SUCCESS(status)) {
1435         for(i = 0; i<tailoredLen; i++) {
1436             if(!ucol_isTailored(coll, tailored[i], &status)) {
1437                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1438             }
1439         }
1440         for(i = 0; i<notTailoredLen; i++) {
1441             if(ucol_isTailored(coll, notTailored[i], &status)) {
1442                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1443             }
1444         }
1445         ucol_close(coll);
1446     }
1447     else {
1448         log_err_status(status, "Can't tailor rules\n");
1449     }
1450     /* Code coverage */
1451     status = U_ZERO_ERROR;
1452     coll = ucol_open("ja", &status);
1453     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1454         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1455     }
1456     ucol_close(coll);
1457 }
1458 
1459 
/*
 * Escaped test strings for TestChMove, listed in the order the "cs"
 * collator is expected to sort them: TestChMove requires every entry to
 * compare less than every entry after it.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly",
  "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1479 
TestChMove(void)1480 static void TestChMove(void) {
1481     UChar t1[256] = {0};
1482     UChar t2[256] = {0};
1483 
1484     uint32_t i = 0, j = 0;
1485     uint32_t size = 0;
1486     UErrorCode status = U_ZERO_ERROR;
1487 
1488     UCollator *coll = ucol_open("cs", &status);
1489 
1490     if(U_SUCCESS(status)) {
1491         size = sizeof(chTest)/sizeof(chTest[0]);
1492         for(i = 0; i < size-1; i++) {
1493             for(j = i+1; j < size; j++) {
1494                 u_unescape(chTest[i], t1, 256);
1495                 u_unescape(chTest[j], t2, 256);
1496                 doTest(coll, t1, t2, UCOL_LESS);
1497             }
1498         }
1499     }
1500     else {
1501         log_err("Can't open collator");
1502     }
1503     ucol_close(coll);
1504 }
1505 
1506 
1507 
1508 
/*
 * Escaped strings in ascending order: U+4E00, a, A, b, B, U+4E01.
 * The same sequence appears as the expected data of the
 * "&\u4e00 < a <<< A < b <<< B" case in TestImplicitTailoring.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1517 
1518 
/*
 * Table-driven test of tailorings anchored at U+4E00 / U+4E01: each case
 * pairs a rule string with the strings listed in their expected order and
 * hands both to genericRulesStarter.
 *
 * A hand-rolled loop that ran the second case over the impTest table used
 * to live here in commented-out form; the table below supersedes it.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };

  const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t caseNo = 0;

  while(caseNo < caseCount) {
      genericRulesStarter(tests[caseNo].rules, tests[caseNo].data, tests[caseNo].len);
      ++caseNo;
  }
}
1568 
TestFCDProblem(void)1569 static void TestFCDProblem(void) {
1570   UChar t1[256] = {0};
1571   UChar t2[256] = {0};
1572 
1573   const char *s1 = "\\u0430\\u0306\\u0325";
1574   const char *s2 = "\\u04D1\\u0325";
1575 
1576   UErrorCode status = U_ZERO_ERROR;
1577   UCollator *coll = ucol_open("", &status);
1578   u_unescape(s1, t1, 256);
1579   u_unescape(s2, t2, 256);
1580 
1581   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1582   doTest(coll, t1, t2, UCOL_EQUAL);
1583 
1584   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1585   doTest(coll, t1, t2, UCOL_EQUAL);
1586 
1587   ucol_close(coll);
1588 }
1589 
/*
 * Capacity, in UChars, of the normalization buffers in the tester struct.
 * According to the original note here, the largest normalized form of a
 * single character is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC.
 * Only NFC/NFD are used in this test.
 */
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose: a code point and two spellings of it. */
typedef struct {
  UChar32 u;                        /* code point the case was generated from */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* NFC form; TestComposeDecompose may replace it with the unnormalized character */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* NFD form */
} tester;
1600 
TestComposeDecompose(void)1601 static void TestComposeDecompose(void) {
1602     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1603     static const UChar UNICODESET_STR[] = {
1604         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1605         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1606         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1607     };
1608     int32_t noOfLoc;
1609     int32_t i = 0, j = 0;
1610 
1611     UErrorCode status = U_ZERO_ERROR;
1612     const char *locName = NULL;
1613     uint32_t nfcSize;
1614     uint32_t nfdSize;
1615     tester **t;
1616     uint32_t noCases = 0;
1617     UCollator *coll = NULL;
1618     UChar32 u = 0;
1619     UChar comp[NORM_BUFFER_TEST_LEN];
1620     uint32_t len = 0;
1621     UCollationElements *iter;
1622     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1623     int32_t charsToTestSize;
1624 
1625     noOfLoc = uloc_countAvailable();
1626 
1627     coll = ucol_open("", &status);
1628     if (U_FAILURE(status)) {
1629         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1630         return;
1631     }
1632     charsToTestSize = uset_size(charsToTest);
1633     if (charsToTestSize <= 0) {
1634         log_err("Set was zero. Missing data?\n");
1635         return;
1636     }
1637     t = malloc(charsToTestSize * sizeof(tester *));
1638     t[0] = (tester *)malloc(sizeof(tester));
1639     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1640 
1641     for(u = 0; u < charsToTestSize; u++) {
1642         UChar32 ch = uset_charAt(charsToTest, u);
1643         len = 0;
1644         UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1645         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1646         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1647 
1648         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1649           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1650             t[noCases]->u = ch;
1651             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1652                 u_strncpy(t[noCases]->NFC, comp, len);
1653                 t[noCases]->NFC[len] = 0;
1654             }
1655             noCases++;
1656             t[noCases] = (tester *)malloc(sizeof(tester));
1657             uprv_memset(t[noCases], 0, sizeof(tester));
1658         }
1659     }
1660     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1661     uset_close(charsToTest);
1662     charsToTest = NULL;
1663 
1664     for(u=0; u<(UChar32)noCases; u++) {
1665         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1666             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1667             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1668         }
1669     }
1670     /*
1671     for(u = 0; u < charsToTestSize; u++) {
1672       if(!(u&0xFFFF)) {
1673         log_verbose("%08X ", u);
1674       }
1675       uprv_memset(t[noCases], 0, sizeof(tester));
1676       t[noCases]->u = u;
1677       len = 0;
1678       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1679       comp[len] = 0;
1680       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1681       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1682       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1683       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1684     }
1685     */
1686 
1687     ucol_close(coll);
1688 
1689     log_verbose("Testing locales, number of cases = %i\n", noCases);
1690     for(i = 0; i<noOfLoc; i++) {
1691         status = U_ZERO_ERROR;
1692         locName = uloc_getAvailable(i);
1693         if(hasCollationElements(locName)) {
1694             char cName[256];
1695             UChar name[256];
1696             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1697 
1698             for(j = 0; j<nameSize; j++) {
1699                 cName[j] = (char)name[j];
1700             }
1701             cName[nameSize] = 0;
1702             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1703 
1704             coll = ucol_open(locName, &status);
1705             ucol_setStrength(coll, UCOL_IDENTICAL);
1706             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1707 
1708             for(u=0; u<(UChar32)noCases; u++) {
1709                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1710                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1711                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1712                     log_verbose("Testing NFC\n");
1713                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1714                     backAndForth(iter);
1715                     log_verbose("Testing NFD\n");
1716                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1717                     backAndForth(iter);
1718                 }
1719             }
1720             ucol_closeElements(iter);
1721             ucol_close(coll);
1722         }
1723     }
1724     for(u = 0; u <= (UChar32)noCases; u++) {
1725         free(t[u]);
1726     }
1727     free(t);
1728 }
1729 
TestEmptyRule(void)1730 static void TestEmptyRule(void) {
1731   UErrorCode status = U_ZERO_ERROR;
1732   UChar rulez[] = { 0 };
1733   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1734 
1735   ucol_close(coll);
1736 }
1737 
TestUCARules(void)1738 static void TestUCARules(void) {
1739   UErrorCode status = U_ZERO_ERROR;
1740   UChar b[256];
1741   UChar *rules = b;
1742   uint32_t ruleLen = 0;
1743   UCollator *UCAfromRules = NULL;
1744   UCollator *coll = ucol_open("", &status);
1745   if(status == U_FILE_ACCESS_ERROR) {
1746     log_data_err("Is your data around?\n");
1747     return;
1748   } else if(U_FAILURE(status)) {
1749     log_err("Error opening collator\n");
1750     return;
1751   }
1752   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1753 
1754   log_verbose("TestUCARules\n");
1755   if(ruleLen > 256) {
1756     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1757     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1758   }
1759   log_verbose("Rules length is %d\n", ruleLen);
1760   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1761   if(U_SUCCESS(status)) {
1762     ucol_close(UCAfromRules);
1763   } else {
1764     log_verbose("Unable to create a collator from UCARules!\n");
1765   }
1766 /*
1767   u_unescape(blah, b, 256);
1768   ucol_getSortKey(coll, b, 1, res, 256);
1769 */
1770   ucol_close(coll);
1771   if(rules != b) {
1772     free(rules);
1773   }
1774 }
1775 
1776 
1777 /* Pinyin tonal order */
1778 /*
1779     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1780           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1781     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1782     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1783     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1784     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1785       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1786 .. (\u00fc)
1787 
1788 However, in testing we got the following order:
1789     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1790           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1791     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1792 .. (\u0113)
1793     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1794     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1795     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1796 .. (\u01d8)
1797       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1798 */
1799 
/**
 * Runs the Pinyin tone-mark tailoring through genericRulesStarter.
 *
 * Each "&[before 1]x" reset is followed by the macron, acute, caron and
 * grave forms of that vowel; the final reset hangs the u-diaeresis forms
 * after plain u.  The list below holds the strings handed to
 * genericRulesStarter together with those rules (presumably the expected
 * ascending order -- confirm against genericRulesStarter).
 */
static void TestBefore(void) {
  static const char *tonalOrder[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char tonalRules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tonalRules, tonalOrder, sizeof tonalOrder / sizeof tonalOrder[0]);
}
1818 
#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled test: hands the vowel/tone-mark list below to
 * genericLocaleStarter for the "zh" locale.
 */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1834 
#if 0
/* superseded by the changes to the lv locale */
/*
 * Disabled test: hands the four strings I, i, Y, y to
 * genericLocaleStarter for the "lv" locale.
 */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1847 
/**
 * J815: runs one list of strings (lower/upper-case pairs around "ae" and
 * the ae ligatures) first through the "fr" locale and then through the
 * explicit rule string "[backwards 2]&A<<\u00e6/e<<<\u00c6/E".
 */
static void TestJ815(void) {
  static const char *aeOrder[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  static const char backwardsRules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", aeOrder, sizeof aeOrder / sizeof aeOrder[0]);
  genericRulesStarter(backwardsRules, aeOrder, sizeof aeOrder / sizeof aeOrder[0]);
}
1868 
1869 
1870 /*
1871 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1872 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1873 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1874 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1875 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1876 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1877 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1878 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1879 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1880 */
TestRedundantRules(void)1881 static void TestRedundantRules(void) {
1882   int32_t i;
1883 
1884   static const struct {
1885       const char *rules;
1886       const char *expectedRules;
1887       const char *testdata[8];
1888       uint32_t testdatalen;
1889   } tests[] = {
1890     /* this test conflicts with positioning of CODAN placeholder */
1891        /*{
1892         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1893         "&\\u2089<<<x",
1894         {"\\u2089", "x"}, 2
1895        }, */
1896     /* this test conflicts with the [before x] syntax tightening */
1897       /*{
1898         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1899         "&\\u0252<<<x",
1900         {"\\u0252", "x"}, 2
1901       }, */
1902     /* this test conflicts with the [before x] syntax tightening */
1903       /*{
1904          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1905          "& a <<< x < b <<< c << d <<< e",
1906         {"a", "x", "b", "c", "d", "e"}, 6
1907       }, */
1908       {
1909         "& a < b < c < d& [before 1] c < m",
1910         "& a < b < m < c < d",
1911         {"a", "b", "m", "c", "d"}, 5
1912       },
1913       {
1914         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1915         "& a < b <<< c << d <<< x <<< e",
1916         {"a", "b", "c", "d", "x", "e"}, 6
1917       },
1918     /* this test conflicts with the [before x] syntax tightening */
1919       /* {
1920         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1921         "& a < b <<< c <<< x << d <<< e",
1922         {"a", "b", "c", "x", "d", "e"},, 6
1923       }, */
1924       {
1925         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1926         "& a < b <<< c << d <<< e <<< f < x < g",
1927         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1928       },
1929       {
1930         "& a <<< b << c < d& a < m",
1931         "& a <<< b << c < m < d",
1932         {"a", "b", "c", "m", "d"}, 5
1933       },
1934       {
1935         "&a<b<<b\\u0301 &z<b",
1936         "&a<b\\u0301 &z<b",
1937         {"a", "b\\u0301", "z", "b"}, 4
1938       },
1939       {
1940         "&z<m<<<q<<<m",
1941         "&z<q<<<m",
1942         {"z", "q", "m"},3
1943       },
1944       {
1945         "&z<<<m<q<<<m",
1946         "&z<q<<<m",
1947         {"z", "q", "m"}, 3
1948       },
1949       {
1950         "& a < b < c < d& r < c",
1951         "& a < b < d& r < c",
1952         {"a", "b", "d"}, 3
1953       },
1954       {
1955         "& a < b < c < d& r < c",
1956         "& a < b < d& r < c",
1957         {"r", "c"}, 2
1958       },
1959       {
1960         "& a < b < c < d& c < m",
1961         "& a < b < c < m < d",
1962         {"a", "b", "c", "m", "d"}, 5
1963       },
1964       {
1965         "& a < b < c < d& a < m",
1966         "& a < m < b < c < d",
1967         {"a", "m", "b", "c", "d"}, 5
1968       }
1969   };
1970 
1971 
1972   UCollator *credundant = NULL;
1973   UCollator *cresulting = NULL;
1974   UErrorCode status = U_ZERO_ERROR;
1975   UChar rlz[2048] = { 0 };
1976   uint32_t rlen = 0;
1977 
1978   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1979     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1980     rlen = u_unescape(tests[i].rules, rlz, 2048);
1981 
1982     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1983     if(status == U_FILE_ACCESS_ERROR) {
1984       log_data_err("Is your data around?\n");
1985       return;
1986     } else if(U_FAILURE(status)) {
1987       log_err("Error opening collator\n");
1988       return;
1989     }
1990 
1991     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1992     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1993 
1994     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1995 
1996     ucol_close(credundant);
1997     ucol_close(cresulting);
1998 
1999     log_verbose("testing using data\n");
2000 
2001     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2002   }
2003 
2004 }
2005 
TestExpansionSyntax(void)2006 static void TestExpansionSyntax(void) {
2007   int32_t i;
2008 
2009   const static char *rules[] = {
2010     "&AE <<< a << b <<< c &d <<< f",
2011     "&AE <<< a <<< b << c << d < e < f <<< g",
2012     "&AE <<< B <<< C / D <<< F"
2013   };
2014 
2015   const static char *expectedRules[] = {
2016     "&A <<< a / E << b / E <<< c /E  &d <<< f",
2017     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2018     "&A <<< B / E <<< C / ED <<< F / E"
2019   };
2020 
2021   const static char *testdata[][8] = {
2022     {"AE", "a", "b", "c"},
2023     {"AE", "a", "b", "c", "d", "e", "f", "g"},
2024     {"AE", "B", "C"} /* / ED <<< F / E"},*/
2025   };
2026 
2027   const static uint32_t testdatalen[] = {
2028       4,
2029       8,
2030       3
2031   };
2032 
2033 
2034 
2035   UCollator *credundant = NULL;
2036   UCollator *cresulting = NULL;
2037   UErrorCode status = U_ZERO_ERROR;
2038   UChar rlz[2048] = { 0 };
2039   uint32_t rlen = 0;
2040 
2041   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2042     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2043     rlen = u_unescape(rules[i], rlz, 2048);
2044 
2045     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2046     if(status == U_FILE_ACCESS_ERROR) {
2047       log_data_err("Is your data around?\n");
2048       return;
2049     } else if(U_FAILURE(status)) {
2050       log_err("Error opening collator\n");
2051       return;
2052     }
2053     rlen = u_unescape(expectedRules[i], rlz, 2048);
2054     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2055 
2056     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2057     /* as a hard error test, but only in information mode */
2058     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2059 
2060     ucol_close(credundant);
2061     ucol_close(cresulting);
2062 
2063     log_verbose("testing using data\n");
2064 
2065     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2066   }
2067 }
2068 
TestCase(void)2069 static void TestCase(void)
2070 {
2071     const static UChar gRules[MAX_TOKEN_LEN] =
2072     /*" & 0 < 1,\u2461<a,A"*/
2073     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2074 
2075     const static UChar testCase[][MAX_TOKEN_LEN] =
2076     {
2077         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2078         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2079         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2080         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2081     };
2082 
2083     const static UCollationResult caseTestResults[][9] =
2084     {
2085         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2086         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2087         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2088         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2089     };
2090 
2091     const static UColAttributeValue caseTestAttributes[][2] =
2092     {
2093         { UCOL_LOWER_FIRST, UCOL_OFF},
2094         { UCOL_UPPER_FIRST, UCOL_OFF},
2095         { UCOL_LOWER_FIRST, UCOL_ON},
2096         { UCOL_UPPER_FIRST, UCOL_ON}
2097     };
2098     int32_t i,j,k;
2099     UErrorCode status = U_ZERO_ERROR;
2100     UCollationElements *iter;
2101     UCollator  *myCollation;
2102     myCollation = ucol_open("en_US", &status);
2103 
2104     if(U_FAILURE(status)){
2105         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2106         return;
2107     }
2108     log_verbose("Testing different case settings\n");
2109     ucol_setStrength(myCollation, UCOL_TERTIARY);
2110 
2111     for(k = 0; k<4; k++) {
2112       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2113       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2114       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2115       for (i = 0; i < 3 ; i++) {
2116         for(j = i+1; j<4; j++) {
2117           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2118         }
2119       }
2120     }
2121     ucol_close(myCollation);
2122 
2123     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2124     if(U_FAILURE(status)){
2125         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2126         return;
2127     }
2128     log_verbose("Testing different case settings with custom rules\n");
2129     ucol_setStrength(myCollation, UCOL_TERTIARY);
2130 
2131     for(k = 0; k<4; k++) {
2132       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2133       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2134       for (i = 0; i < 3 ; i++) {
2135         for(j = i+1; j<4; j++) {
2136           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2137           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2138           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2139           backAndForth(iter);
2140           ucol_closeElements(iter);
2141           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2142           backAndForth(iter);
2143           ucol_closeElements(iter);
2144         }
2145       }
2146     }
2147     ucol_close(myCollation);
2148     {
2149       const static char *lowerFirst[] = {
2150         "h",
2151         "H",
2152         "ch",
2153         "Ch",
2154         "CH",
2155         "cha",
2156         "chA",
2157         "Cha",
2158         "ChA",
2159         "CHa",
2160         "CHA",
2161         "i",
2162         "I"
2163       };
2164 
2165       const static char *upperFirst[] = {
2166         "H",
2167         "h",
2168         "CH",
2169         "Ch",
2170         "ch",
2171         "CHA",
2172         "CHa",
2173         "ChA",
2174         "Cha",
2175         "chA",
2176         "cha",
2177         "I",
2178         "i"
2179       };
2180       log_verbose("mixed case test\n");
2181       log_verbose("lower first, case level off\n");
2182       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2183       log_verbose("upper first, case level off\n");
2184       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2185       log_verbose("lower first, case level on\n");
2186       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2187       log_verbose("upper first, case level on\n");
2188       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2189     }
2190 
2191 }
2192 
TestIncrementalNormalize(void)2193 static void TestIncrementalNormalize(void) {
2194 
2195     /*UChar baseA     =0x61;*/
2196     UChar baseA     =0x41;
2197 /*    UChar baseB     = 0x42;*/
2198     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2199     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2200     /*
2201         0x316 is combining grave accent below, cc=220
2202         0x321 is combining palatalized hook below, cc=202
2203         0x300 is combining grave accent, cc=230
2204     */
2205 
2206 #define MAXSLEN 2000
2207     /*int          maxSLen   = 64000;*/
2208     int          sLen;
2209     int          i;
2210 
2211     UCollator        *coll;
2212     UErrorCode       status = U_ZERO_ERROR;
2213     UCollationResult result;
2214 
2215     int32_t myQ = QUICK;
2216 
2217     if(QUICK < 0) {
2218       QUICK = 1;
2219     }
2220 
2221     {
2222         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2223         /*          most buffers along the way.*/
2224         UChar            strA[MAXSLEN+1];
2225         UChar            strB[MAXSLEN+1];
2226 
2227         coll = ucol_open("en_US", &status);
2228         if(status == U_FILE_ACCESS_ERROR) {
2229           log_data_err("Is your data around?\n");
2230           return;
2231         } else if(U_FAILURE(status)) {
2232           log_err("Error opening collator\n");
2233           return;
2234         }
2235         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2236 
2237         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2238         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2239         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2240         for (sLen = 500; sLen<501; sLen++) {
2241         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2242             strA[0] = baseA;
2243             strB[0] = baseA;
2244             for (i=1; i<=sLen-1; i++) {
2245                 strA[i] = ccMix[i % 3];
2246                 strB[sLen-i] = ccMix[i % 3];
2247             }
2248             strA[sLen]   = 0;
2249             strB[sLen]   = 0;
2250 
2251             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2252             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2253             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2254             doTest(coll, strA, strB, UCOL_EQUAL);
2255         }
2256     }
2257 
2258     QUICK = myQ;
2259 
2260 
2261     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2262     /*         of the string.  Checks a couple of edge cases.*/
2263 
2264     {
2265         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2266         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2267         ucol_setStrength(coll, UCOL_TERTIARY);
2268         doTest(coll, strA, strB, UCOL_EQUAL);
2269     }
2270 
2271     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2272 
2273     {
2274       /* New UCA  3.1.1.
2275        * test below used a code point from Desseret, which sorts differently
2276        * than d800 dc00
2277        */
2278         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2279         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2280         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2281         ucol_setStrength(coll, UCOL_TERTIARY);
2282         doTest(coll, strA, strB, UCOL_GREATER);
2283     }
2284 
2285     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2286 
2287     {
2288         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2289         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2290         char  sortKeyA[50];
2291         char  sortKeyAz[50];
2292         char  sortKeyB[50];
2293         char  sortKeyBz[50];
2294         int   r;
2295 
2296         /* there used to be -3 here. Hmmmm.... */
2297         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2298         result = ucol_strcoll(coll, strA, 3, strB, 3);
2299         if (result != UCOL_GREATER) {
2300             log_err("ERROR 1 in test 4\n");
2301         }
2302         result = ucol_strcoll(coll, strA, -1, strB, -1);
2303         if (result != UCOL_EQUAL) {
2304             log_err("ERROR 2 in test 4\n");
2305         }
2306 
2307         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2308         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2309         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2310         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2311 
2312         r = strcmp(sortKeyA, sortKeyAz);
2313         if (r <= 0) {
2314             log_err("Error 3 in test 4\n");
2315         }
2316         r = strcmp(sortKeyA, sortKeyB);
2317         if (r <= 0) {
2318             log_err("Error 4 in test 4\n");
2319         }
2320         r = strcmp(sortKeyAz, sortKeyBz);
2321         if (r != 0) {
2322             log_err("Error 5 in test 4\n");
2323         }
2324 
2325         ucol_setStrength(coll, UCOL_IDENTICAL);
2326         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2327         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2328         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2329         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2330 
2331         r = strcmp(sortKeyA, sortKeyAz);
2332         if (r <= 0) {
2333             log_err("Error 6 in test 4\n");
2334         }
2335         r = strcmp(sortKeyA, sortKeyB);
2336         if (r <= 0) {
2337             log_err("Error 7 in test 4\n");
2338         }
2339         r = strcmp(sortKeyAz, sortKeyBz);
2340         if (r != 0) {
2341             log_err("Error 8 in test 4\n");
2342         }
2343         ucol_setStrength(coll, UCOL_TERTIARY);
2344     }
2345 
2346 
2347     /*  Test 5:  Null characters in non-normal source strings.*/
2348 
2349     {
2350         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2351         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2352         char  sortKeyA[50];
2353         char  sortKeyAz[50];
2354         char  sortKeyB[50];
2355         char  sortKeyBz[50];
2356         int   r;
2357 
2358         result = ucol_strcoll(coll, strA, 6, strB, 6);
2359         if (result != UCOL_GREATER) {
2360             log_err("ERROR 1 in test 5\n");
2361         }
2362         result = ucol_strcoll(coll, strA, -1, strB, -1);
2363         if (result != UCOL_EQUAL) {
2364             log_err("ERROR 2 in test 5\n");
2365         }
2366 
2367         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2368         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2369         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2370         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2371 
2372         r = strcmp(sortKeyA, sortKeyAz);
2373         if (r <= 0) {
2374             log_err("Error 3 in test 5\n");
2375         }
2376         r = strcmp(sortKeyA, sortKeyB);
2377         if (r <= 0) {
2378             log_err("Error 4 in test 5\n");
2379         }
2380         r = strcmp(sortKeyAz, sortKeyBz);
2381         if (r != 0) {
2382             log_err("Error 5 in test 5\n");
2383         }
2384 
2385         ucol_setStrength(coll, UCOL_IDENTICAL);
2386         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2387         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2388         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2389         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2390 
2391         r = strcmp(sortKeyA, sortKeyAz);
2392         if (r <= 0) {
2393             log_err("Error 6 in test 5\n");
2394         }
2395         r = strcmp(sortKeyA, sortKeyB);
2396         if (r <= 0) {
2397             log_err("Error 7 in test 5\n");
2398         }
2399         r = strcmp(sortKeyAz, sortKeyBz);
2400         if (r != 0) {
2401             log_err("Error 8 in test 5\n");
2402         }
2403         ucol_setStrength(coll, UCOL_TERTIARY);
2404     }
2405 
2406 
2407     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2408 
2409     {
2410         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2411         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2412 
2413         result = ucol_strcoll(coll, strA, 5, strB, 5);
2414         if (result != UCOL_LESS) {
2415             log_err("Error 1 in test 6\n");
2416         }
2417         result = ucol_strcoll(coll, strA, -1, strB, -1);
2418         if (result != UCOL_EQUAL) {
2419             log_err("Error 2 in test 6\n");
2420         }
2421     }
2422 
2423     ucol_close(coll);
2424 }
2425 
2426 
2427 
#if 0
/*
 * Disabled test: for each string in caseBitData, unescapes it and compares
 * the value returned by ucol_uprv_getCaseBits for the "" locale collator
 * with the matching entry in results.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* results[i] is the expected case value for caseBitData[i]. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
#endif
2455 
TestHangulTailoring(void)2456 static void TestHangulTailoring(void) {
2457     static const char *koreanData[] = {
2458         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2459             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2460             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2461             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2462             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2463             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2464     };
2465 
2466     const char *rules =
2467         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2468         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2469         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2470         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2471         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2472         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2473 
2474 
2475   UErrorCode status = U_ZERO_ERROR;
2476   UChar rlz[2048] = { 0 };
2477   uint32_t rlen = u_unescape(rules, rlz, 2048);
2478 
2479   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2480   if(status == U_FILE_ACCESS_ERROR) {
2481     log_data_err("Is your data around?\n");
2482     return;
2483   } else if(U_FAILURE(status)) {
2484     log_err("Error opening collator\n");
2485     return;
2486   }
2487 
2488   log_verbose("Using start of korean rules\n");
2489 
2490   if(U_SUCCESS(status)) {
2491     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2492   } else {
2493     log_err("Unable to open collator with rules %s\n", rules);
2494   }
2495 
2496   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2497   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2498   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2499 
2500   ucol_close(coll);
2501 
2502   log_verbose("Using ko__LOTUS locale\n");
2503   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2504 }
2505 
TestCompressOverlap(void)2506 static void TestCompressOverlap(void) {
2507     UChar       secstr[150];
2508     UChar       tertstr[150];
2509     UErrorCode  status = U_ZERO_ERROR;
2510     UCollator  *coll;
2511     char        result[200];
2512     uint32_t    resultlen;
2513     int         count = 0;
2514     char       *tempptr;
2515 
2516     coll = ucol_open("", &status);
2517 
2518     if (U_FAILURE(status)) {
2519         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2520         return;
2521     }
2522     while (count < 149) {
2523         secstr[count] = 0x0020; /* [06, 05, 05] */
2524         tertstr[count] = 0x0020;
2525         count ++;
2526     }
2527 
2528     /* top down compression ----------------------------------- */
2529     secstr[count] = 0x0332; /* [, 87, 05] */
2530     tertstr[count] = 0x3000; /* [06, 05, 07] */
2531 
2532     /* no compression secstr should have 150 secondary bytes, tertstr should
2533     have 150 tertiary bytes.
2534     with correct overlapping compression, secstr should have 4 secondary
2535     bytes, tertstr should have > 2 tertiary bytes */
2536     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2537     tempptr = uprv_strchr(result, 1) + 1;
2538     while (*(tempptr + 1) != 1) {
2539         /* the last secondary collation element is not checked since it is not
2540         part of the compression */
2541         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2542             log_err("Secondary compression overlapped\n");
2543         }
2544         tempptr ++;
2545     }
2546 
2547     /* tertiary top/bottom/common for en_US is similar to the secondary
2548     top/bottom/common */
2549     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2550     tempptr = uprv_strrchr(result, 1) + 1;
2551     while (*(tempptr + 1) != 0) {
2552         /* the last secondary collation element is not checked since it is not
2553         part of the compression */
2554         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2555             log_err("Tertiary compression overlapped\n");
2556         }
2557         tempptr ++;
2558     }
2559 
2560     /* bottom up compression ------------------------------------- */
2561     secstr[count] = 0;
2562     tertstr[count] = 0;
2563     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2564     tempptr = uprv_strchr(result, 1) + 1;
2565     while (*(tempptr + 1) != 1) {
2566         /* the last secondary collation element is not checked since it is not
2567         part of the compression */
2568         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2569             log_err("Secondary compression overlapped\n");
2570         }
2571         tempptr ++;
2572     }
2573 
2574     /* tertiary top/bottom/common for en_US is similar to the secondary
2575     top/bottom/common */
2576     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2577     tempptr = uprv_strrchr(result, 1) + 1;
2578     while (*(tempptr + 1) != 0) {
2579         /* the last secondary collation element is not checked since it is not
2580         part of the compression */
2581         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2582             log_err("Tertiary compression overlapped\n");
2583         }
2584         tempptr ++;
2585     }
2586 
2587     ucol_close(coll);
2588 }
2589 
TestCyrillicTailoring(void)2590 static void TestCyrillicTailoring(void) {
2591   static const char *test[] = {
2592     "\\u0410b",
2593       "\\u0410\\u0306a",
2594       "\\u04d0A"
2595   };
2596 
2597     /* Russian overrides contractions, so this test is not valid anymore */
2598     /*genericLocaleStarter("ru", test, 3);*/
2599 
2600     genericLocaleStarter("root", test, 3);
2601     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2602     genericRulesStarter("&Z < \\u0410", test, 3);
2603     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2604     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2605     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2606     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2607 }
2608 
/**
 * Runs two three-string data sets through genericRulesStarter using a rule
 * that suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

  /* U+0410 followed by a combining mark and a distinguishing Latin letter */
  static const char *markedCyrillic[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };
  /* variants of Latin 'a' followed by U+0410 with or without a combining mark */
  static const char *latinThenCyrillic[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };

  /* same call order as always: Latin-first data, then the marked Cyrillic data */
  genericRulesStarter(suppressRule, latinThenCyrillic, 3);
  genericRulesStarter(suppressRule, markedCyrillic, 3);
}
2625 
TestContraction(void)2626 static void TestContraction(void) {
2627     const static char *testrules[] = {
2628         "&A = AB / B",
2629         "&A = A\\u0306/\\u0306",
2630         "&c = ch / h"
2631     };
2632     const static UChar testdata[][2] = {
2633         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2634         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2635         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2636     };
2637     const static UChar testdata2[][2] = {
2638         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2639         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2640         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2641     };
2642     const static char *testrules3[] = {
2643         "&z < xyz &xyzw << B",
2644         "&z < xyz &xyz << B / w",
2645         "&z < ch &achm << B",
2646         "&z < ch &a << B / chm",
2647         "&\\ud800\\udc00w << B",
2648         "&\\ud800\\udc00 << B / w",
2649         "&a\\ud800\\udc00m << B",
2650         "&a << B / \\ud800\\udc00m",
2651     };
2652 
2653     UErrorCode  status   = U_ZERO_ERROR;
2654     UCollator  *coll;
2655     UChar       rule[256] = {0};
2656     uint32_t    rlen     = 0;
2657     int         i;
2658 
2659     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2660         UCollationElements *iter1;
2661         int j = 0;
2662         log_verbose("Rule %s for testing\n", testrules[i]);
2663         rlen = u_unescape(testrules[i], rule, 32);
2664         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2665         if (U_FAILURE(status)) {
2666             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2667             return;
2668         }
2669         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2670         if (U_FAILURE(status)) {
2671             log_err("Collation iterator creation failed\n");
2672             return;
2673         }
2674         while (j < 2) {
2675             UCollationElements *iter2 = ucol_openElements(coll,
2676                                                          &(testdata[i][j]),
2677                                                          1, &status);
2678             uint32_t ce;
2679             if (U_FAILURE(status)) {
2680                 log_err("Collation iterator creation failed\n");
2681                 return;
2682             }
2683             ce = ucol_next(iter2, &status);
2684             while (ce != UCOL_NULLORDER) {
2685                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2686                     log_err("Collation elements in contraction split does not match\n");
2687                     return;
2688                 }
2689                 ce = ucol_next(iter2, &status);
2690             }
2691             j ++;
2692             ucol_closeElements(iter2);
2693         }
2694         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2695             log_err("Collation elements not exhausted\n");
2696             return;
2697         }
2698         ucol_closeElements(iter1);
2699         ucol_close(coll);
2700     }
2701 
2702     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2703     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2704     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2705         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2706                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2707                 testdata2[1][1]);
2708         return;
2709     }
2710     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2711         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2712                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2713                 testdata2[2][1]);
2714         return;
2715     }
2716     ucol_close(coll);
2717 
2718     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2719         UCollator          *coll1,
2720                            *coll2;
2721         UCollationElements *iter1,
2722                            *iter2;
2723         UChar               ch = 0x0042 /* 'B' */;
2724         uint32_t            ce;
2725         rlen = u_unescape(testrules3[i], rule, 32);
2726         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2727         rlen = u_unescape(testrules3[i + 1], rule, 32);
2728         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2729         if (U_FAILURE(status)) {
2730             log_err("Collator creation failed %s\n", testrules[i]);
2731             return;
2732         }
2733         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2734         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2735         if (U_FAILURE(status)) {
2736             log_err("Collation iterator creation failed\n");
2737             return;
2738         }
2739         ce = ucol_next(iter1, &status);
2740         if (U_FAILURE(status)) {
2741             log_err("Retrieving ces failed\n");
2742             return;
2743         }
2744         while (ce != UCOL_NULLORDER) {
2745             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2746                 log_err("CEs does not match\n");
2747                 return;
2748             }
2749             ce = ucol_next(iter1, &status);
2750             if (U_FAILURE(status)) {
2751                 log_err("Retrieving ces failed\n");
2752                 return;
2753             }
2754         }
2755         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2756             log_err("CEs not exhausted\n");
2757             return;
2758         }
2759         ucol_closeElements(iter1);
2760         ucol_closeElements(iter2);
2761         ucol_close(coll1);
2762         ucol_close(coll2);
2763     }
2764 }
2765 
TestExpansion(void)2766 static void TestExpansion(void) {
2767     const static char *testrules[] = {
2768         "&J << K / B & K << M",
2769         "&J << K / B << M"
2770     };
2771     const static UChar testdata[][3] = {
2772         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2773         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2774         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2775         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2776         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2777         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2778     };
2779 
2780     UErrorCode  status   = U_ZERO_ERROR;
2781     UCollator  *coll;
2782     UChar       rule[256] = {0};
2783     uint32_t    rlen     = 0;
2784     int         i;
2785 
2786     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2787         int j = 0;
2788         log_verbose("Rule %s for testing\n", testrules[i]);
2789         rlen = u_unescape(testrules[i], rule, 32);
2790         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2791         if (U_FAILURE(status)) {
2792             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2793             return;
2794         }
2795 
2796         for (j = 0; j < 5; j ++) {
2797             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2798         }
2799         ucol_close(coll);
2800     }
2801 }
2802 
#if 0
/* this test tests the current limitations of the engine */
/* it always fails, so it is disabled by default */
/*
 * Each braced section below declares one rule string plus one or more data
 * sets and passes them to genericRulesStarter or
 * genericRulesStarterWithOptions.  The whole function is compiled out by the
 * surrounding #if 0.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orders of the same two strings, with decomposition on and then off */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data in this section are identical to the
     "nulls in contractions" section above; only the log message differs. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[k] is paired with valOn[k] / valOff[k] */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
2894 
TestBocsuCoverage(void)2895 static void TestBocsuCoverage(void) {
2896   UErrorCode status = U_ZERO_ERROR;
2897   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2898   UChar       test[256] = {0};
2899   uint32_t    tlen     = u_unescape(testString, test, 32);
2900   uint8_t key[256]     = {0};
2901   uint32_t klen         = 0;
2902 
2903   UCollator *coll = ucol_open("", &status);
2904   if(U_SUCCESS(status)) {
2905   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2906 
2907   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2908 
2909   ucol_close(coll);
2910   } else {
2911     log_data_err("Couldn't open UCA\n");
2912   }
2913 }
2914 
TestVariableTopSetting(void)2915 static void TestVariableTopSetting(void) {
2916   UErrorCode status = U_ZERO_ERROR;
2917   const UChar *current = NULL;
2918   uint32_t varTopOriginal = 0, varTop1, varTop2;
2919   UCollator *coll = ucol_open("", &status);
2920   if(U_SUCCESS(status)) {
2921 
2922   uint32_t strength = 0;
2923   uint16_t specs = 0;
2924   uint32_t chOffset = 0;
2925   uint32_t chLen = 0;
2926   uint32_t exOffset = 0;
2927   uint32_t exLen = 0;
2928   uint32_t oldChOffset = 0;
2929   uint32_t oldChLen = 0;
2930   uint32_t oldExOffset = 0;
2931   uint32_t oldExLen = 0;
2932   uint32_t prefixOffset = 0;
2933   uint32_t prefixLen = 0;
2934 
2935   UBool startOfRules = TRUE;
2936   UColTokenParser src;
2937   UColOptionSet opts;
2938 
2939   UChar *rulesCopy = NULL;
2940   uint32_t rulesLen;
2941 
2942   UCollationResult result;
2943 
2944   UChar first[256] = { 0 };
2945   UChar second[256] = { 0 };
2946   UParseError parseError;
2947   int32_t myQ = QUICK;
2948 
2949   src.opts = &opts;
2950 
2951   if(QUICK <= 0) {
2952     QUICK = 1;
2953   }
2954 
2955   /* this test will fail when normalization is turned on */
2956   /* therefore we always turn off exhaustive mode for it */
2957   { /* QUICK > 0*/
2958     log_verbose("Slide variable top over UCARules\n");
2959     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2960     rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2961     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2962 
2963     if(U_SUCCESS(status) && rulesLen > 0) {
2964       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2965       src.current = src.source = rulesCopy;
2966       src.end = rulesCopy+rulesLen;
2967       src.extraCurrent = src.end;
2968       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2969 
2970       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2971         strength = src.parsedToken.strength;
2972         chOffset = src.parsedToken.charsOffset;
2973         chLen = src.parsedToken.charsLen;
2974         exOffset = src.parsedToken.extensionOffset;
2975         exLen = src.parsedToken.extensionLen;
2976         prefixOffset = src.parsedToken.prefixOffset;
2977         prefixLen = src.parsedToken.prefixLen;
2978         specs = src.parsedToken.flags;
2979 
2980         startOfRules = FALSE;
2981         {
2982           log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2983         }
2984         if(strength == UCOL_PRIMARY) {
2985           status = U_ZERO_ERROR;
2986           varTopOriginal = ucol_getVariableTop(coll, &status);
2987           varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2988           if(U_FAILURE(status)) {
2989             char buffer[256];
2990             char *buf = buffer;
2991             uint32_t i = 0, j;
2992             uint32_t CE = UCOL_NO_MORE_CES;
2993 
2994             /* before we start screaming, let's see if there is a problem with the rules */
2995             collIterate s;
2996             uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2997 
2998             CE = ucol_getNextCE(coll, &s, &status);
2999 
3000             for(i = 0; i < oldChLen; i++) {
3001               j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
3002               buf += j;
3003             }
3004             if(status == U_PRIMARY_TOO_LONG_ERROR) {
3005               log_verbose("= Expected failure for %s =", buffer);
3006             } else {
3007               if(s.pos == s.endp) {
3008                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3009                   oldChOffset, u_errorName(status), buffer);
3010               } else {
3011                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3012                   buffer);
3013               }
3014             }
3015           }
3016           varTop2 = ucol_getVariableTop(coll, &status);
3017           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3018             log_err("cannot retrieve set varTop value!\n");
3019             continue;
3020           }
3021 
3022           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3023 
3024             u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
3025             u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
3026             u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
3027             first[2*oldChLen+chLen] = 0;
3028 
3029             if(oldExLen == 0) {
3030               u_strncpy(second, rulesCopy+chOffset, chLen);
3031               second[chLen] = 0;
3032             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3033               u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
3034               u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
3035               u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
3036               second[2*oldExLen+chLen] = 0;
3037             }
3038             result = ucol_strcoll(coll, first, -1, second, -1);
3039             if(result == UCOL_EQUAL) {
3040               doTest(coll, first, second, UCOL_EQUAL);
3041             } else {
3042               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
3043             }
3044           }
3045         }
3046         if(strength != UCOL_TOK_RESET) {
3047           oldChOffset = chOffset;
3048           oldChLen = chLen;
3049           oldExOffset = exOffset;
3050           oldExLen = exLen;
3051         }
3052       }
3053       status = U_ZERO_ERROR;
3054     }
3055     else {
3056       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3057       return;
3058     }
3059     if (U_FAILURE(status)) {
3060         log_err("Error parsing rules %s\n", u_errorName(status));
3061         return;
3062     }
3063     status = U_ZERO_ERROR;
3064   }
3065 
3066   QUICK = myQ;
3067 
3068   log_verbose("Testing setting variable top to contractions\n");
3069   {
3070     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3071     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3072     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3073     while(*conts != 0) {
3074       if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3075         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3076       } else {
3077         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3078       }
3079       if(U_FAILURE(status)) {
3080         log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3081           *conts, *(conts+1), *(conts+2));
3082         status = U_ZERO_ERROR;
3083       }
3084       conts+=3;
3085     }
3086 
3087     status = U_ZERO_ERROR;
3088 
3089     first[0] = 0x0040;
3090     first[1] = 0x0050;
3091     first[2] = 0x0000;
3092 
3093     ucol_setVariableTop(coll, first, -1, &status);
3094 
3095     if(U_SUCCESS(status)) {
3096       log_err("Invalid contraction succeded in setting variable top!\n");
3097     }
3098 
3099   }
3100 
3101   log_verbose("Test restoring variable top\n");
3102 
3103   status = U_ZERO_ERROR;
3104   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3105   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3106     log_err("Couldn't restore old variable top\n");
3107   }
3108 
3109   log_verbose("Testing calling with error set\n");
3110 
3111   status = U_INTERNAL_PROGRAM_ERROR;
3112   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3113   varTop2 = ucol_getVariableTop(coll, &status);
3114   ucol_restoreVariableTop(coll, varTop2, &status);
3115   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3116   varTop2 = ucol_getVariableTop(NULL, &status);
3117   ucol_restoreVariableTop(NULL, varTop2, &status);
3118   if(status != U_INTERNAL_PROGRAM_ERROR) {
3119     log_err("Bad reaction to passed error!\n");
3120   }
3121   free(rulesCopy);
3122   ucol_close(coll);
3123   } else {
3124     log_data_err("Couldn't open UCA collator\n");
3125   }
3126 
3127 }
3128 
TestNonChars(void)3129 static void TestNonChars(void) {
3130   static const char *test[] = {
3131     "\\u0000",
3132     "\\uFFFE", "\\uFFFF",
3133       "\\U0001FFFE", "\\U0001FFFF",
3134       "\\U0002FFFE", "\\U0002FFFF",
3135       "\\U0003FFFE", "\\U0003FFFF",
3136       "\\U0004FFFE", "\\U0004FFFF",
3137       "\\U0005FFFE", "\\U0005FFFF",
3138       "\\U0006FFFE", "\\U0006FFFF",
3139       "\\U0007FFFE", "\\U0007FFFF",
3140       "\\U0008FFFE", "\\U0008FFFF",
3141       "\\U0009FFFE", "\\U0009FFFF",
3142       "\\U000AFFFE", "\\U000AFFFF",
3143       "\\U000BFFFE", "\\U000BFFFF",
3144       "\\U000CFFFE", "\\U000CFFFF",
3145       "\\U000DFFFE", "\\U000DFFFF",
3146       "\\U000EFFFE", "\\U000EFFFF",
3147       "\\U000FFFFE", "\\U000FFFFF",
3148       "\\U0010FFFE", "\\U0010FFFF"
3149   };
3150   UErrorCode status = U_ZERO_ERROR;
3151   UCollator *coll = ucol_open("en_US", &status);
3152 
3153   log_verbose("Test non characters\n");
3154 
3155   if(U_SUCCESS(status)) {
3156     genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3157   } else {
3158     log_err_status(status, "Unable to open collator\n");
3159   }
3160 
3161   ucol_close(coll);
3162 }
3163 
TestExtremeCompression(void)3164 static void TestExtremeCompression(void) {
3165   static char *test[4];
3166   int32_t j = 0, i = 0;
3167 
3168   for(i = 0; i<4; i++) {
3169     test[i] = (char *)malloc(2048*sizeof(char));
3170   }
3171 
3172   for(j = 20; j < 500; j++) {
3173     for(i = 0; i<4; i++) {
3174       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3175       test[i][j-1] = (char)('a'+i);
3176       test[i][j] = 0;
3177     }
3178     genericLocaleStarter("en_US", (const char **)test, 4);
3179   }
3180 
3181 
3182   for(i = 0; i<4; i++) {
3183     free(test[i]);
3184   }
3185 }
3186 
#if 0
/*
 * Disabled variant of TestExtremeCompression; compiled out by the #if 0, so
 * it does not clash with the active function of the same name.
 *
 * NOTE(review): points to sort out before this could be re-enabled:
 *  - ucol_open is passed `status` rather than `&status`, unlike the other
 *    tests in this file;
 *  - `coll` is never used or closed;
 *  - genericLocaleStarter is called once, after the fill loop has finished,
 *    so only the strings built by the last iteration are tested;
 *  - the second fill loop only rewrites test[0] and its result is not passed
 *    to anything.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
3216 
/**
 * Hands genericRulesStarter a tailoring whose rule text contains surrogate
 * pairs (alone and followed by Latin letters) together with 14 test strings
 * built from the same sequences.
 */
static void TestSurrogates(void) {
  /* tailoring: everything is placed after 'z' */
  static const char *surrogateRule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *surrogateData[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* all 14 entries of surrogateData take part */
  genericRulesStarter(surrogateRule, surrogateData, 14);
}
3237 
3238 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
TestPrefix(void)3239 static void TestPrefix(void) {
3240   uint32_t i;
3241 
3242   static const struct {
3243     const char *rules;
3244     const char *data[50];
3245     const uint32_t len;
3246   } tests[] = {
3247     { "&z <<< z|a",
3248       {"zz", "za"}, 2 },
3249 
3250     { "&z <<< z|   a",
3251       {"zz", "za"}, 2 },
3252     { "[strength I]"
3253       "&a=\\ud900\\udc25"
3254       "&z<<<\\ud900\\udc25|a",
3255       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3256   };
3257 
3258 
3259   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3260     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3261   }
3262 }
3263 
3264 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3265 /* JIS X 4061 collation order implementation                                   */
TestNewJapanese(void)3266 static void TestNewJapanese(void) {
3267 
3268   static const char * const test1[] = {
3269       "\\u30b7\\u30e3\\u30fc\\u30ec",
3270       "\\u30b7\\u30e3\\u30a4",
3271       "\\u30b7\\u30e4\\u30a3",
3272       "\\u30b7\\u30e3\\u30ec",
3273       "\\u3061\\u3087\\u3053",
3274       "\\u3061\\u3088\\u3053",
3275       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3276       "\\u3066\\u30fc\\u305f",
3277       "\\u30c6\\u30fc\\u30bf",
3278       "\\u30c6\\u30a7\\u30bf",
3279       "\\u3066\\u3048\\u305f",
3280       "\\u3067\\u30fc\\u305f",
3281       "\\u30c7\\u30fc\\u30bf",
3282       "\\u30c7\\u30a7\\u30bf",
3283       "\\u3067\\u3048\\u305f",
3284       "\\u3066\\u30fc\\u305f\\u30fc",
3285       "\\u30c6\\u30fc\\u30bf\\u30a1",
3286       "\\u30c6\\u30a7\\u30bf\\u30fc",
3287       "\\u3066\\u3047\\u305f\\u3041",
3288       "\\u3066\\u3048\\u305f\\u30fc",
3289       "\\u3067\\u30fc\\u305f\\u30fc",
3290       "\\u30c7\\u30fc\\u30bf\\u30a1",
3291       "\\u3067\\u30a7\\u305f\\u30a1",
3292       "\\u30c7\\u3047\\u30bf\\u3041",
3293       "\\u30c7\\u30a8\\u30bf\\u30a2",
3294       "\\u3072\\u3086",
3295       "\\u3073\\u3085\\u3042",
3296       "\\u3074\\u3085\\u3042",
3297       "\\u3073\\u3085\\u3042\\u30fc",
3298       "\\u30d3\\u30e5\\u30a2\\u30fc",
3299       "\\u3074\\u3085\\u3042\\u30fc",
3300       "\\u30d4\\u30e5\\u30a2\\u30fc",
3301       "\\u30d2\\u30e5\\u30a6",
3302       "\\u30d2\\u30e6\\u30a6",
3303       "\\u30d4\\u30e5\\u30a6\\u30a2",
3304       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3305       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3306       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3307       "\\u3072\\u3085\\u3093",
3308       "\\u3074\\u3085\\u3093",
3309       "\\u3075\\u30fc\\u308a",
3310       "\\u30d5\\u30fc\\u30ea",
3311       "\\u3075\\u3045\\u308a",
3312       "\\u3075\\u30a5\\u308a",
3313       "\\u3075\\u30a5\\u30ea",
3314       "\\u30d5\\u30a6\\u30ea",
3315       "\\u3076\\u30fc\\u308a",
3316       "\\u30d6\\u30fc\\u30ea",
3317       "\\u3076\\u3045\\u308a",
3318       "\\u30d6\\u30a5\\u308a",
3319       "\\u3077\\u3046\\u308a",
3320       "\\u30d7\\u30a6\\u30ea",
3321       "\\u3075\\u30fc\\u308a\\u30fc",
3322       "\\u30d5\\u30a5\\u30ea\\u30fc",
3323       "\\u3075\\u30a5\\u308a\\u30a3",
3324       "\\u30d5\\u3045\\u308a\\u3043",
3325       "\\u30d5\\u30a6\\u30ea\\u30fc",
3326       "\\u3075\\u3046\\u308a\\u3043",
3327       "\\u30d6\\u30a6\\u30ea\\u30a4",
3328       "\\u3077\\u30fc\\u308a\\u30fc",
3329       "\\u3077\\u30a5\\u308a\\u30a4",
3330       "\\u3077\\u3046\\u308a\\u30fc",
3331       "\\u30d7\\u30a6\\u30ea\\u30a4",
3332       "\\u30d5\\u30fd",
3333       "\\u3075\\u309e",
3334       "\\u3076\\u309d",
3335       "\\u3076\\u3075",
3336       "\\u3076\\u30d5",
3337       "\\u30d6\\u3075",
3338       "\\u30d6\\u30d5",
3339       "\\u3076\\u309e",
3340       "\\u3076\\u3077",
3341       "\\u30d6\\u3077",
3342       "\\u3077\\u309d",
3343       "\\u30d7\\u30fd",
3344       "\\u3077\\u3075",
3345 };
3346 
3347   static const char *test2[] = {
3348     "\\u306f\\u309d", /* H\\u309d */
3349     "\\u30cf\\u30fd", /* K\\u30fd */
3350     "\\u306f\\u306f", /* HH */
3351     "\\u306f\\u30cf", /* HK */
3352     "\\u30cf\\u30cf", /* KK */
3353     "\\u306f\\u309e", /* H\\u309e */
3354     "\\u30cf\\u30fe", /* K\\u30fe */
3355     "\\u306f\\u3070", /* HH\\u309b */
3356     "\\u30cf\\u30d0", /* KK\\u309b */
3357     "\\u306f\\u3071", /* HH\\u309c */
3358     "\\u30cf\\u3071", /* KH\\u309c */
3359     "\\u30cf\\u30d1", /* KK\\u309c */
3360     "\\u3070\\u309d", /* H\\u309b\\u309d */
3361     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3362     "\\u3070\\u306f", /* H\\u309bH */
3363     "\\u30d0\\u30cf", /* K\\u309bK */
3364     "\\u3070\\u309e", /* H\\u309b\\u309e */
3365     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3366     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3367     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3368     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3369     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3370     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3371     "\\u3071\\u309d", /* H\\u309c\\u309d */
3372     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3373     "\\u3071\\u306f", /* H\\u309cH */
3374     "\\u30d1\\u30cf", /* K\\u309cK */
3375     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3376     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3377     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3378     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3379     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3380   };
3381   /*
3382   static const char *test3[] = {
3383     "\\u221er\\u221e",
3384     "\\u221eR#",
3385     "\\u221et\\u221e",
3386     "#r\\u221e",
3387     "#R#",
3388     "#t%",
3389     "#T%",
3390     "8t\\u221e",
3391     "8T\\u221e",
3392     "8t#",
3393     "8T#",
3394     "8t%",
3395     "8T%",
3396     "8t8",
3397     "8T8",
3398     "\\u03c9r\\u221e",
3399     "\\u03a9R%",
3400     "rr\\u221e",
3401     "rR\\u221e",
3402     "Rr\\u221e",
3403     "RR\\u221e",
3404     "RT%",
3405     "rt8",
3406     "tr\\u221e",
3407     "tr8",
3408     "TR8",
3409     "tt8",
3410     "\\u30b7\\u30e3\\u30fc\\u30ec",
3411   };
3412   */
3413   static const UColAttribute att[] = { UCOL_STRENGTH };
3414   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3415 
3416   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3417   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3418 
3419   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3420   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3421   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3422   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3423   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3424 }
3425 
TestStrCollIdenticalPrefix(void)3426 static void TestStrCollIdenticalPrefix(void) {
3427   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3428   const char* test[] = {
3429     "ab\\ud9b0\\udc70",
3430     "ab\\ud9b0\\udc71"
3431   };
3432   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3433 }
3434 /* Contractions should have all their canonically equivalent */
3435 /* strings included */
TestContractionClosure(void)3436 static void TestContractionClosure(void) {
3437   static const struct {
3438     const char *rules;
3439     const char *data[10];
3440     const uint32_t len;
3441   } tests[] = {
3442     {   "&b=\\u00e4\\u00e4",
3443       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3444     {   "&b=\\u00C5",
3445       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3446   };
3447   uint32_t i;
3448 
3449 
3450   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3451     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3452   }
3453 }
3454 
3455 /* This tests also fails*/
TestBeforePrefixFailure(void)3456 static void TestBeforePrefixFailure(void) {
3457   static const struct {
3458     const char *rules;
3459     const char *data[10];
3460     const uint32_t len;
3461   } tests[] = {
3462     { "&g <<< a"
3463       "&[before 3]\\uff41 <<< x",
3464       {"x", "\\uff41"}, 2 },
3465     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3466         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3467         "&[before 3]\\u30a7<<<\\u30a9",
3468       {"\\u30a9", "\\u30a7"}, 2 },
3469     {   "&[before 3]\\u30a7<<<\\u30a9"
3470         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3471         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3472       {"\\u30a9", "\\u30a7"}, 2 },
3473   };
3474   uint32_t i;
3475 
3476 
3477   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3478     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3479   }
3480 
3481 #if 0
3482   const char* rule1 =
3483         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3484         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3485         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3486   const char* rule2 =
3487         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3488         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3489         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3490   const char* test[] = {
3491       "\\u30c6\\u30fc\\u30bf",
3492       "\\u30c6\\u30a7\\u30bf",
3493   };
3494   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3495   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3496 /* this piece of code should be in some sort of verbose mode     */
3497 /* it gets the collation elements for elements and prints them   */
3498 /* This is useful when trying to see whether the problem is      */
3499   {
3500     UErrorCode status = U_ZERO_ERROR;
3501     uint32_t i = 0;
3502     UCollationElements *it = NULL;
3503     uint32_t CE;
3504     UChar string[256];
3505     uint32_t uStringLen;
3506     UCollator *coll = NULL;
3507 
3508     uStringLen = u_unescape(rule1, string, 256);
3509 
3510     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3511 
3512     /*coll = ucol_open("ja_JP_JIS", &status);*/
3513     it = ucol_openElements(coll, string, 0, &status);
3514 
3515     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3516       log_verbose("%s\n", test[i]);
3517       uStringLen = u_unescape(test[i], string, 256);
3518       ucol_setText(it, string, uStringLen, &status);
3519 
3520       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3521         log_verbose("%08X\n", CE);
3522       }
3523       log_verbose("\n");
3524 
3525     }
3526 
3527     ucol_closeElements(it);
3528     ucol_close(coll);
3529   }
3530 #endif
3531 }
3532 
TestPrefixCompose(void)3533 static void TestPrefixCompose(void) {
3534   const char* rule1 =
3535         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3536   /*
3537   const char* test[] = {
3538       "\\u30c6\\u30fc\\u30bf",
3539       "\\u30c6\\u30a7\\u30bf",
3540   };
3541   */
3542   {
3543     UErrorCode status = U_ZERO_ERROR;
3544     /*uint32_t i = 0;*/
3545     /*UCollationElements *it = NULL;*/
3546 /*    uint32_t CE;*/
3547     UChar string[256];
3548     uint32_t uStringLen;
3549     UCollator *coll = NULL;
3550 
3551     uStringLen = u_unescape(rule1, string, 256);
3552 
3553     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3554     ucol_close(coll);
3555   }
3556 
3557 
3558 }
3559 
3560 /*
3561 [last variable] last variable value
3562 [last primary ignorable] largest CE for primary ignorable
3563 [last secondary ignorable] largest CE for secondary ignorable
3564 [last tertiary ignorable] largest CE for tertiary ignorable
3565 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3566 */
3567 
TestRuleOptions(void)3568 static void TestRuleOptions(void) {
3569   /* values here are hardcoded and are correct for the current UCA
3570    * when the UCA changes, one might be forced to change these
3571    * values. (\\u02d0, \\U00010FFFC etc...)
3572    */
3573   static const struct {
3574     const char *rules;
3575     const char *data[10];
3576     const uint32_t len;
3577   } tests[] = {
3578     /* - all befores here amount to zero */
3579     { "&[before 3][first tertiary ignorable]<<<a",
3580         { "\\u0000", "a"}, 2
3581     }, /* you cannot go before first tertiary ignorable */
3582 
3583     { "&[before 3][last tertiary ignorable]<<<a",
3584         { "\\u0000", "a"}, 2
3585     }, /* you cannot go before last tertiary ignorable */
3586 
3587     { "&[before 3][first secondary ignorable]<<<a",
3588         { "\\u0000", "a"}, 2
3589     }, /* you cannot go before first secondary ignorable */
3590 
3591     { "&[before 3][last secondary ignorable]<<<a",
3592         { "\\u0000", "a"}, 2
3593     }, /* you cannot go before first secondary ignorable */
3594 
3595     /* 'normal' befores */
3596 
3597     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3598         {  "c", "b", "\\u0332", "a" }, 4
3599     },
3600 
3601     /* we don't have a code point that corresponds to
3602      * the last primary ignorable
3603      */
3604     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3605         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3606     },
3607 
3608     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3609         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
3610     },
3611 
3612     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3613         {  "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
3614     },
3615 
3616     { "&[first regular]<a"
3617       "&[before 1][first regular]<b",
3618       { "b", "\\u02d0", "a", "\\u02d1"}, 4
3619     },
3620 
3621     { "&[before 1][last regular]<b"
3622       "&[last regular]<a",
3623         { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
3624     },
3625 
3626     { "&[before 1][first implicit]<b"
3627       "&[first implicit]<a",
3628         { "b", "\\u4e00", "a", "\\u4e01"}, 4
3629     },
3630 
3631     { "&[before 1][last implicit]<b"
3632       "&[last implicit]<a",
3633         { "b", "\\U0010FFFD", "a" }, 3
3634     },
3635 
3636     { "&[last variable]<z"
3637       "&[last primary ignorable]<x"
3638       "&[last secondary ignorable]<<y"
3639       "&[last tertiary ignorable]<<<w"
3640       "&[top]<u",
3641       {"\\ufffb",  "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
3642     }
3643 
3644   };
3645   uint32_t i;
3646 
3647 
3648   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3649     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3650   }
3651 }
3652 
3653 
TestOptimize(void)3654 static void TestOptimize(void) {
3655   /* this is not really a test - just trying out
3656    * whether copying of UCA contents will fail
3657    * Cannot really test, since the functionality
3658    * remains the same.
3659    */
3660   static const struct {
3661     const char *rules;
3662     const char *data[10];
3663     const uint32_t len;
3664   } tests[] = {
3665     /* - all befores here amount to zero */
3666     { "[optimize [\\uAC00-\\uD7FF]]",
3667     { "a", "b"}, 2}
3668   };
3669   uint32_t i;
3670 
3671   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3672     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3673   }
3674 }
3675 
3676 /*
3677 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3678 weiv    ucol_strcollIter?
3679 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3680 weiv    these are the input strings?
3681 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3682 weiv    will check - could be a problem with utf-8 iterator
3683 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3684 weiv    hmmm
3685 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3686 weiv    that doesn't sound right
3687 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3688 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3689 cycheng@ca.ibm.c... yes
3690 weiv    and then do the comparison
3691 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3692 weiv    utf-16 strings look like a little endian ones in the example you sent me
3693 weiv    It could be a bug - let me try to test it out
3694 cycheng@ca.ibm.c... ok
3695 cycheng@ca.ibm.c... we can wait till the conf. call
3696 cycheng@ca.ibm.c... next weke
3697 weiv    that would be great
3698 weiv    hmmm
3699 weiv    I might be wrong
3700 weiv    let me play with it some more
3701 cycheng@ca.ibm.c... ok
3702 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3703 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3704 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3705 weiv    ok
3706 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3707 weiv    thanks
3708 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3709 */
#if 0
/*
 * Disabled reproduction of the report quoted above: the same two strings are
 * compared once through UTF-16BE iterators and once through UTF-8 iterators,
 * with normalization switched on, and the two results must agree.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* d800 0021 and fffc 0062 as big-endian UTF-16 bytes */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the byte sequences given for the same strings in the report */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator beLeft, beRight;
  UCharIterator u8Left, u8Right;
  UCollationResult resU16, resU8;

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  uiter_setUTF16BE(&beLeft, utf16be[0], 4);
  uiter_setUTF16BE(&beRight, utf16be[1], 4);
  resU16 = ucol_strcollIter(coll, &beLeft, &beRight, &status);

  uiter_setUTF8(&u8Left, utf8[0], 4);
  uiter_setUTF8(&u8Right, utf8[1], 4);
  resU8 = ucol_strcollIter(coll, &u8Left, &u8Right, &status);

  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3750 
3751 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3752 static void Alexis2(void) {
3753   UErrorCode status = U_ZERO_ERROR;
3754   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3755   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3756   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3757   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3758 
3759   UConverter *conv = NULL;
3760 
3761   UCharIterator U16BEItS, U16BEItT;
3762   UCharIterator U8ItS, U8ItT;
3763 
3764   UCollationResult resU16, resU16BE, resU8;
3765 
3766   static const char* const pairs[][2] = {
3767     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3768     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3769     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3770     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3771     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3772     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3773     { "\\u0020", "\\u0020\\u0000"}
3774 /*
3775 5F20 (my result here)
3776 5F204E008E3F
3777 5F20 (your result here)
3778 */
3779   };
3780 
3781   int32_t i = 0;
3782 
3783   UCollator *coll = ucol_open("", &status);
3784   if(status == U_FILE_ACCESS_ERROR) {
3785     log_data_err("Is your data around?\n");
3786     return;
3787   } else if(U_FAILURE(status)) {
3788     log_err("Error opening collator\n");
3789     return;
3790   }
3791   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3792   conv = ucnv_open("UTF16BE", &status);
3793   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3794     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3795     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3796 
3797     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3798 
3799     log_verbose("Result of strcoll is %i\n", resU16);
3800 
3801     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3802     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3803 
3804     /* use the original sizes, as the result from converter is in bytes */
3805     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3806     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3807 
3808     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3809 
3810     log_verbose("Result of U16BE is %i\n", resU16BE);
3811 
3812     if(resU16 != resU16BE) {
3813       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3814     }
3815 
3816     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3817     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3818 
3819     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3820     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3821 
3822     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3823 
3824     if(resU16 != resU8) {
3825       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3826     }
3827 
3828   }
3829 
3830   ucol_close(coll);
3831   ucnv_close(conv);
3832 }
3833 
TestHebrewUCA(void)3834 static void TestHebrewUCA(void) {
3835   UErrorCode status = U_ZERO_ERROR;
3836   static const char *first[] = {
3837     "d790d6b8d79cd795d6bcd7a9",
3838     "d790d79cd79ed7a7d799d799d7a1",
3839     "d790d6b4d79ed795d6bcd7a9",
3840   };
3841 
3842   char utf8String[3][256];
3843   UChar utf16String[3][256];
3844 
3845   int32_t i = 0, j = 0;
3846   int32_t sizeUTF8[3];
3847   int32_t sizeUTF16[3];
3848 
3849   UCollator *coll = ucol_open("", &status);
3850   if (U_FAILURE(status)) {
3851       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3852       return;
3853   }
3854   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3855 
3856   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3857     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3858     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3859     log_verbose("%i: ");
3860     for(j = 0; j < sizeUTF16[i]; j++) {
3861       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3862       log_verbose("%04X", utf16String[i][j]);
3863     }
3864     log_verbose("\n");
3865   }
3866   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3867     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3868       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3869     }
3870   }
3871 
3872   ucol_close(coll);
3873 
3874 }
3875 
TestPartialSortKeyTermination(void)3876 static void TestPartialSortKeyTermination(void) {
3877   static const char* cases[] = {
3878     "\\u1234\\u1234\\udc00",
3879     "\\udc00\\ud800\\ud800"
3880   };
3881 
3882   int32_t i = sizeof(UCollator);
3883 
3884   UErrorCode status = U_ZERO_ERROR;
3885 
3886   UCollator *coll = ucol_open("", &status);
3887 
3888   UCharIterator iter;
3889 
3890   UChar currCase[256];
3891   int32_t length = 0;
3892   int32_t pKeyLen = 0;
3893 
3894   uint8_t key[256];
3895 
3896   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3897     uint32_t state[2] = {0, 0};
3898     length = u_unescape(cases[i], currCase, 256);
3899     uiter_setString(&iter, currCase, length);
3900     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3901 
3902     log_verbose("Done\n");
3903 
3904   }
3905   ucol_close(coll);
3906 }
3907 
TestSettings(void)3908 static void TestSettings(void) {
3909   static const char* cases[] = {
3910     "apple",
3911       "Apple"
3912   };
3913 
3914   static const char* locales[] = {
3915     "",
3916       "en"
3917   };
3918 
3919   UErrorCode status = U_ZERO_ERROR;
3920 
3921   int32_t i = 0, j = 0;
3922 
3923   UChar source[256], target[256];
3924   int32_t sLen = 0, tLen = 0;
3925 
3926   UCollator *collateObject = NULL;
3927   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3928     collateObject = ucol_open(locales[i], &status);
3929     ucol_setStrength(collateObject, UCOL_PRIMARY);
3930     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3931     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3932       sLen = u_unescape(cases[j-1], source, 256);
3933       source[sLen] = 0;
3934       tLen = u_unescape(cases[j], target, 256);
3935       source[tLen] = 0;
3936       doTest(collateObject, source, target, UCOL_EQUAL);
3937     }
3938     ucol_close(collateObject);
3939   }
3940 }
3941 
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)3942 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
3943     UErrorCode status = U_ZERO_ERROR;
3944     int32_t errorNo = 0;
3945     /*const UChar *sourceRules = NULL;*/
3946     /*int32_t sourceRulesLen = 0;*/
3947     UColAttributeValue french = UCOL_OFF;
3948     int32_t cloneSize = 0;
3949 
3950     if(!ucol_equals(source, target)) {
3951         log_err("Same collators, different address not equal\n");
3952         errorNo++;
3953     }
3954     ucol_close(target);
3955     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
3956         /* currently, safeClone is implemented through getRules/openRules
3957         * so it is the same as the test below - I will comment that test out.
3958         */
3959         /* real thing */
3960         target = ucol_safeClone(source, NULL, &cloneSize, &status);
3961         if(U_FAILURE(status)) {
3962             log_err("Error creating clone\n");
3963             errorNo++;
3964             return errorNo;
3965         }
3966         if(!ucol_equals(source, target)) {
3967             log_err("Collator different from it's clone\n");
3968             errorNo++;
3969         }
3970         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
3971         if(french == UCOL_ON) {
3972             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
3973         } else {
3974             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
3975         }
3976         if(U_FAILURE(status)) {
3977             log_err("Error setting attributes\n");
3978             errorNo++;
3979             return errorNo;
3980         }
3981         if(ucol_equals(source, target)) {
3982             log_err("Collators same even when options changed\n");
3983             errorNo++;
3984         }
3985         ucol_close(target);
3986         /* commented out since safeClone uses exactly the same technique */
3987         /*
3988         sourceRules = ucol_getRules(source, &sourceRulesLen);
3989         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3990         if(U_FAILURE(status)) {
3991         log_err("Error instantiating target from rules\n");
3992         errorNo++;
3993         return errorNo;
3994         }
3995         if(!ucol_equals(source, target)) {
3996         log_err("Collator different from collator that was created from the same rules\n");
3997         errorNo++;
3998         }
3999         ucol_close(target);
4000         */
4001     }
4002     return errorNo;
4003 }
4004 
4005 
TestEquals(void)4006 static void TestEquals(void) {
4007     /* ucol_equals is not currently a public API. There is a chance that it will become
4008     * something like this, but currently it is only used by RuleBasedCollator::operator==
4009     */
4010     /* test whether the two collators instantiated from the same locale are equal */
4011     UErrorCode status = U_ZERO_ERROR;
4012     UParseError parseError;
4013     int32_t noOfLoc = uloc_countAvailable();
4014     const char *locName = NULL;
4015     UCollator *source = NULL, *target = NULL;
4016     int32_t i = 0;
4017 
4018     const char* rules[] = {
4019         "&l < lj <<< Lj <<< LJ",
4020         "&n < nj <<< Nj <<< NJ",
4021         "&ae <<< \\u00e4",
4022         "&AE <<< \\u00c4"
4023     };
4024     /*
4025     const char* badRules[] = {
4026     "&l <<< Lj",
4027     "&n < nj <<< nJ <<< NJ",
4028     "&a <<< \\u00e4",
4029     "&AE <<< \\u00c4 <<< x"
4030     };
4031     */
4032 
4033     UChar sourceRules[1024], targetRules[1024];
4034     int32_t sourceRulesSize = 0, targetRulesSize = 0;
4035     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4036 
4037     for(i = 0; i < rulesSize; i++) {
4038         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4039         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4040     }
4041 
4042     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4043     if(status == U_FILE_ACCESS_ERROR) {
4044         log_data_err("Is your data around?\n");
4045         return;
4046     } else if(U_FAILURE(status)) {
4047         log_err("Error opening collator\n");
4048         return;
4049     }
4050     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4051     if(!ucol_equals(source, target)) {
4052         log_err("Equivalent collators not equal!\n");
4053     }
4054     ucol_close(source);
4055     ucol_close(target);
4056 
4057     source = ucol_open("root", &status);
4058     target = ucol_open("root", &status);
4059     log_verbose("Testing root\n");
4060     if(!ucol_equals(source, source)) {
4061         log_err("Same collator not equal\n");
4062     }
4063     if(TestEqualsForCollator(locName, source, target)) {
4064         log_err("Errors for root\n", locName);
4065     }
4066     ucol_close(source);
4067 
4068     for(i = 0; i<noOfLoc; i++) {
4069         status = U_ZERO_ERROR;
4070         locName = uloc_getAvailable(i);
4071         /*if(hasCollationElements(locName)) {*/
4072         log_verbose("Testing equality for locale %s\n", locName);
4073         source = ucol_open(locName, &status);
4074         target = ucol_open(locName, &status);
4075         if (U_FAILURE(status)) {
4076             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4077             continue;
4078         }
4079         if(TestEqualsForCollator(locName, source, target)) {
4080             log_err("Errors for locale %s\n", locName);
4081         }
4082         ucol_close(source);
4083         /*}*/
4084     }
4085 }
4086 
TestJ2726(void)4087 static void TestJ2726(void) {
4088     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4089     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4090     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4091     UErrorCode status = U_ZERO_ERROR;
4092     UCollator *coll = ucol_open("en", &status);
4093     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4094     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4095     doTest(coll, a, aSpace, UCOL_EQUAL);
4096     doTest(coll, aSpace, a, UCOL_EQUAL);
4097     doTest(coll, a, spaceA, UCOL_EQUAL);
4098     doTest(coll, spaceA, a, UCOL_EQUAL);
4099     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4100     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4101     ucol_close(coll);
4102 }
4103 
NullRule(void)4104 static void NullRule(void) {
4105     UChar r[3] = {0};
4106     UErrorCode status = U_ZERO_ERROR;
4107     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4108     if(U_SUCCESS(status)) {
4109         log_err("This should have been an error!\n");
4110         ucol_close(coll);
4111     } else {
4112         status = U_ZERO_ERROR;
4113     }
4114     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4115     if(U_FAILURE(status)) {
4116         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4117     } else {
4118         ucol_close(coll);
4119     }
4120 }
4121 
4122 /**
4123  * Test for CollationElementIterator previous and next for the whole set of
4124  * unicode characters with normalization on.
4125  */
TestNumericCollation(void)4126 static void TestNumericCollation(void)
4127 {
4128     UErrorCode status = U_ZERO_ERROR;
4129 
4130     const static char *basicTestStrings[]={
4131     "hello1",
4132     "hello2",
4133     "hello2002",
4134     "hello2003",
4135     "hello123456",
4136     "hello1234567",
4137     "hello10000000",
4138     "hello100000000",
4139     "hello1000000000",
4140     "hello10000000000",
4141     };
4142 
4143     const static char *preZeroTestStrings[]={
4144     "avery10000",
4145     "avery010000",
4146     "avery0010000",
4147     "avery00010000",
4148     "avery000010000",
4149     "avery0000010000",
4150     "avery00000010000",
4151     "avery000000010000",
4152     };
4153 
4154     const static char *thirtyTwoBitNumericStrings[]={
4155     "avery42949672960",
4156     "avery42949672961",
4157     "avery42949672962",
4158     "avery429496729610"
4159     };
4160 
4161      const static char *longNumericStrings[]={
4162      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4163         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4164         are treated as multiple collation elements. */
4165     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4166     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4167     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4168     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4169     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4170     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4171     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4172     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4173     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4174     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4175     };
4176 
4177     const static char *supplementaryDigits[] = {
4178       "\\uD835\\uDFCE", /* 0 */
4179       "\\uD835\\uDFCF", /* 1 */
4180       "\\uD835\\uDFD0", /* 2 */
4181       "\\uD835\\uDFD1", /* 3 */
4182       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4183       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4184       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4185       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4186       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4187       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4188     };
4189 
4190     const static char *foreignDigits[] = {
4191       "\\u0661",
4192         "\\u0662",
4193         "\\u0663",
4194       "\\u0661\\u0660",
4195       "\\u0661\\u0662",
4196       "\\u0661\\u0663",
4197       "\\u0662\\u0660",
4198       "\\u0662\\u0662",
4199       "\\u0662\\u0663",
4200       "\\u0663\\u0660",
4201       "\\u0663\\u0662",
4202       "\\u0663\\u0663"
4203     };
4204 
4205     const static char *evenZeroes[] = {
4206       "2000",
4207       "2001",
4208         "2002",
4209         "2003"
4210     };
4211 
4212     UColAttribute att = UCOL_NUMERIC_COLLATION;
4213     UColAttributeValue val = UCOL_ON;
4214 
4215     /* Open our collator. */
4216     UCollator* coll = ucol_open("root", &status);
4217     if (U_FAILURE(status)){
4218         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4219               myErrorName(status));
4220         return;
4221     }
4222     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4223     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4224     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4225     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4226     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4227     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4228 
4229     /* Setting up our collator to do digits. */
4230     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4231     if (U_FAILURE(status)){
4232         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4233               myErrorName(status));
4234         return;
4235     }
4236 
4237     /*
4238        Testing that prepended zeroes still yield the correct collation behavior.
4239        We expect that every element in our strings array will be equal.
4240     */
4241     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4242 
4243     ucol_close(coll);
4244 }
4245 
TestTibetanConformance(void)4246 static void TestTibetanConformance(void)
4247 {
4248     const char* test[] = {
4249         "\\u0FB2\\u0591\\u0F71\\u0061",
4250         "\\u0FB2\\u0F71\\u0061"
4251     };
4252 
4253     UErrorCode status = U_ZERO_ERROR;
4254     UCollator *coll = ucol_open("", &status);
4255     UChar source[100];
4256     UChar target[100];
4257     int result;
4258     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4259     if (U_SUCCESS(status)) {
4260         u_unescape(test[0], source, 100);
4261         u_unescape(test[1], target, 100);
4262         doTest(coll, source, target, UCOL_EQUAL);
4263         result = ucol_strcoll(coll, source, -1,   target, -1);
4264         log_verbose("result %d\n", result);
4265         if (UCOL_EQUAL != result) {
4266             log_err("Tibetan comparison error\n");
4267         }
4268     }
4269     ucol_close(coll);
4270 
4271     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4272 }
4273 
/* Runs the generic locale ordering test on two escaped strings with the
 * "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
}
4278 
/* Upper bound (inclusive) of the raw values walked by the implicit tests. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Byte masks applied to implicit weights. They are plain constant
 * expressions without a trailing semicolon, so they can appear anywhere an
 * expression is allowed and not only at the end of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4283 
4284 
/* Logs (verbose only) the raw value i next to its implicit weight. Values
 * outside [0, TST_UCOL_MAX_INPUT] are silently ignored. */
static void showImplicit(UChar32 i) {
    if (i < 0 || i > TST_UCOL_MAX_INPUT) {
        return;
    }
    log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
}
4290 
TestImplicitGeneration(void)4291 static void TestImplicitGeneration(void) {
4292     UErrorCode status = U_ZERO_ERROR;
4293     UChar32 last = 0;
4294     UChar32 current;
4295     UChar32 i = 0, j = 0;
4296     UChar32 roundtrip = 0;
4297     UChar32 lastBottom = 0;
4298     UChar32 currentBottom = 0;
4299     UChar32 lastTop = 0;
4300     UChar32 currentTop = 0;
4301 
4302     UCollator *coll = ucol_open("root", &status);
4303     if(U_FAILURE(status)) {
4304         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4305         return;
4306     }
4307 
4308     uprv_uca_getRawFromImplicit(0xE20303E7);
4309 
4310     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4311         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4312 
4313         /* check that it round-trips AND that all intervening ones are illegal*/
4314         roundtrip = uprv_uca_getRawFromImplicit(current);
4315         if (roundtrip != i) {
4316             log_err("No roundtrip %08X\n", i);
4317         }
4318         if (last != 0) {
4319             for (j = last + 1; j < current; ++j) {
4320                 roundtrip = uprv_uca_getRawFromImplicit(j);
4321                 /* raise an error if it *doesn't* find an error*/
4322                 if (roundtrip != -1) {
4323                     log_err("Fails to recognize illegal %08X\n", j);
4324                 }
4325             }
4326         }
4327         /* now do other consistency checks*/
4328         lastBottom = last & bottomByte;
4329         currentBottom = current & bottomByte;
4330         lastTop = last & topByte;
4331         currentTop = current & topByte;
4332 
4333         /* print out some values for spot-checking*/
4334         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4335             showImplicit(i-3);
4336             showImplicit(i-2);
4337             showImplicit(i-1);
4338             showImplicit(i);
4339             showImplicit(i+1);
4340             showImplicit(i+2);
4341         }
4342         last = current;
4343 
4344         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4345             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4346         }
4347     }
4348     showImplicit(TST_UCOL_MAX_INPUT-2);
4349     showImplicit(TST_UCOL_MAX_INPUT-1);
4350     showImplicit(TST_UCOL_MAX_INPUT);
4351     ucol_close(coll);
4352 }
4353 
4354 /**
4355  * Iterate through the given iterator, checking to see that all the strings
4356  * in the expected array are present.
4357  * @param expected array of strings we expect to see, or NULL
4358  * @param expectedCount number of elements of expected, or 0
4359  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4360 static int32_t checkUEnumeration(const char* msg,
4361                                  UEnumeration* iter,
4362                                  const char** expected,
4363                                  int32_t expectedCount) {
4364     UErrorCode ec = U_ZERO_ERROR;
4365     int32_t i = 0, n, j, bit;
4366     int32_t seenMask = 0;
4367 
4368     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4369     n = uenum_count(iter, &ec);
4370     if (!assertSuccess("count", &ec)) return -1;
4371     log_verbose("%s = [", msg);
4372     for (;; ++i) {
4373         const char* s = uenum_next(iter, NULL, &ec);
4374         if (!assertSuccess("snext", &ec) || s == NULL) break;
4375         if (i != 0) log_verbose(",");
4376         log_verbose("%s", s);
4377         /* check expected list */
4378         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4379             if ((seenMask&bit) == 0 &&
4380                 uprv_strcmp(s, expected[j]) == 0) {
4381                 seenMask |= bit;
4382                 break;
4383             }
4384         }
4385     }
4386     log_verbose("] (%d)\n", i);
4387     assertTrue("count verified", i==n);
4388     /* did we see all expected strings? */
4389     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4390         if ((seenMask&bit)!=0) {
4391             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4392         } else {
4393             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4394         }
4395     }
4396     return n;
4397 }
4398 
4399 /**
4400  * Test new API added for separate collation tree.
4401  */
TestSeparateTrees(void)4402 static void TestSeparateTrees(void) {
4403     UErrorCode ec = U_ZERO_ERROR;
4404     UEnumeration *e = NULL;
4405     int32_t n = -1;
4406     UBool isAvailable;
4407     char loc[256];
4408 
4409     static const char* AVAIL[] = { "en", "de" };
4410 
4411     static const char* KW[] = { "collation" };
4412 
4413     static const char* KWVAL[] = { "phonebook", "stroke" };
4414 
4415 #if !UCONFIG_NO_SERVICE
4416     e = ucol_openAvailableLocales(&ec);
4417     if (e != NULL) {
4418         assertSuccess("ucol_openAvailableLocales", &ec);
4419         assertTrue("ucol_openAvailableLocales!=0", e!=0);
4420         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4421         /* Don't need to check n because we check list */
4422         uenum_close(e);
4423     } else {
4424         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4425     }
4426 #endif
4427 
4428     e = ucol_getKeywords(&ec);
4429     if (e != NULL) {
4430         assertSuccess("ucol_getKeywords", &ec);
4431         assertTrue("ucol_getKeywords!=0", e!=0);
4432         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4433         /* Don't need to check n because we check list */
4434         uenum_close(e);
4435     } else {
4436         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4437     }
4438 
4439     e = ucol_getKeywordValues(KW[0], &ec);
4440     if (e != NULL) {
4441         assertSuccess("ucol_getKeywordValues", &ec);
4442         assertTrue("ucol_getKeywordValues!=0", e!=0);
4443         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4444         /* Don't need to check n because we check list */
4445         uenum_close(e);
4446     } else {
4447         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4448     }
4449 
4450     /* Try setting a warning before calling ucol_getKeywordValues */
4451     ec = U_USING_FALLBACK_WARNING;
4452     e = ucol_getKeywordValues(KW[0], &ec);
4453     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4454         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4455         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4456         /* Don't need to check n because we check list */
4457         uenum_close(e);
4458     }
4459 
4460     /*
4461 U_DRAFT int32_t U_EXPORT2
4462 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4463                              const char* locale, UBool* isAvailable,
4464                              UErrorCode* status);
4465 }
4466 */
4467     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4468                                      &isAvailable, &ec);
4469     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4470         assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4471         assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4472                    isAvailable == TRUE);
4473     }
4474 
4475     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4476                                      &isAvailable, &ec);
4477     if (assertSuccess("getFunctionalEquivalent", &ec)) {
4478         assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4479         assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4480                    isAvailable == TRUE);
4481     }
4482 }
4483 
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through the generic ordering test, once
 * with a collator built from the "[before 2]" rules below and once with
 * the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* syllables starting with 'l' */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* case and accent variants of 'a' after 'x'/'X', with and without a
       trailing 'x' */
    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* first list: explicit rules, then the zh locale */
    genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
    genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));

    /* second list: explicit rules, then the zh locale */
    genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
    genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
}
4544 
TestBeforeTightening(void)4545 static void TestBeforeTightening(void) {
4546     static const struct {
4547         const char *rules;
4548         UErrorCode expectedStatus;
4549     } tests[] = {
4550         { "&[before 1]a<x", U_ZERO_ERROR },
4551         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4552         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4553         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4554         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4555         { "&[before 2]a<<x",U_ZERO_ERROR },
4556         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4557         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4558         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4559         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4560         { "&[before 3]a<<<x",U_ZERO_ERROR },
4561         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4562         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4563     };
4564 
4565     int32_t i = 0;
4566 
4567     UErrorCode status = U_ZERO_ERROR;
4568     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4569     uint32_t rlen = 0;
4570 
4571     UCollator *coll = NULL;
4572 
4573 
4574     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4575         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4576         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4577         if(status != tests[i].expectedStatus) {
4578             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4579                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4580         }
4581         ucol_close(coll);
4582         status = U_ZERO_ERROR;
4583     }
4584 
4585 }
4586 
4587 #if 0
4588 &m < a
4589 &[before 1] a < x <<< X << q <<< Q < z
4590 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4591 
4592 &m < a
4593 &[before 2] a << x <<< X << q <<< Q < z
4594 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4595 
4596 &m < a
4597 &[before 3] a <<< x <<< X << q <<< Q < z
4598 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4599 
4600 
4601 &m << a
4602 &[before 1] a < x <<< X << q <<< Q < z
4603 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4604 
4605 &m << a
4606 &[before 2] a << x <<< X << q <<< Q < z
4607 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4608 
4609 &m << a
4610 &[before 3] a <<< x <<< X << q <<< Q < z
4611 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4612 
4613 
4614 &m <<< a
4615 &[before 1] a < x <<< X << q <<< Q < z
4616 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4617 
4618 &m <<< a
4619 &[before 2] a << x <<< X << q <<< Q < z
4620 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4621 
4622 &m <<< a
4623 &[before 3] a <<< x <<< X << q <<< Q < z
4624 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4625 
4626 
4627 &[before 1] s < x <<< X << q <<< Q < z
4628 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4629 
4630 &[before 2] s << x <<< X << q <<< Q < z
4631 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4632 
4633 &[before 3] s <<< x <<< X << q <<< Q < z
4634 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4635 
4636 
4637 &[before 1] \u24DC < x <<< X << q <<< Q < z
4638 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4639 
4640 &[before 2] \u24DC << x <<< X << q <<< Q < z
4641 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4642 
4643 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4644 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4645 #endif
4646 
4647 
#if 0
/* requires features not yet supported */
/*
 * Disabled: for each entry, builds a collator from the rules and runs the
 * generic ordering test over the first `size` strings of `order`.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        /* reset on m, primary difference to a */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on m, secondary difference to a */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset on m, tertiary difference to a */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* [before] applied directly to s */
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* [before] applied directly to U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };
    const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
    int32_t idx;

    for (idx = 0; idx < testCount; ++idx) {
        genericRulesStarter(tests[idx].rules, tests[idx].order, tests[idx].size);
    }
}
#endif
4695 
TestTailorNULL(void)4696 static void TestTailorNULL( void ) {
4697     const static char* rule = "&a <<< '\\u0000'";
4698     UErrorCode status = U_ZERO_ERROR;
4699     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4700     uint32_t rlen = 0;
4701     UChar a = 1, null = 0;
4702     UCollationResult res = UCOL_EQUAL;
4703 
4704     UCollator *coll = NULL;
4705 
4706 
4707     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4708     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4709 
4710     if(U_FAILURE(status)) {
4711         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4712     } else {
4713         res = ucol_strcoll(coll, &a, 1, &null, 1);
4714 
4715         if(res != UCOL_LESS) {
4716             log_err("NULL was not tailored properly!\n");
4717         }
4718     }
4719 
4720     ucol_close(coll);
4721 }
4722 
4723 static void
TestThaiSortKey(void)4724 TestThaiSortKey(void)
4725 {
4726   UChar yamakan = 0x0E4E;
4727   UErrorCode status = U_ZERO_ERROR;
4728   uint8_t key[256];
4729   int32_t keyLen = 0;
4730   /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4731   /* since it stays in the same relative position. This should be addressed in CLDR */
4732   /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4733   /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4734   /* UCA 5.0 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 }; */
4735   /* UCA 5.1 moves Yammakan */
4736   uint8_t expectedKey[256] = { 0x01, 0xe0, 0x4e, 0x01, 0x05, 0x00 };
4737   UCollator *coll = ucol_open("th", &status);
4738   if(U_FAILURE(status)) {
4739     log_err_status(status, "Could not open a collator, exiting (%s)\n", u_errorName(status));
4740     return;
4741   }
4742 
4743   keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4744   if(strcmp((char *)key, (char *)expectedKey)) {
4745     log_err("Yammakan key is different from ICU 4.0!\n");
4746   }
4747 
4748   ucol_close(coll);
4749 }
4750 
4751 static void
TestUpperFirstQuaternary(void)4752 TestUpperFirstQuaternary(void)
4753 {
4754   const char* tests[] = { "B", "b", "Bb", "bB" };
4755   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4756   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4757   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4758 }
4759 
4760 static void
TestJ4960(void)4761 TestJ4960(void)
4762 {
4763   const char* tests[] = { "\\u00e2T", "aT" };
4764   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4765   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4766   const char* tests2[] = { "a", "A" };
4767   const char* rule = "&[first tertiary ignorable]=A=a";
4768   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4769   UColAttributeValue attVals2[] = { UCOL_ON };
4770   /* Test whether we correctly ignore primary ignorables on case level when */
4771   /* we have only primary & case level */
4772   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4773   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4774   /* and case level */
4775   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4776   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4777   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4778 }
4779 
4780 static void
TestJ5223(void)4781 TestJ5223(void)
4782 {
4783   static const char *test = "this is a test string";
4784   UChar ustr[256];
4785   int32_t ustr_length = u_unescape(test, ustr, 256);
4786   unsigned char sortkey[256];
4787   int32_t sortkey_length;
4788   UErrorCode status = U_ZERO_ERROR;
4789   static UCollator *coll = NULL;
4790   coll = ucol_open("root", &status);
4791   if(U_FAILURE(status)) {
4792     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4793     return;
4794   }
4795   ucol_setStrength(coll, UCOL_PRIMARY);
4796   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4797   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4798   if (U_FAILURE(status)) {
4799     log_err("Failed setting atributes\n");
4800     return;
4801   }
4802   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4803   if (sortkey_length > 256) return;
4804 
4805   /* we mark the position where the null byte should be written in advance */
4806   sortkey[sortkey_length-1] = 0xAA;
4807 
4808   /* we set the buffer size one byte higher than needed */
4809   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4810     sortkey_length+1);
4811 
4812   /* no error occurs (for me) */
4813   if (sortkey[sortkey_length-1] == 0xAA) {
4814     log_err("Hit bug at first try\n");
4815   }
4816 
4817   /* we mark the position where the null byte should be written again */
4818   sortkey[sortkey_length-1] = 0xAA;
4819 
4820   /* this time we set the buffer size to the exact amount needed */
4821   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4822     sortkey_length);
4823 
4824   /* now the trailing null byte is not written */
4825   if (sortkey[sortkey_length-1] == 0xAA) {
4826     log_err("Hit bug at second try\n");
4827   }
4828 
4829   ucol_close(coll);
4830 }
4831 
/*
 * Regression test for Thai partial sort key problem: runs the generic
 * locale ordering test on two escaped Thai strings with the "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter(
        "th",
        test,
        sizeof(test)/sizeof(test[0]));
}
4843 
/*
 * Runs the generic rules ordering test on "a" and "y" with a rule set that
 * combines a reset on "Ny" with a [first secondary ignorable] reset.
 */
static void
TestJ5367(void)
{
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
    static const char *test[] = {
        "a",
        "y"
    };

    genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
}
4851 
4852 static void
TestVI5913(void)4853 TestVI5913(void)
4854 {
4855     UErrorCode status = U_ZERO_ERROR;
4856     int32_t i, j;
4857     UCollator *coll =NULL;
4858     uint8_t  resColl[100], expColl[100];
4859     int32_t  rLen, tLen, ruleLen, sLen, kLen;
4860     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4861     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4862     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4863     static const UChar tData[][20]={
4864         {0x1EAC, 0},
4865         {0x0041, 0x0323, 0x0302, 0},
4866         {0x1EA0, 0x0302, 0},
4867         {0x00C2, 0x0323, 0},
4868         {0x1ED8, 0},  /* O with dot and circumflex */
4869         {0x1ECC, 0x0302, 0},
4870         {0x1EB7, 0},
4871         {0x1EA1, 0x0306, 0},
4872     };
4873     static const UChar tailorData[][20]={
4874         {0x1FA2, 0},  /* Omega with 3 combining marks */
4875         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4876         {0x1FF3, 0x0313, 0x0300, 0},
4877         {0x1F60, 0x0300, 0x0345, 0},
4878         {0x1F62, 0x0345, 0},
4879         {0x1FA0, 0x0300, 0},
4880     };
4881     static const UChar tailorData2[][20]={
4882         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4883         {0x0073, 0x0323, 0x030C, 0},
4884         {0x0073, 0x030C, 0x0323, 0},
4885     };
4886     static const UChar tailorData3[][20]={
4887         {0x007a, 0},  /*  z */
4888         {0x0061, 0x0065, 0},  /*  a + e */
4889         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4890         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4891         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4892         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4893         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4894         {0x00EA, 0},  /* e with circumflex  */
4895     };
4896 
4897     /* Test Vietnamese sort. */
4898     coll = ucol_open("vi", &status);
4899     if(U_FAILURE(status)) {
4900         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4901         return;
4902     }
4903     log_verbose("\n\nVI collation:");
4904     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4905         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4906     }
4907     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4908         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4909     }
4910     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4911         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4912     }
4913     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4914         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4915     }
4916 
4917     for (j=0; j<8; j++) {
4918         tLen = u_strlen(tData[j]);
4919         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4920         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4921         for(i = 0; i<rLen; i++) {
4922             log_verbose(" %02X", resColl[i]);
4923         }
4924     }
4925 
4926     ucol_close(coll);
4927 
4928     /* Test Romanian sort. */
4929     coll = ucol_open("ro", &status);
4930     log_verbose("\n\nRO collation:");
4931     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4932         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4933     }
4934     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4935         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4936     }
4937     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4938         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4939     }
4940 
4941     for (j=4; j<8; j++) {
4942         tLen = u_strlen(tData[j]);
4943         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4944         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4945         for(i = 0; i<rLen; i++) {
4946             log_verbose(" %02X", resColl[i]);
4947         }
4948     }
4949     ucol_close(coll);
4950 
4951     /* Test the precomposed Greek character with 3 combining marks. */
4952     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
4953     ruleLen = u_strlen(rule);
4954     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4955     if (U_FAILURE(status)) {
4956         log_err("ucol_openRules failed with %s\n", u_errorName(status));
4957         return;
4958     }
4959     sLen = u_strlen(tailorData[0]);
4960     for (j=1; j<6; j++) {
4961         tLen = u_strlen(tailorData[j]);
4962         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
4963             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
4964         }
4965     }
4966     /* Test getSortKey. */
4967     tLen = u_strlen(tailorData[0]);
4968     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
4969     for (j=0; j<6; j++) {
4970         tLen = u_strlen(tailorData[j]);
4971         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
4972         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4973             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4974             for(i = 0; i<rLen; i++) {
4975                 log_err(" %02X", resColl[i]);
4976             }
4977         }
4978     }
4979     ucol_close(coll);
4980 
4981     log_verbose("\n\nTailoring test for s with caron:");
4982     ruleLen = u_strlen(rule2);
4983     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4984     tLen = u_strlen(tailorData2[0]);
4985     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
4986     for (j=1; j<3; j++) {
4987         tLen = u_strlen(tailorData2[j]);
4988         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
4989         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4990             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4991             for(i = 0; i<rLen; i++) {
4992                 log_err(" %02X", resColl[i]);
4993             }
4994         }
4995     }
4996     ucol_close(coll);
4997 
4998     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
4999     ruleLen = u_strlen(rule3);
5000     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5001     tLen = u_strlen(tailorData3[3]);
5002     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5003     for (j=4; j<6; j++) {
5004         tLen = u_strlen(tailorData3[j]);
5005         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5006 
5007         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5008             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5009             for(i = 0; i<rLen; i++) {
5010                 log_err(" %02X", resColl[i]);
5011             }
5012         }
5013 
5014         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5015          for(i = 0; i<rLen; i++) {
5016              log_verbose(" %02X", resColl[i]);
5017          }
5018     }
5019     ucol_close(coll);
5020 }
5021 
5022 static void
TestTailor6179(void)5023 TestTailor6179(void)
5024 {
5025     UErrorCode status = U_ZERO_ERROR;
5026     int32_t i;
5027     UCollator *coll =NULL;
5028     uint8_t  resColl[100];
5029     int32_t  rLen, tLen, ruleLen;
5030     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5031     UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5032             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5033             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5034             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5035     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5036     UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5037             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5038             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5039             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5040             0x3C,0x3C,0x20,0x62,0};
5041 
5042     UChar tData1[][20]={
5043         {0x61, 0},
5044         {0x62, 0},
5045         { 0xFDD0,0x009E, 0}
5046     };
5047     UChar tData2[][20]={
5048             {0x61, 0},
5049             {0x62, 0},
5050             { 0xFDD0,0x009E, 0}
5051      };
5052 
5053     /* UCA5.1, the value may increase in later version. */
5054     uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5055     uint8_t lastPrimaryIgnCE[6]={1, 0xE7, 0xB9, 1, 5, 0};
5056     uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5057     uint8_t lastSecondaryIgnCE[6]={1, 1, 0x05, 0};
5058 
5059     /* Test [Last Primary ignorable] */
5060 
5061     log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
5062     ruleLen = u_strlen(rule1);
5063     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5064     if (U_FAILURE(status)) {
5065         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5066         return;
5067     }
5068     tLen = u_strlen(tData1[0]);
5069     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5070     if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5071         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5072         for(i = 0; i<rLen; i++) {
5073             log_err(" %02X", resColl[i]);
5074         }
5075     }
5076     tLen = u_strlen(tData1[1]);
5077     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5078     if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5079         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5080         for(i = 0; i<rLen; i++) {
5081             log_err(" %02X", resColl[i]);
5082         }
5083     }
5084     ucol_close(coll);
5085 
5086 
5087     /* Test [Last Secondary ignorable] */
5088     log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
5089     ruleLen = u_strlen(rule1);
5090     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5091     if (U_FAILURE(status)) {
5092         log_err("Tailoring test: &[last primary ignorable] failed!");
5093         return;
5094     }
5095     tLen = u_strlen(tData2[0]);
5096     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5097     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5098     for(i = 0; i<rLen; i++) {
5099         log_verbose(" %02X", resColl[i]);
5100     }
5101     if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5102         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5103         for(i = 0; i<rLen; i++) {
5104             log_err(" %02X", resColl[i]);
5105         }
5106     }
5107     tLen = u_strlen(tData2[1]);
5108     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5109     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5110     for(i = 0; i<rLen; i++) {
5111         log_verbose(" %02X", resColl[i]);
5112     }
5113     if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5114         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5115         for(i = 0; i<rLen; i++) {
5116             log_err(" %02X", resColl[i]);
5117         }
5118     }
5119     ucol_close(coll);
5120 }
5121 
5122 static void
TestUCAPrecontext(void)5123 TestUCAPrecontext(void)
5124 {
5125     UErrorCode status = U_ZERO_ERROR;
5126     int32_t i, j;
5127     UCollator *coll =NULL;
5128     uint8_t  resColl[100], prevColl[100];
5129     int32_t  rLen, tLen, ruleLen;
5130     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5131     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5132     /* & l middle-dot << a  a is an expansion. */
5133 
5134     UChar tData1[][20]={
5135             { 0xb7, 0},  /* standalone middle dot(0xb7) */
5136             { 0x387, 0}, /* standalone middle dot(0x387) */
5137             { 0x61, 0},  /* a */
5138             { 0x6C, 0},  /* l */
5139             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5140             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5141             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5142             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5143             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5144             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5145             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5146      };
5147 
5148     log_verbose("\n\nEN collation:");
5149     coll = ucol_open("en", &status);
5150     if (U_FAILURE(status)) {
5151         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5152         return;
5153     }
5154     for (j=0; j<11; j++) {
5155         tLen = u_strlen(tData1[j]);
5156         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5157         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5158             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5159                     j, tData1[j]);
5160         }
5161         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5162         for(i = 0; i<rLen; i++) {
5163             log_verbose(" %02X", resColl[i]);
5164         }
5165         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5166      }
5167      ucol_close(coll);
5168 
5169 
5170      log_verbose("\n\nJA collation:");
5171      coll = ucol_open("ja", &status);
5172      if (U_FAILURE(status)) {
5173          log_err("Tailoring test: &z <<a|- failed!");
5174          return;
5175      }
5176      for (j=0; j<11; j++) {
5177          tLen = u_strlen(tData1[j]);
5178          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5179          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5180              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5181                      j, tData1[j]);
5182          }
5183          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5184          for(i = 0; i<rLen; i++) {
5185              log_verbose(" %02X", resColl[i]);
5186          }
5187          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5188       }
5189       ucol_close(coll);
5190 
5191 
5192       log_verbose("\n\nTailoring test: & middle dot < a ");
5193       ruleLen = u_strlen(rule1);
5194       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5195       if (U_FAILURE(status)) {
5196           log_err("Tailoring test: & middle dot < a failed!");
5197           return;
5198       }
5199       for (j=0; j<11; j++) {
5200           tLen = u_strlen(tData1[j]);
5201           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5202           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5203               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5204                       j, tData1[j]);
5205           }
5206           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5207           for(i = 0; i<rLen; i++) {
5208               log_verbose(" %02X", resColl[i]);
5209           }
5210           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5211        }
5212        ucol_close(coll);
5213 
5214 
5215        log_verbose("\n\nTailoring test: & l middle-dot << a ");
5216        ruleLen = u_strlen(rule2);
5217        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5218        if (U_FAILURE(status)) {
5219            log_err("Tailoring test: & l middle-dot << a failed!");
5220            return;
5221        }
5222        for (j=0; j<11; j++) {
5223            tLen = u_strlen(tData1[j]);
5224            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5225            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5226                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5227                        j, tData1[j]);
5228            }
5229            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5230                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5231                        j, tData1[j]);
5232            }
5233            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5234            for(i = 0; i<rLen; i++) {
5235                log_verbose(" %02X", resColl[i]);
5236            }
5237            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5238         }
5239         ucol_close(coll);
5240 }
5241 
5242 static void
TestOutOfBuffer5468(void)5243 TestOutOfBuffer5468(void)
5244 {
5245     static const char *test = "\\u4e00";
5246     UChar ustr[256];
5247     int32_t ustr_length = u_unescape(test, ustr, 256);
5248     unsigned char shortKeyBuf[1];
5249     int32_t sortkey_length;
5250     UErrorCode status = U_ZERO_ERROR;
5251     static UCollator *coll = NULL;
5252 
5253     coll = ucol_open("root", &status);
5254     if(U_FAILURE(status)) {
5255       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5256       return;
5257     }
5258     ucol_setStrength(coll, UCOL_PRIMARY);
5259     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5260     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5261     if (U_FAILURE(status)) {
5262       log_err("Failed setting atributes\n");
5263       return;
5264     }
5265 
5266     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5267     if (sortkey_length != 4) {
5268         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5269     }
5270     log_verbose("length of sortKey is %d", sortkey_length);
5271     ucol_close(coll);
5272 }
5273 
5274 #define TSKC_DATA_SIZE 5
5275 #define TSKC_BUF_SIZE  50
5276 static void
TestSortKeyConsistency(void)5277 TestSortKeyConsistency(void)
5278 {
5279     UErrorCode icuRC = U_ZERO_ERROR;
5280     UCollator* ucol;
5281     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5282 
5283     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5284     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5285     int32_t i, j, i2;
5286 
5287     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5288     if (U_FAILURE(icuRC))
5289     {
5290         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5291         return;
5292     }
5293 
5294     for (i = 0; i < TSKC_DATA_SIZE; i++)
5295     {
5296         UCharIterator uiter;
5297         uint32_t state[2] = { 0, 0 };
5298         int32_t dataLen = i+1;
5299         for (j=0; j<TSKC_BUF_SIZE; j++)
5300             bufFull[i][j] = bufPart[i][j] = 0;
5301 
5302         /* Full sort key */
5303         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5304 
5305         /* Partial sort key */
5306         uiter_setString(&uiter, data, dataLen);
5307         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5308         if (U_FAILURE(icuRC))
5309         {
5310             log_err("ucol_nextSortKeyPart failed\n");
5311             ucol_close(ucol);
5312             return;
5313         }
5314 
5315         for (i2=0; i2<i; i2++)
5316         {
5317             UBool fullMatch = TRUE;
5318             UBool partMatch = TRUE;
5319             for (j=0; j<TSKC_BUF_SIZE; j++)
5320             {
5321                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5322                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5323             }
5324             if (fullMatch != partMatch) {
5325                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5326                                   : "partial key was consistent, but full key changed\n");
5327                 ucol_close(ucol);
5328                 return;
5329             }
5330         }
5331     }
5332 
5333     /*=============================================*/
5334    ucol_close(ucol);
5335 }
5336 
5337 /* ticket: 6101 */
TestCroatianSortKey(void)5338 static void TestCroatianSortKey(void) {
5339     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5340     UErrorCode status = U_ZERO_ERROR;
5341     UCollator *ucol;
5342     UCharIterator iter;
5343 
5344     static const UChar text[] = { 0x0044, 0xD81A };
5345 
5346     size_t length = sizeof(text)/sizeof(*text);
5347 
5348     uint8_t textSortKey[32];
5349     size_t lenSortKey = 32;
5350     size_t actualSortKeyLen;
5351     uint32_t uStateInfo[2] = { 0, 0 };
5352 
5353     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5354     if (U_FAILURE(status)) {
5355         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5356         return;
5357     }
5358 
5359     uiter_setString(&iter, text, length);
5360 
5361     actualSortKeyLen = ucol_nextSortKeyPart(
5362         ucol, &iter, (uint32_t*)uStateInfo,
5363         textSortKey, lenSortKey, &status
5364         );
5365 
5366     if (actualSortKeyLen == lenSortKey) {
5367         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5368     }
5369 
5370     ucol_close(ucol);
5371 }
5372 
5373 /* ticket: 6140 */
5374 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5375  * they are both Hiragana and Katakana
5376  */
5377 #define SORTKEYLEN 50
TestHiragana(void)5378 static void TestHiragana(void) {
5379     UErrorCode status = U_ZERO_ERROR;
5380     UCollator* ucol;
5381     UCollationResult strcollresult;
5382     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5383     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5384     int32_t data1Len = sizeof(data1)/sizeof(*data1);
5385     int32_t data2Len = sizeof(data2)/sizeof(*data2);
5386     int32_t i, j;
5387     uint8_t sortKey1[SORTKEYLEN];
5388     uint8_t sortKey2[SORTKEYLEN];
5389 
5390     UCharIterator uiter1;
5391     UCharIterator uiter2;
5392     uint32_t state1[2] = { 0, 0 };
5393     uint32_t state2[2] = { 0, 0 };
5394     int32_t keySize1;
5395     int32_t keySize2;
5396 
5397     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5398             &status);
5399     if (U_FAILURE(status)) {
5400         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5401         return;
5402     }
5403 
5404     /* Start of full sort keys */
5405     /* Full sort key1 */
5406     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5407     /* Full sort key2 */
5408     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5409     if (keySize1 == keySize2) {
5410         for (i = 0; i < keySize1; i++) {
5411             if (sortKey1[i] != sortKey2[i]) {
5412                 log_err("Full sort keys are different. Should be equal.");
5413             }
5414         }
5415     } else {
5416         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5417     }
5418     /* End of full sort keys */
5419 
5420     /* Start of partial sort keys */
5421     /* Partial sort key1 */
5422     uiter_setString(&uiter1, data1, data1Len);
5423     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5424     /* Partial sort key2 */
5425     uiter_setString(&uiter2, data2, data2Len);
5426     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5427     if (U_SUCCESS(status) && keySize1 == keySize2) {
5428         for (j = 0; j < keySize1; j++) {
5429             if (sortKey1[j] != sortKey2[j]) {
5430                 log_err("Partial sort keys are different. Should be equal");
5431             }
5432         }
5433     } else {
5434         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5435     }
5436     /* End of partial sort keys */
5437 
5438     /* Start of strcoll */
5439     /* Use ucol_strcoll() to determine ordering */
5440     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5441     if (strcollresult != UCOL_EQUAL) {
5442         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5443     }
5444 
5445     ucol_close(ucol);
5446 }
5447 
5448 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5449 
addMiscCollTest(TestNode ** root)5450 void addMiscCollTest(TestNode** root)
5451 {
5452     TEST(TestRuleOptions);
5453     TEST(TestBeforePrefixFailure);
5454     TEST(TestContractionClosure);
5455     TEST(TestPrefixCompose);
5456     TEST(TestStrCollIdenticalPrefix);
5457     TEST(TestPrefix);
5458     TEST(TestNewJapanese);
5459     /*TEST(TestLimitations);*/
5460     TEST(TestNonChars);
5461     TEST(TestExtremeCompression);
5462     TEST(TestSurrogates);
5463     TEST(TestVariableTopSetting);
5464     TEST(TestBocsuCoverage);
5465     TEST(TestCyrillicTailoring);
5466     TEST(TestCase);
5467     TEST(IncompleteCntTest);
5468     TEST(BlackBirdTest);
5469     TEST(FunkyATest);
5470     TEST(BillFairmanTest);
5471     TEST(RamsRulesTest);
5472     TEST(IsTailoredTest);
5473     TEST(TestCollations);
5474     TEST(TestChMove);
5475     TEST(TestImplicitTailoring);
5476     TEST(TestFCDProblem);
5477     TEST(TestEmptyRule);
5478     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5479     TEST(TestJ815);
5480     /*TEST(TestJ831);*/ /* we changed lv locale */
5481     TEST(TestBefore);
5482     TEST(TestRedundantRules);
5483     TEST(TestExpansionSyntax);
5484     TEST(TestHangulTailoring);
5485     TEST(TestUCARules);
5486     TEST(TestIncrementalNormalize);
5487     TEST(TestComposeDecompose);
5488     TEST(TestCompressOverlap);
5489     TEST(TestContraction);
5490     TEST(TestExpansion);
5491     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5492     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5493     TEST(TestOptimize);
5494     TEST(TestSuppressContractions);
5495     TEST(Alexis2);
5496     TEST(TestHebrewUCA);
5497     TEST(TestPartialSortKeyTermination);
5498     TEST(TestSettings);
5499     TEST(TestEquals);
5500     TEST(TestJ2726);
5501     TEST(NullRule);
5502     TEST(TestNumericCollation);
5503     TEST(TestTibetanConformance);
5504     TEST(TestPinyinProblem);
5505     TEST(TestImplicitGeneration);
5506     TEST(TestSeparateTrees);
5507     TEST(TestBeforePinyin);
5508     TEST(TestBeforeTightening);
5509     /*TEST(TestMoreBefore);*/
5510     TEST(TestTailorNULL);
5511     TEST(TestThaiSortKey);
5512     TEST(TestUpperFirstQuaternary);
5513     TEST(TestJ4960);
5514     TEST(TestJ5223);
5515     TEST(TestJ5232);
5516     TEST(TestJ5367);
5517     TEST(TestHiragana);
5518     TEST(TestSortKeyConsistency);
5519     TEST(TestVI5913);  /* VI, RO tailored rules */
5520     TEST(TestCroatianSortKey);
5521     TEST(TestTailor6179);
5522     TEST(TestUCAPrecontext);
5523     TEST(TestOutOfBuffer5468);
5524 }
5525 
5526 #endif /* #if !UCONFIG_NO_COLLATION */
5527