• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2001-2007, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /*******************************************************************************
7 *
8 * File cmsccoll.C
9 *
10 *******************************************************************************/
11 /**
12  * These are the tests specific to ICU 1.8 and above, that I didn't know where
13  * to fit.
14  */
15 
16 #include <stdio.h>
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_COLLATION
21 
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
25 #include "cintltst.h"
26 #include "ccolltst.h"
27 #include "callcoll.h"
28 #include "unicode/ustring.h"
29 #include "string.h"
30 #include "ucol_imp.h"
31 #include "ucol_tok.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "uparse.h"
38 
/* Element count of a true array `a` (not valid for pointers). */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))
40 
41 #define MAX_TOKEN_LEN 16
42 
43 typedef UCollationResult tst_strcoll(void *collator, const int object,
44                         const UChar *source, const int sLen,
45                         const UChar *target, const int tLen);
46 
47 
48 
/*
 * Strings compared by IncompleteCntTest under the rules " & Z < ABC < Q < B".
 * The test expects each entry to compare UCOL_LESS against every entry that
 * follows it.
 */
static const char cnt1[][10] = {

  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};
63 
/*
 * Strings compared by IncompleteCntTest under the rules
 * " & Z < DAVIS < MARK <DAV". The test expects each entry to compare
 * UCOL_LESS against every entry that follows it.
 */
static const char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
75 
IncompleteCntTest(void)76 static void IncompleteCntTest(void)
77 {
78   UErrorCode status = U_ZERO_ERROR;
79   UChar temp[90];
80   UChar t1[90];
81   UChar t2[90];
82 
83   UCollator *coll =  NULL;
84   uint32_t i = 0, j = 0;
85   uint32_t size = 0;
86 
87   u_uastrcpy(temp, " & Z < ABC < Q < B");
88 
89   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
90 
91   if(U_SUCCESS(status)) {
92     size = sizeof(cnt1)/sizeof(cnt1[0]);
93     for(i = 0; i < size-1; i++) {
94       for(j = i+1; j < size; j++) {
95         UCollationElements *iter;
96         u_uastrcpy(t1, cnt1[i]);
97         u_uastrcpy(t2, cnt1[j]);
98         doTest(coll, t1, t2, UCOL_LESS);
99         /* synwee : added collation element iterator test */
100         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
101         if (U_FAILURE(status)) {
102           log_err("Creation of iterator failed\n");
103           break;
104         }
105         backAndForth(iter);
106         ucol_closeElements(iter);
107       }
108     }
109   }
110 
111   ucol_close(coll);
112 
113 
114   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
115   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
116 
117   if(U_SUCCESS(status)) {
118     size = sizeof(cnt2)/sizeof(cnt2[0]);
119     for(i = 0; i < size-1; i++) {
120       for(j = i+1; j < size; j++) {
121         UCollationElements *iter;
122         u_uastrcpy(t1, cnt2[i]);
123         u_uastrcpy(t2, cnt2[j]);
124         doTest(coll, t1, t2, UCOL_LESS);
125 
126         /* synwee : added collation element iterator test */
127         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
128         if (U_FAILURE(status)) {
129           log_err("Creation of iterator failed\n");
130           break;
131         }
132         backAndForth(iter);
133         ucol_closeElements(iter);
134       }
135     }
136   }
137 
138   ucol_close(coll);
139 
140 
141 }
142 
/*
 * Strings used by BlackBirdTest once alternate handling is set to
 * UCOL_SHIFTED; at quaternary strength the test expects each entry to compare
 * UCOL_LESS against every later entry.
 */
static const char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};
154 
155 const static UCollationResult shiftedTert[] = {
156   UCOL_EQUAL,
157   UCOL_EQUAL,
158   UCOL_EQUAL,
159   UCOL_LESS,
160   UCOL_EQUAL,
161   UCOL_EQUAL,
162   UCOL_LESS,
163   UCOL_EQUAL,
164   UCOL_EQUAL
165 };
166 
/*
 * Strings used by BlackBirdTest while alternate handling is
 * UCOL_NON_IGNORABLE; the test expects each entry to compare UCOL_LESS
 * against every later entry.
 */
static const char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
178 
BlackBirdTest(void)179 static void BlackBirdTest(void) {
180   UErrorCode status = U_ZERO_ERROR;
181   UChar t1[90];
182   UChar t2[90];
183 
184   uint32_t i = 0, j = 0;
185   uint32_t size = 0;
186   UCollator *coll = ucol_open("en_US", &status);
187 
188   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
189   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
190 
191   if(U_SUCCESS(status)) {
192     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
193     for(i = 0; i < size-1; i++) {
194       for(j = i+1; j < size; j++) {
195         u_uastrcpy(t1, nonignorable[i]);
196         u_uastrcpy(t2, nonignorable[j]);
197         doTest(coll, t1, t2, UCOL_LESS);
198       }
199     }
200   }
201 
202   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
203   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
204 
205   if(U_SUCCESS(status)) {
206     size = sizeof(shifted)/sizeof(shifted[0]);
207     for(i = 0; i < size-1; i++) {
208       for(j = i+1; j < size; j++) {
209         u_uastrcpy(t1, shifted[i]);
210         u_uastrcpy(t2, shifted[j]);
211         doTest(coll, t1, t2, UCOL_LESS);
212       }
213     }
214   }
215 
216   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
217   if(U_SUCCESS(status)) {
218     size = sizeof(shifted)/sizeof(shifted[0]);
219     for(i = 1; i < size; i++) {
220       u_uastrcpy(t1, shifted[i-1]);
221       u_uastrcpy(t2, shifted[i]);
222       doTest(coll, t1, t2, shiftedTert[i]);
223     }
224   }
225 
226   ucol_close(coll);
227 }
228 
229 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
230     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
231     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
232     {0x0041/*'A'*/, 0x0300, 0x0000},
233     {0x00C0, 0x0301, 0x0000},
234     /* this would work with forced normalization */
235     {0x00C0, 0x0316, 0x0000}
236 };
237 
238 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
239     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
240     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
241     {0x00C0, 0},
242     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243     /* this would work with forced normalization */
244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
245 };
246 
247 const static UCollationResult results[] = {
248     UCOL_GREATER,
249     UCOL_EQUAL,
250     UCOL_EQUAL,
251     UCOL_GREATER,
252     UCOL_EQUAL
253 };
254 
FunkyATest(void)255 static void FunkyATest(void)
256 {
257 
258     int32_t i;
259     UErrorCode status = U_ZERO_ERROR;
260     UCollator  *myCollation;
261     myCollation = ucol_open("en_US", &status);
262     if(U_FAILURE(status)){
263         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
264         return;
265     }
266     log_verbose("Testing some A letters, for some reason\n");
267     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
268     ucol_setStrength(myCollation, UCOL_TERTIARY);
269     for (i = 0; i < 4 ; i++)
270     {
271         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
272     }
273     ucol_close(myCollation);
274 }
275 
276 UColAttributeValue caseFirst[] = {
277     UCOL_OFF,
278     UCOL_LOWER_FIRST,
279     UCOL_UPPER_FIRST
280 };
281 
282 
283 UColAttributeValue alternateHandling[] = {
284     UCOL_NON_IGNORABLE,
285     UCOL_SHIFTED
286 };
287 
288 UColAttributeValue caseLevel[] = {
289     UCOL_OFF,
290     UCOL_ON
291 };
292 
293 UColAttributeValue strengths[] = {
294     UCOL_PRIMARY,
295     UCOL_SECONDARY,
296     UCOL_TERTIARY,
297     UCOL_QUATERNARY,
298     UCOL_IDENTICAL
299 };
300 
301 #if 0
/* Printable names, index-aligned with strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with caseFirst[]. */
static const char * caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };

/* Printable names, index-aligned with alternateHandling[]. */
static const char * alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };

/* Printable names, index-aligned with caseLevel[]. */
static const char * caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };
326 
327 /* not used currently - does not test only prints */
328 static void PrintMarkDavis(void)
329 {
330   UErrorCode status = U_ZERO_ERROR;
331   UChar m[256];
332   uint8_t sortkey[256];
333   UCollator *coll = ucol_open("en_US", &status);
334   uint32_t h,i,j,k, sortkeysize;
335   uint32_t sizem = 0;
336   char buffer[512];
337   uint32_t len = 512;
338 
339   log_verbose("PrintMarkDavis");
340 
341   u_uastrcpy(m, "Mark Davis");
342   sizem = u_strlen(m);
343 
344 
345   m[1] = 0xe4;
346 
347   for(i = 0; i<sizem; i++) {
348     fprintf(stderr, "\\u%04X ", m[i]);
349   }
350   fprintf(stderr, "\n");
351 
352   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
353     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
354     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
355 
356     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
357       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
358       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
359 
360       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
361         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
362         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
363 
364         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
365           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
366           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
367           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
368           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
369         }
370 
371       }
372 
373     }
374 
375   }
376 }
377 #endif
378 
BillFairmanTest(void)379 static void BillFairmanTest(void) {
380 /*
381 ** check for actual locale via ICU resource bundles
382 **
383 ** lp points to the original locale ("fr_FR_....")
384 */
385 
386     UResourceBundle *lr,*cr;
387     UErrorCode              lec = U_ZERO_ERROR;
388     const char *lp = "fr_FR_you_ll_never_find_this_locale";
389 
390     log_verbose("BillFairmanTest\n");
391 
392     lr = ures_open(NULL,lp,&lec);
393     if (lr) {
394         cr = ures_getByKey(lr,"collations",0,&lec);
395         if (cr) {
396             lp = ures_getLocale(cr,&lec);
397             if (lp) {
398                 if (U_SUCCESS(lec)) {
399                     if(strcmp(lp, "fr") != 0) {
400                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
401                     }
402                 }
403             }
404             ures_close(cr);
405         }
406         ures_close(lr);
407     }
408 }
409 
/*
 * Expects p < q, and expects the order to stay LESS after U+31A3 is
 * prepended to p and U+310D is prepended to q.
 * The prefixed copies live in 256-unit stack buffers; p and q are assumed
 * to fit (no length check is made).
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    const UChar leadForP = 0x31a3;
    const UChar leadForQ = 0x310d;
    UChar prefixedP[256] = { '\0'};
    UChar prefixedQ[256] = { '\0'};

    /* plain comparison */
    doTest(col, p, q, UCOL_LESS);

    /* comparison with one extra leading character on each side */
    prefixedP[0] = leadForP;
    u_strcpy(&prefixedP[1], p);
    prefixedQ[0] = leadForQ;
    u_strcpy(&prefixedQ[1], q);
    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
440 
/*
 * Expects p < q, then checks two variations:
 *   - 'S' (U+0053) prepended to p and 's' (U+0073) prepended to q must
 *     still compare LESS;
 *   - 'b' (U+0062) appended to p and 'a' (U+0061) appended to q must
 *     compare GREATER.
 * Works in 256-unit stack buffers; p and q are assumed to fit.
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { '\0'};
    UChar right[256] = { '\0'};
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* leading characters differing only in case */
    left[0] = 0x0053;
    u_strcpy(left+1, p);
    right[0] = 0x0073;
    u_strcpy(right+1, q);
    doTest(col, left, right, UCOL_LESS);

    /* trailing characters with different base letters */
    u_strcpy(left, p);
    pLen = u_strlen(p);
    left[pLen] = 0x62;
    left[pLen+1] = 0;

    u_strcpy(right, q);
    qLen = u_strlen(q);
    right[qLen] = 0x61;
    right[qLen+1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
477 
/*
 * Expects p < q, then checks two variations:
 *   - U+0020 prepended to p and U+002D prepended to q must still compare
 *     LESS;
 *   - U+00E0 appended to p and U+0061 appended to q must compare GREATER.
 * Works in 256-unit stack buffers; p and q are assumed to fit.
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { '\0'};
    UChar right[256] = { '\0'};
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* space vs. hyphen in front */
    left[0] = 0x0020;
    u_strcpy(left+1, p);
    right[0] = 0x002D;
    u_strcpy(right+1, q);
    doTest(col, left, right, UCOL_LESS);

    /* U+00E0 vs. U+0061 at the end */
    u_strcpy(left, p);
    pLen = u_strlen(p);
    left[pLen] = 0xE0;
    left[pLen+1] = 0;

    u_strcpy(right, q);
    qLen = u_strlen(q);
    right[qLen] = 0x61;
    right[qLen+1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
512 
/* Expects p and q to compare UCOL_EQUAL under `col`. */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    const UCollationResult expected = UCOL_EQUAL;

    doTest(col, p, q, expected);
}
524 
/*
 * Re-parses the rule string of `coll` token by token and, for each relation
 * found, runs the matching check (testEquality / testPrimary / testSecondary /
 * testTertiary) on the previous token's string (`first`) and the current
 * one (`second`).
 *
 * coll   - collator whose rules (ucol_getRules) are walked.
 * status - in/out error code; nothing is done if it already signals failure
 *          or the collator has no rules. Set to U_MEMORY_ALLOCATION_ERROR if
 *          the working copy of the rules cannot be allocated.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
  const UChar *rules = NULL;
  int32_t ruleLen = 0;
  uint32_t strength = 0;
  uint32_t chOffset = 0; uint32_t chLen = 0;
  uint32_t exOffset = 0; uint32_t exLen = 0;
  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
  uint32_t firstEx = 0;
/*  uint32_t rExpsLen = 0; */
  uint32_t firstLen = 0;
  UBool top_ = TRUE;
  uint16_t specs = 0;
  UBool startOfRules = TRUE;
  UBool lastReset = FALSE;
  UBool before = FALSE;
  uint32_t beforeStrength = 0;
  UColTokenParser src;
  UColOptionSet opts;

  UChar first[256];
  UChar second[256];
  UChar tempB[256];
  uint32_t tempLen;
  UChar *rulesCopy = NULL;
  UParseError parseError;
  src.opts = &opts;

  rules = ucol_getRules(coll, &ruleLen);
  if(U_SUCCESS(*status) && ruleLen > 0) {
    /* the parser gets a private copy with extra room after the rules */
    rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    if(rulesCopy == NULL) {
      *status = U_MEMORY_ALLOCATION_ERROR;
      log_err("Out of memory while copying collation rules\n");
      return;
    }
    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy+ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    *first = *second = 0;

    while (ucol_tok_parseNextToken(&src, startOfRules,&parseError, status) != NULL) {
      strength = src.parsedToken.strength;
      chOffset = src.parsedToken.charsOffset;
      chLen = src.parsedToken.charsLen;
      exOffset = src.parsedToken.extensionOffset;
      exLen = src.parsedToken.extensionLen;
      prefixOffset = src.parsedToken.prefixOffset;
      prefixLen = src.parsedToken.prefixLen;
      specs = src.parsedToken.flags;

      startOfRules = FALSE;
      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
        second[0] = 0;
      } else {
        u_strncpy(second,rulesCopy+chOffset, chLen);
        second[chLen] = 0;

        if(exLen > 0 && firstEx == 0) {
          u_strncat(first, rulesCopy+exOffset, exLen);
          first[firstLen+exLen] = 0;
        }

        if(lastReset == TRUE && prefixLen != 0) {
          /* shift `first` right to make room for the prefix; source and
             destination overlap, so a memmove-style copy is required */
          u_memmove(first+prefixLen, first, (int32_t)firstLen);
          u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
          first[firstLen+prefixLen] = 0;
          firstLen = firstLen+prefixLen;
        }

        if(before == TRUE) { /* swap first and second */
          u_strcpy(tempB, first);
          u_strcpy(first, second);
          u_strcpy(second, tempB);

          tempLen = firstLen;
          firstLen = chLen;
          chLen = tempLen;

          tempLen = firstEx;
          firstEx = exLen;
          exLen = tempLen;
          if(beforeStrength < strength) {
            strength = beforeStrength;
          }
        }
      }
      lastReset = FALSE;

      switch(strength){
      case UCOL_IDENTICAL:
          testEquality(coll,first,second);
          break;
      case UCOL_PRIMARY:
          testPrimary(coll,first,second);
          break;
      case UCOL_SECONDARY:
          testSecondary(coll,first,second);
          break;
      case UCOL_TERTIARY:
          testTertiary(coll,first,second);
          break;
      case UCOL_TOK_RESET:
        lastReset = TRUE;
        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
        if(before) {
          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
        }
        break;
      default:
          break;
      }

      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
        before = FALSE;
      } else {
        /* the current token becomes the left-hand side of the next relation */
        firstLen = chLen;
        firstEx = exLen;
        u_strcpy(first, second);
      }
    }
    free(rulesCopy);
  }
}
647 
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)648 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
649   UCollator *UCA = (UCollator *)collator;
650   return ucol_strcoll(UCA, source, sLen, target, tLen);
651 }
652 
653 /*
654 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
655 #ifdef U_WINDOWS
656   LCID lcid = (LCID)collator;
657   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
658 #else
659   return 0;
660 #endif
661 }
662 */
663 
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)664 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
665                                      UChar s1, UChar s2,
666                                      const UChar *s, const uint32_t sLen,
667                                      const UChar *t, const uint32_t tLen) {
668   UChar source[256] = {0};
669   UChar target[256] = {0};
670 
671   source[0] = s1;
672   u_strcpy(source+1, s);
673   target[0] = s2;
674   u_strcpy(target+1, t);
675 
676   return func(collator, opts, source, sLen+1, target, tLen+1);
677 }
678 
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)679 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
680                                    UChar s1, UChar s2,
681                                    const UChar *s, const uint32_t sLen,
682                                    const UChar *t, const uint32_t tLen) {
683   UChar source[256] = {0};
684   UChar target[256] = {0};
685 
686   u_strcpy(source, s);
687   source[sLen] = s1;
688   u_strcpy(target, t);
689   target[tLen] = s2;
690 
691   return func(collator, opts, source, sLen+1, target, tLen+1);
692 }
693 
/*
 * Estimates at which strength s and t differ, given that `func` already
 * reported `result` for them. Extra characters are attached in front
 * (swampEarlier) or behind (swampLater) both strings and the effect on the
 * comparison result decides the answer:
 *   - UCOL_EQUAL input                                  -> UCOL_IDENTICAL
 *   - result survives U+31A3 / U+310D in front          -> UCOL_PRIMARY
 *   - result survives U+31B7 / U+310F in front (both
 *     assignments)                                      -> UCOL_SECONDARY
 *   - result survives U+31B7 / U+310F behind (both
 *     assignments)                                      -> UCOL_TERTIARY
 *   - result flips to the opposite for both assignments
 *     behind                                            -> UCOL_QUATERNARY
 *   - otherwise                                         -> UCOL_IDENTICAL
 */
static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
  const UChar fSecondary = 0x310d;
  const UChar sSecondary = 0x31a3;
  const UChar fTertiary = 0x310f;
  const UChar sTertiary = 0x31b7;
  UCollationResult opposite;

  if(result == UCOL_EQUAL) {
    return UCOL_IDENTICAL;
  }
  opposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    return UCOL_PRIMARY;
  }

  if((swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
     (swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    return UCOL_SECONDARY;
  }

  if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    return UCOL_TERTIARY;
  }

  if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == opposite) &&
     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == opposite)) {
    return UCOL_QUATERNARY;
  }

  return UCOL_IDENTICAL;
}
729 
/*
 * Writes a textual relation symbol into `buffer` and returns `buffer`:
 *   "=="                        when res is UCOL_EQUAL or strength is the
 *                               sentinel 0xdeadbeef;
 *   strength+1 times '>'        when res is UCOL_GREATER;
 *   strength+1 times '<'        otherwise.
 * `buffer` must have room for strength+2 bytes (at least 3).
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
  uint32_t pos = 0;
  char mark;

  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    buffer[0] = '=';
    buffer[1] = '=';
    buffer[2] = '\0';
    return buffer;
  }

  mark = (res == UCOL_GREATER) ? '>' : '<';
  for(pos = 0; pos<strength+1; pos++) {
    buffer[pos] = mark;
  }
  buffer[strength+1] = '\0';

  return buffer;
}
751 
752 
753 
/*
 * Appends `src` to the NUL-terminated string in `dst`, whose total capacity
 * is `cap` bytes. Text that does not fit is dropped; `dst` always stays
 * NUL-terminated.
 */
static void logFailureAppend(char *dst, size_t cap, const char *src) {
  size_t used = strlen(dst);
  size_t room;
  size_t n;

  if(used + 1 >= cap) {
    return;
  }
  room = cap - used - 1;
  n = strlen(src);
  if(n > room) {
    n = room;
  }
  memcpy(dst + used, src, n);
  dst[used + n] = '\0';
}

/*
 * Reports a mismatch between the expected and the observed relation of two
 * strings.
 *
 * platform    - label printed before the observed relation.
 * test        - label printed before the expected relation.
 * source/sLen - first string and its length in code units.
 * target/tLen - second string and its length in code units.
 * realRes/realStrength - observed result and strength.
 * expRes/expStrength   - expected result and strength.
 * error       - when TRUE an error is logged through log_err; when not TRUE
 *               and VERBOSITY is 0 the function returns without output.
 *
 * The detailed "DIFF: ..." line goes to log_verbose. All intermediate
 * strings are built with bounded appends, so long inputs are truncated
 * instead of overrunning the fixed-size buffers.
 */
static void logFailure (const char *platform, const char *test,
                        const UChar *source, const uint32_t sLen,
                        const UChar *target, const uint32_t tLen,
                        UCollationResult realRes, uint32_t realStrength,
                        UCollationResult expRes, uint32_t expStrength, UBool error) {

  uint32_t i = 0;

  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];

  *sEsc = *tEsc = *s = *t = 0;
  if(error == TRUE) {
    log_err("Difference between expected and generated order. Run test with -v for more info\n");
  } else if(VERBOSITY == 0) {
    return;
  }

  /* s: space separated hex; sEsc: \uXXXX escapes, ASCII shown in parentheses */
  for(i = 0; i<sLen; i++) {
    sprintf(b, "%04X", (unsigned int)source[i]);
    logFailureAppend(sEsc, sizeof(sEsc), "\\u");
    logFailureAppend(sEsc, sizeof(sEsc), b);
    logFailureAppend(s, sizeof(s), b);
    logFailureAppend(s, sizeof(s), " ");
    if(source[i] < 0x80) {
      sprintf(b, "(%c)", source[i]);
      logFailureAppend(sEsc, sizeof(sEsc), b);
    }
  }
  for(i = 0; i<tLen; i++) {
    sprintf(b, "%04X", (unsigned int)target[i]);
    logFailureAppend(tEsc, sizeof(tEsc), "\\u");
    logFailureAppend(tEsc, sizeof(tEsc), b);
    logFailureAppend(t, sizeof(t), b);
    logFailureAppend(t, sizeof(t), " ");
    if(target[i] < 0x80) {
      sprintf(b, "(%c)", target[i]);
      logFailureAppend(tEsc, sizeof(tEsc), b);
    }
  }

  strcpy(output, "DIFF: ");

  logFailureAppend(output, sizeof(output), s);
  logFailureAppend(output, sizeof(output), " : ");
  logFailureAppend(output, sizeof(output), t);

  /* expected relation */
  logFailureAppend(output, sizeof(output), test);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(expRes, expStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  logFailureAppend(output, sizeof(output), " ");

  /* observed relation */
  logFailureAppend(output, sizeof(output), platform);
  logFailureAppend(output, sizeof(output), ": ");

  logFailureAppend(output, sizeof(output), sEsc);
  logFailureAppend(output, sizeof(output), getRelationSymbol(realRes, realStrength, relation));
  logFailureAppend(output, sizeof(output), tEsc);

  log_verbose("%s\n", output);

}
842 
843 /*
844 static void printOutRules(const UChar *rules) {
845   uint32_t len = u_strlen(rules);
846   uint32_t i = 0;
847   char toPrint;
848   uint32_t line = 0;
849 
850   fprintf(stdout, "Rules:");
851 
852   for(i = 0; i<len; i++) {
853     if(rules[i]<0x7f && rules[i]>=0x20) {
854       toPrint = (char)rules[i];
855       if(toPrint == '&') {
856         line = 1;
857         fprintf(stdout, "\n&");
858       } else if(toPrint == ';') {
859         fprintf(stdout, "<<");
860         line+=2;
861       } else if(toPrint == ',') {
862         fprintf(stdout, "<<<");
863         line+=3;
864       } else {
865         fprintf(stdout, "%c", toPrint);
866         line++;
867       }
868     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
869       fprintf(stdout, "\\u%04X", rules[i]);
870       line+=6;
871     }
872     if(line>72) {
873       fprintf(stdout, "\n");
874       line = 0;
875     }
876   }
877 
878   log_verbose("\n");
879 
880 }
881 */
882 
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)883 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
884   uint32_t diffs = 0;
885   UCollationResult realResult;
886   uint32_t realStrength;
887 
888   uint32_t sLen = u_strlen(first);
889   uint32_t tLen = u_strlen(second);
890 
891   realResult = func(collator, opts, first, sLen, second, tLen);
892   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
893 
894   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
895     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
896     diffs++;
897   } else if(realResult != UCOL_LESS || realStrength != strength) {
898     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
899     diffs++;
900   }
901   return diffs;
902 }
903 
904 
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)905 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
906   const UChar *rules = NULL, *current = NULL;
907   int32_t ruleLen = 0;
908   uint32_t strength = 0;
909   uint32_t chOffset = 0; uint32_t chLen = 0;
910   uint32_t exOffset = 0; uint32_t exLen = 0;
911   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
912 /*  uint32_t rExpsLen = 0; */
913   uint32_t firstLen = 0, secondLen = 0;
914   UBool varT = FALSE; UBool top_ = TRUE;
915   uint16_t specs = 0;
916   UBool startOfRules = TRUE;
917   UColTokenParser src;
918   UColOptionSet opts;
919 
920   UChar first[256];
921   UChar second[256];
922   UChar *rulesCopy = NULL;
923 
924   uint32_t UCAdiff = 0;
925   uint32_t Windiff = 1;
926   UParseError parseError;
927 
928   src.opts = &opts;
929 
930   rules = ucol_getRules(coll, &ruleLen);
931 
932   /*printOutRules(rules);*/
933 
934   if(U_SUCCESS(*status) && ruleLen > 0) {
935     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
936     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
937     src.current = src.source = rulesCopy;
938     src.end = rulesCopy+ruleLen;
939     src.extraCurrent = src.end;
940     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
941     *first = *second = 0;
942 
943     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
944       strength = src.parsedToken.strength;
945       chOffset = src.parsedToken.charsOffset;
946       chLen = src.parsedToken.charsLen;
947       exOffset = src.parsedToken.extensionOffset;
948       exLen = src.parsedToken.extensionLen;
949       prefixOffset = src.parsedToken.prefixOffset;
950       prefixLen = src.parsedToken.prefixLen;
951       specs = src.parsedToken.flags;
952 
953       startOfRules = FALSE;
954       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
955       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
956 
957       u_strncpy(second,rulesCopy+chOffset, chLen);
958       second[chLen] = 0;
959       secondLen = chLen;
960 
961       if(exLen > 0) {
962         u_strncat(first, rulesCopy+exOffset, exLen);
963         first[firstLen+exLen] = 0;
964         firstLen += exLen;
965       }
966 
967       if(strength != UCOL_TOK_RESET) {
968         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
969           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
970           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
971         }
972       }
973 
974 
975       firstLen = chLen;
976       u_strcpy(first, second);
977 
978     }
979     if(UCAdiff != 0 && Windiff != 0) {
980       log_verbose("\n");
981     }
982     if(UCAdiff == 0) {
983       log_verbose("No immediate difference with %s!\n", refName);
984     }
985     if(Windiff == 0) {
986       log_verbose("No immediate difference with Win32!\n");
987     }
988     free(rulesCopy);
989   }
990 }
991 
/*
 * Orders two collation elements, each given as a lead word plus a
 * continuation word: (s1, s2) against (t1, t2).
 * The comparison proceeds level by level and stops at the first difference:
 *   1. upper 16 bits of the lead word, then upper 16 bits of the
 *      continuation word (primary vs. primary);
 *   2. bits 8..15 of the lead word, then of the continuation word
 *      (secondary vs. secondary);
 *   3. low 8 bits of the lead word, then of the continuation word
 *      (tertiary vs. tertiary).
 * Returns 0 when both words are identical, -1 when (s1, s2) sorts first and
 * 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t lhs = 0, rhs = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* primary: lead high half in the top 16 bits, continuation high half below */
  lhs = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  rhs = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  if(lhs != rhs) {
    return (lhs < rhs) ? -1 : 1;
  }

  /* secondary: lead byte 1 stays in place, continuation byte 1 moves to byte 0 */
  lhs = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  rhs = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  if(lhs != rhs) {
    return (lhs < rhs) ? -1 : 1;
  }

  /* tertiary: lead byte 0 moves to byte 1, continuation byte 0 stays */
  lhs = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  rhs = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (lhs < rhs) ? -1 : 1;
}
1028 
1029 typedef struct {
1030   uint32_t startCE;
1031   uint32_t startContCE;
1032   uint32_t limitCE;
1033   uint32_t limitContCE;
1034 } indirectBoundaries;
1035 
1036 /* these values are used for finding CE values for indirect positioning. */
1037 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
1038 /* values. It only works for resets and you cannot tailor indirect names */
1039 /* An indirect name can define either an anchor point or a range. An     */
1040 /* anchor point behaves in exactly the same way as a code point in reset */
1041 /* would, except that it cannot be tailored. A range (we currently only  */
1042 /* know for the [top] range will explicitly set the upper bound for      */
1043 /* generated CEs, thus allowing for better control over how many CEs can */
1044 /* be squeezed between in the range without performance penalty.         */
1045 /* In that respect, we use [top] for tailoring of locales that use CJK   */
1046 /* characters. Other indirect values are currently a pure convenience,   */
1047 /* they can be used to assure that the CEs will be always positioned in  */
1048 /* the same place relative to a point with known properties (e.g. first  */
1049 /* primary ignorable). */
1050 static indirectBoundaries ucolIndirectBoundaries[15];
1051 static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1052 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1053 
1054   /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1055   /* to initalize here. */
1056   ucolIndirectBoundaries[indexR].startCE = start[0];
1057   ucolIndirectBoundaries[indexR].startContCE = start[1];
1058   if(end) {
1059     ucolIndirectBoundaries[indexR].limitCE = end[0];
1060     ucolIndirectBoundaries[indexR].limitContCE = end[1];
1061   } else {
1062     ucolIndirectBoundaries[indexR].limitCE = 0;
1063     ucolIndirectBoundaries[indexR].limitContCE = 0;
1064   }
1065 }
1066 
/*
 * Checks the CEs that coll generates for the tokens of its own tailoring
 * rules. The rules are re-parsed with ucol_tok_parseNextToken; for every
 * non-reset token the CE pair (lead + continuation) of the tailored
 * characters is compared with the CE pair of the reset anchor, with the next
 * CE pair obtained from the inverse UCA (or the first implicit CE when the
 * anchor is [top]) and with the CE pair of the previous token. Violations are
 * reported through log_err.
 *
 * coll   - collator whose rules are examined
 * status - ICU error code; when the root collator cannot be opened an error is
 *          logged and nothing is tested
 */
static void testCEs(UCollator *coll, UErrorCode *status) {

  const UChar *rules = NULL;
  int32_t ruleLen = 0;

  uint32_t strength = 0;
  uint32_t maxStrength = UCOL_IDENTICAL;
  uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
  uint32_t lastCE;
  uint32_t lastContCE;

  int32_t result = 0;
  uint32_t chOffset = 0; uint32_t chLen = 0;
  uint32_t oldOffset = 0;

  uint16_t specs = 0;
  UBool top_ = TRUE;
  UBool startOfRules = TRUE;
  UBool before = FALSE;
  UColTokenParser src;
  UColOptionSet opts;
  UParseError parseError;
  UChar *rulesCopy = NULL;
  collIterate c;
  UCollator *UCA = NULL;
  UCAConstants *consts = NULL;
  uint32_t UCOL_RESET_TOP_VALUE = 0, UCOL_NEXT_TOP_VALUE = 0, UCOL_NEXT_TOP_CONT = 0;

  /* The UCA constants live in the root collator image, so it has to be */
  /* there before anything is read through it. */
  UCA = ucol_open("root", status);
  if(U_FAILURE(*status) || UCA == NULL) {
    log_err("Couldn't open the root collator\n");
    if(UCA != NULL) {
      ucol_close(UCA);
    }
    return;
  }
  consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
  UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
  UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
  UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

  baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

  src.opts = &opts;

  rules = ucol_getRules(coll, &ruleLen);

  src.invUCA = ucol_initInverseUCA(status);

  /* The boundaries table is shared by all calls and filled only once. */
  if(indirectBoundariesSet == FALSE) {
    /* UCOL_RESET_TOP_VALUE */
    setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
    /* UCOL_FIRST_PRIMARY_IGNORABLE */
    setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
    /* UCOL_LAST_PRIMARY_IGNORABLE */
    setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
    /* UCOL_FIRST_SECONDARY_IGNORABLE */
    setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
    /* UCOL_LAST_SECONDARY_IGNORABLE */
    setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
    /* UCOL_FIRST_TERTIARY_IGNORABLE */
    setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
    /* UCOL_LAST_TERTIARY_IGNORABLE */
    setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
    /* UCOL_FIRST_VARIABLE */
    setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
    /* UCOL_LAST_VARIABLE */
    setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
    /* UCOL_FIRST_NON_VARIABLE */
    setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
    /* UCOL_LAST_NON_VARIABLE */
    setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
    /* UCOL_FIRST_IMPLICIT */
    setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
    /* UCOL_LAST_IMPLICIT */
    setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
    /* UCOL_FIRST_TRAILING */
    setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
    /* UCOL_LAST_TRAILING */
    setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
    ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
    indirectBoundariesSet = TRUE;
  }


  if(U_SUCCESS(*status) && ruleLen > 0) {
    /* The parser gets a private copy of the rules with extra room at the end. */
    rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    if(rulesCopy == NULL) {
      log_err("Out of memory while copying the rules\n");
    }
  }

  if(rulesCopy != NULL) {
    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy+ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

    while (ucol_tok_parseNextToken(&src, startOfRules, &parseError,status) != NULL) {
      strength = src.parsedToken.strength;
      chOffset = src.parsedToken.charsOffset;
      chLen = src.parsedToken.charsLen;
      specs = src.parsedToken.flags;

      startOfRules = FALSE;
      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

      uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);

      currCE = ucol_getNextCE(coll, &c, status);
      if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
        log_verbose("Thai prevowel detected. Will pick next CE\n");
        currCE = ucol_getNextCE(coll, &c, status);
      }

      /* The CE after the lead one only counts when it is a continuation. */
      currContCE = ucol_getNextCE(coll, &c, status);
      if(!isContinuation(currContCE)) {
        currContCE = 0;
      }

      /* we need to repack CEs here */

      if(strength == UCOL_TOK_RESET) {
        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
        if(top_ == TRUE) {
          /* Indirect reset: the anchor CE comes from the boundaries table. */
          int32_t boundaryIndex = src.parsedToken.indirectIndex;

          nextCE = baseCE = currCE = ucolIndirectBoundaries[boundaryIndex].startCE;
          nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[boundaryIndex].startContCE;
        } else {
          nextCE = baseCE = currCE;
          nextContCE = baseContCE = currContCE;
        }
        maxStrength = UCOL_IDENTICAL;
      } else {
        if(strength < maxStrength) {
          /* A stronger difference than any seen since the reset: recompute */
          /* the upper limit for the CEs that follow. */
          maxStrength = strength;
          if(baseCE == UCOL_RESET_TOP_VALUE) {
              log_verbose("Resetting to [top]\n");
              nextCE = UCOL_NEXT_TOP_VALUE;
              nextContCE = UCOL_NEXT_TOP_CONT;
          } else {
            result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
          }
          if(result < 0) {
            if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
              log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
            } else {
              log_err("couldn't find the CE\n");
            }
            /* Give up on this collator, but leave through the cleanup below */
            /* so that the rules copy and the root collator are released. */
            break;
          }
        }

        currCE &= 0xFFFFFF3F;
        currContCE &= 0xFFFFFFBF;

        if(maxStrength == UCOL_IDENTICAL) {
          if(baseCE != currCE || baseContCE != currContCE) {
            log_err("current CE  (initial strength UCOL_EQUAL)\n");
          }
        } else {
          if(strength == UCOL_IDENTICAL) {
            if(lastCE != currCE || lastContCE != currContCE) {
              log_err("current CE  (initial strength UCOL_EQUAL)\n");
            }
          } else {
            if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
              log_err("current CE is not less than base CE\n");
            }
            if(!before) {
              if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
              /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                log_err("sequence of generated CEs is broken\n");
              }
            } else {
              /* Only the first token after a [before] reset may sort below it. */
              before = FALSE;
              if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
              /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                log_err("sequence of generated CEs is broken\n");
              }
            }
          }
        }

      }

      oldOffset = chOffset;
      lastCE = currCE & 0xFFFFFF3F;
      lastContCE = currContCE & 0xFFFFFFBF;
    }
    free(rulesCopy);
  }
  ucol_close(UCA);
}
1256 
#if 0
/* Disabled list: these locales are now picked from index RB. */
static const char* localesToTest[] = {
  "ar", "bg", "ca", "cs", "da",
  "el", "en_BE", "en_US_POSIX",
  "es", "et", "fi", "fr", "hi",
  "hr", "hu", "is", "iw", "ja",
  "ko", "lt", "lv", "mk", "mt",
  "nb", "nn", "nn_NO", "pl", "ro",
  "ru", "sh", "sk", "sl", "sq",
  "sr", "sv", "th", "tr", "uk",
  "vi", "zh", "zh_TW"
};
#endif

/*
 * Escaped rule strings that RamsRulesTest turns into collators.
 * The "disabled" comments keep the alternative spellings that are not used.
 */
static const char* rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
  /* disabled: "& Z < p, P" */

  /* Cui Mins rules */
  /* disabled: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
  "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
  /* disabled: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
  "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /* disabled: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
  "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
  /* disabled: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
  /* disabled: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
  "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
  /* disabled: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
  "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
  /* disabled: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
  "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
};
1285 
1286 
TestCollations(void)1287 static void TestCollations(void) {
1288   int32_t noOfLoc = uloc_countAvailable();
1289   int32_t i = 0, j = 0;
1290 
1291   UErrorCode status = U_ZERO_ERROR;
1292   char cName[256];
1293   UChar name[256];
1294   int32_t nameSize;
1295 
1296 
1297   const char *locName = NULL;
1298   UCollator *coll = NULL;
1299   UCollator *UCA = ucol_open("", &status);
1300   UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1301   ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1302 
1303   for(i = 0; i<noOfLoc; i++) {
1304     status = U_ZERO_ERROR;
1305     locName = uloc_getAvailable(i);
1306     if(uprv_strcmp("ja", locName) == 0) {
1307       log_verbose("Don't know how to test prefixes\n");
1308       continue;
1309     }
1310     if(hasCollationElements(locName)) {
1311         nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1312         for(j = 0; j<nameSize; j++) {
1313           cName[j] = (char)name[j];
1314         }
1315         cName[nameSize] = 0;
1316         log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1317         coll = ucol_open(locName, &status);
1318         if(U_SUCCESS(status)) {
1319           testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1320           ucol_close(coll);
1321         } else {
1322           log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1323           status = U_ZERO_ERROR;
1324         }
1325     }
1326   }
1327   ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1328   ucol_close(UCA);
1329 }
1330 
RamsRulesTest(void)1331 static void RamsRulesTest(void) {
1332   UErrorCode status = U_ZERO_ERROR;
1333   int32_t i = 0;
1334   UCollator *coll = NULL;
1335   UChar rule[2048];
1336   uint32_t ruleLen;
1337   int32_t noOfLoc = uloc_countAvailable();
1338   const char *locName = NULL;
1339 
1340   log_verbose("RamsRulesTest\n");
1341 
1342   for(i = 0; i<noOfLoc; i++) {
1343     status = U_ZERO_ERROR;
1344     locName = uloc_getAvailable(i);
1345     if(hasCollationElements(locName)) {
1346       if (uprv_strcmp("ja", locName)==0) {
1347         log_verbose("Don't know how to test Japanese because of prefixes\n");
1348         continue;
1349       }
1350       if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1351         log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1352         continue;
1353       }
1354       if (uprv_strcmp("km", locName)==0 ||
1355           uprv_strcmp("km_KH", locName)==0 ||
1356           uprv_strcmp("zh", locName)==0 ||
1357           uprv_strcmp("zh_Hant", locName)==0 ) {
1358           continue;  /* TODO: enable these locale tests after trac#6040 is fixed. */
1359       }
1360       log_verbose("Testing locale %s\n", locName);
1361       coll = ucol_open(locName, &status);
1362       if(U_SUCCESS(status)) {
1363         if(coll->image->jamoSpecial == TRUE) {
1364           log_err("%s has special JAMOs\n", locName);
1365         }
1366         ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1367         testCollator(coll, &status);
1368         testCEs(coll, &status);
1369         ucol_close(coll);
1370       }
1371     }
1372   }
1373 
1374   for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1375     log_verbose("Testing rule: %s\n", rulesToTest[i]);
1376     ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1377     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1378     if(U_SUCCESS(status)) {
1379       testCollator(coll, &status);
1380       testCEs(coll, &status);
1381       ucol_close(coll);
1382     }
1383   }
1384 
1385 }
1386 
IsTailoredTest(void)1387 static void IsTailoredTest(void) {
1388   UErrorCode status = U_ZERO_ERROR;
1389   uint32_t i = 0;
1390   UCollator *coll = NULL;
1391   UChar rule[2048];
1392   UChar tailored[2048];
1393   UChar notTailored[2048];
1394   uint32_t ruleLen, tailoredLen, notTailoredLen;
1395 
1396   log_verbose("IsTailoredTest\n");
1397 
1398   u_uastrcpy(rule, "&Z < A, B, C;c < d");
1399   ruleLen = u_strlen(rule);
1400 
1401   u_uastrcpy(tailored, "ABCcd");
1402   tailoredLen = u_strlen(tailored);
1403 
1404   u_uastrcpy(notTailored, "ZabD");
1405   notTailoredLen = u_strlen(notTailored);
1406 
1407   coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1408   if(U_SUCCESS(status)) {
1409     for(i = 0; i<tailoredLen; i++) {
1410       if(!ucol_isTailored(coll, tailored[i], &status)) {
1411         log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1412       }
1413     }
1414     for(i = 0; i<notTailoredLen; i++) {
1415       if(ucol_isTailored(coll, notTailored[i], &status)) {
1416         log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1417       }
1418     }
1419     ucol_close(coll);
1420   }
1421   else {
1422     log_err("Can't tailor rules");
1423   }
1424   /* Code coverage */
1425   status = U_ZERO_ERROR;
1426   coll = ucol_open("ja", &status);
1427   if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1428     log_err("0x4E9C should be tailored - it is reported as not\n");
1429   }
1430   ucol_close(coll);
1431 }
1432 
1433 
/*
 * Escaped test strings for TestChMove, which hands every (earlier, later)
 * pair of this table to doTest with UCOL_LESS.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1453 
TestChMove(void)1454 static void TestChMove(void) {
1455   UChar t1[256] = {0};
1456   UChar t2[256] = {0};
1457 
1458   uint32_t i = 0, j = 0;
1459   uint32_t size = 0;
1460   UErrorCode status = U_ZERO_ERROR;
1461 
1462   UCollator *coll = ucol_open("cs", &status);
1463 
1464   if(U_SUCCESS(status)) {
1465     size = sizeof(chTest)/sizeof(chTest[0]);
1466     for(i = 0; i < size-1; i++) {
1467       for(j = i+1; j < size; j++) {
1468         u_unescape(chTest[i], t1, 256);
1469         u_unescape(chTest[j], t2, 256);
1470         doTest(coll, t1, t2, UCOL_LESS);
1471       }
1472     }
1473   }
1474   else {
1475     log_err("Can't open collator");
1476   }
1477   ucol_close(coll);
1478 }
1479 
1480 
1481 
1482 
/*
 * Escaped strings in the same order as the second data set of
 * TestImplicitTailoring.
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1491 
1492 
/*
 * Feeds a few tailorings that reset on \u4e00 / \u4e01 to
 * genericRulesStarter, each together with its list of test strings and the
 * number of strings in that list.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;      /* escaped tailoring rules */
    const char *data[10];   /* escaped test strings for these rules */
    const uint32_t len;     /* number of used entries in data */
  } tests[] = {
      {
        "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
        { "d", "e", "b", "c", "\\u4e00"},
        5
      },
      {
        "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"},
        6
      },
      {
        "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"},
        3
      },
      {
        "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"},
        3
      }
  };

  /*
   * An older variant of this test opened a collator for the second rule and
   * ran doTest pairwise over impTest; that data set is the second entry above.
   */
  int32_t testNo = 0;

  while(testNo < sizeof(tests)/sizeof(tests[0])) {
      genericRulesStarter(tests[testNo].rules, tests[testNo].data, tests[testNo].len);
      testNo++;
  }
}
1542 
TestFCDProblem(void)1543 static void TestFCDProblem(void) {
1544   UChar t1[256] = {0};
1545   UChar t2[256] = {0};
1546 
1547   const char *s1 = "\\u0430\\u0306\\u0325";
1548   const char *s2 = "\\u04D1\\u0325";
1549 
1550   UErrorCode status = U_ZERO_ERROR;
1551   UCollator *coll = ucol_open("", &status);
1552   u_unescape(s1, t1, 256);
1553   u_unescape(s2, t2, 256);
1554 
1555   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1556   doTest(coll, t1, t2, UCOL_EQUAL);
1557 
1558   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1559   doTest(coll, t1, t2, UCOL_EQUAL);
1560 
1561   ucol_close(coll);
1562 }
1563 
1564 #define NORM_BUFFER_TEST_LEN 32
1565 typedef struct {
1566   UChar32 u;
1567   UChar NFC[NORM_BUFFER_TEST_LEN];
1568   UChar NFD[NORM_BUFFER_TEST_LEN];
1569 } tester;
1570 
TestComposeDecompose(void)1571 static void TestComposeDecompose(void) {
1572     int32_t noOfLoc;
1573     int32_t i = 0, j = 0;
1574 
1575     UErrorCode status = U_ZERO_ERROR;
1576 
1577     const char *locName = NULL;
1578 
1579     uint32_t nfcSize;
1580     uint32_t nfdSize;
1581     tester **t;
1582     uint32_t noCases = 0;
1583     UCollator *coll = NULL;
1584     UChar32 u = 0;
1585     UChar comp[NORM_BUFFER_TEST_LEN];
1586     uint32_t len = 0;
1587     UCollationElements *iter;
1588 
1589     noOfLoc = uloc_countAvailable();
1590 
1591     t = malloc(0x30000 * sizeof(tester *));
1592     t[0] = (tester *)malloc(sizeof(tester));
1593     log_verbose("Testing UCA extensively\n");
1594     coll = ucol_open("", &status);
1595     if(status == U_FILE_ACCESS_ERROR) {
1596       log_data_err("Is your data around?\n");
1597       return;
1598     } else if(U_FAILURE(status)) {
1599       log_err("Error opening collator\n");
1600       return;
1601     }
1602 
1603 
1604     for(u = 0; u < 0x30000; u++) {
1605       len = 0;
1606       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1607         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1608         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1609 
1610         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1611           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1612             t[noCases]->u = u;
1613             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1614               u_strncpy(t[noCases]->NFC, comp, len);
1615               t[noCases]->NFC[len] = 0;
1616             }
1617             noCases++;
1618             t[noCases] = (tester *)malloc(sizeof(tester));
1619             uprv_memset(t[noCases], 0, sizeof(tester));
1620         }
1621     }
1622 
1623     for(u=0; u<(UChar32)noCases; u++) {
1624       if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1625         log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1626         doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1627       }
1628     }
1629     /*
1630     for(u = 0; u < 0x30000; u++) {
1631       if(!(u&0xFFFF)) {
1632         log_verbose("%08X ", u);
1633       }
1634       uprv_memset(t[noCases], 0, sizeof(tester));
1635       t[noCases]->u = u;
1636       len = 0;
1637       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1638       comp[len] = 0;
1639       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1640       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1641       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1642       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1643     }
1644     */
1645 
1646     ucol_close(coll);
1647 
1648     log_verbose("Testing locales, number of cases = %i\n", noCases);
1649     for(i = 0; i<noOfLoc; i++) {
1650         status = U_ZERO_ERROR;
1651         locName = uloc_getAvailable(i);
1652         if(hasCollationElements(locName)) {
1653             char cName[256];
1654             UChar name[256];
1655             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1656 
1657             for(j = 0; j<nameSize; j++) {
1658                 cName[j] = (char)name[j];
1659             }
1660             cName[nameSize] = 0;
1661             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1662 
1663             coll = ucol_open(locName, &status);
1664             ucol_setStrength(coll, UCOL_IDENTICAL);
1665             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1666 
1667             for(u=0; u<(UChar32)noCases; u++) {
1668               if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1669                 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1670                 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1671                 log_verbose("Testing NFC\n");
1672                 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1673                   backAndForth(iter);
1674                 log_verbose("Testing NFD\n");
1675                   ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1676                   backAndForth(iter);
1677               }
1678             }
1679             ucol_closeElements(iter);
1680             ucol_close(coll);
1681         }
1682     }
1683     for(u = 0; u <= (UChar32)noCases; u++) {
1684         free(t[u]);
1685     }
1686     free(t);
1687 }
1688 
TestEmptyRule(void)1689 static void TestEmptyRule(void) {
1690   UErrorCode status = U_ZERO_ERROR;
1691   UChar rulez[] = { 0 };
1692   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1693 
1694   ucol_close(coll);
1695 }
1696 
TestUCARules(void)1697 static void TestUCARules(void) {
1698   UErrorCode status = U_ZERO_ERROR;
1699   UChar b[256];
1700   UChar *rules = b;
1701   uint32_t ruleLen = 0;
1702   UCollator *UCAfromRules = NULL;
1703   UCollator *coll = ucol_open("", &status);
1704   if(status == U_FILE_ACCESS_ERROR) {
1705     log_data_err("Is your data around?\n");
1706     return;
1707   } else if(U_FAILURE(status)) {
1708     log_err("Error opening collator\n");
1709     return;
1710   }
1711   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1712 
1713   log_verbose("TestUCARules\n");
1714   if(ruleLen > 256) {
1715     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1716     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1717   }
1718   log_verbose("Rules length is %d\n", ruleLen);
1719   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1720   if(U_SUCCESS(status)) {
1721     ucol_close(UCAfromRules);
1722   } else {
1723     log_verbose("Unable to create a collator from UCARules!\n");
1724   }
1725 /*
1726   u_unescape(blah, b, 256);
1727   ucol_getSortKey(coll, b, 1, res, 256);
1728 */
1729   ucol_close(coll);
1730   if(rules != b) {
1731     free(rules);
1732   }
1733 }
1734 
1735 
1736 /* Pinyin tonal order */
1737 /*
1738     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1739           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1740     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1741     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1742     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1743     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1744       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1745 .. (\u00fc)
1746 
1747 However, in testing we got the following order:
1748     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1749           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1750     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1751 .. (\u0113)
1752     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1753     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1754     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1755 .. (\u01d8)
1756       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1757 */
1758 
/*
 * Hands a tailoring that places the tone-marked vowels with [before 1]
 * resets on a, e, i, o and u (plus the u-diaeresis forms after u) to
 * genericRulesStarter, together with the list of test strings below.
 * Presumably the list is the expected sort order - see genericRulesStarter.
 */
static void TestBefore(void) {
  static const char *data[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char pinyinRules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(pinyinRules, data, sizeof(data)/sizeof(data[0]));
}
1777 
#if 0
/* Disabled: superseded by TestBeforePinyin. */
static void TestJ784(void) {
  static const char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1793 
#if 0
/* Disabled: superseded by the changes to the lv locale. */
static void TestJ831(void) {
  static const char *data[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1806 
/*
 * Passes the same list of test strings first to genericLocaleStarter for the
 * "fr" locale and then to genericRulesStarter with a [backwards 2] tailoring
 * of \u00e6 / \u00c6.
 */
static void TestJ815(void) {
  static const char *data[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  const uint32_t dataLen = sizeof(data)/sizeof(data[0]);

  genericLocaleStarter("fr", data, dataLen);
  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, dataLen);
}
1827 
1828 
1829 /*
1830 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1831 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1832 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1833 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1834 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1835 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1836 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1837 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1838 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1839 */
TestRedundantRules(void)1840 static void TestRedundantRules(void) {
1841   int32_t i;
1842 
1843   static const struct {
1844       const char *rules;
1845       const char *expectedRules;
1846       const char *testdata[8];
1847       uint32_t testdatalen;
1848   } tests[] = {
1849     /* this test conflicts with positioning of CODAN placeholder */
1850        /*{
1851         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1852         "&\\u2089<<<x",
1853         {"\\u2089", "x"}, 2
1854        }, */
1855     /* this test conflicts with the [before x] syntax tightening */
1856       /*{
1857         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1858         "&\\u0252<<<x",
1859         {"\\u0252", "x"}, 2
1860       }, */
1861     /* this test conflicts with the [before x] syntax tightening */
1862       /*{
1863          "& a < b <<< c << d <<< e& [before 1] e <<< x",
1864          "& a <<< x < b <<< c << d <<< e",
1865         {"a", "x", "b", "c", "d", "e"}, 6
1866       }, */
1867       {
1868         "& a < b < c < d& [before 1] c < m",
1869         "& a < b < m < c < d",
1870         {"a", "b", "m", "c", "d"}, 5
1871       },
1872       {
1873         "& a < b <<< c << d <<< e& [before 3] e <<< x",
1874         "& a < b <<< c << d <<< x <<< e",
1875         {"a", "b", "c", "d", "x", "e"}, 6
1876       },
1877     /* this test conflicts with the [before x] syntax tightening */
1878       /* {
1879         "& a < b <<< c << d <<< e& [before 2] e <<< x",
1880         "& a < b <<< c <<< x << d <<< e",
1881         {"a", "b", "c", "x", "d", "e"},, 6
1882       }, */
1883       {
1884         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1885         "& a < b <<< c << d <<< e <<< f < x < g",
1886         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1887       },
1888       {
1889         "& a <<< b << c < d& a < m",
1890         "& a <<< b << c < m < d",
1891         {"a", "b", "c", "m", "d"}, 5
1892       },
1893       {
1894         "&a<b<<b\\u0301 &z<b",
1895         "&a<b\\u0301 &z<b",
1896         {"a", "b\\u0301", "z", "b"}, 4
1897       },
1898       {
1899         "&z<m<<<q<<<m",
1900         "&z<q<<<m",
1901         {"z", "q", "m"},3
1902       },
1903       {
1904         "&z<<<m<q<<<m",
1905         "&z<q<<<m",
1906         {"z", "q", "m"}, 3
1907       },
1908       {
1909         "& a < b < c < d& r < c",
1910         "& a < b < d& r < c",
1911         {"a", "b", "d"}, 3
1912       },
1913       {
1914         "& a < b < c < d& r < c",
1915         "& a < b < d& r < c",
1916         {"r", "c"}, 2
1917       },
1918       {
1919         "& a < b < c < d& c < m",
1920         "& a < b < c < m < d",
1921         {"a", "b", "c", "m", "d"}, 5
1922       },
1923       {
1924         "& a < b < c < d& a < m",
1925         "& a < m < b < c < d",
1926         {"a", "m", "b", "c", "d"}, 5
1927       }
1928   };
1929 
1930 
1931   UCollator *credundant = NULL;
1932   UCollator *cresulting = NULL;
1933   UErrorCode status = U_ZERO_ERROR;
1934   UChar rlz[2048] = { 0 };
1935   uint32_t rlen = 0;
1936 
1937   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1938     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1939     rlen = u_unescape(tests[i].rules, rlz, 2048);
1940 
1941     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1942     if(status == U_FILE_ACCESS_ERROR) {
1943       log_data_err("Is your data around?\n");
1944       return;
1945     } else if(U_FAILURE(status)) {
1946       log_err("Error opening collator\n");
1947       return;
1948     }
1949 
1950     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1951     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1952 
1953     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1954 
1955     ucol_close(credundant);
1956     ucol_close(cresulting);
1957 
1958     log_verbose("testing using data\n");
1959 
1960     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
1961   }
1962 
1963 }
1964 
TestExpansionSyntax(void)1965 static void TestExpansionSyntax(void) {
1966   int32_t i;
1967 
1968   const static char *rules[] = {
1969     "&AE <<< a << b <<< c &d <<< f",
1970     "&AE <<< a <<< b << c << d < e < f <<< g",
1971     "&AE <<< B <<< C / D <<< F"
1972   };
1973 
1974   const static char *expectedRules[] = {
1975     "&A <<< a / E << b / E <<< c /E  &d <<< f",
1976     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
1977     "&A <<< B / E <<< C / ED <<< F / E"
1978   };
1979 
1980   const static char *testdata[][8] = {
1981     {"AE", "a", "b", "c"},
1982     {"AE", "a", "b", "c", "d", "e", "f", "g"},
1983     {"AE", "B", "C"} /* / ED <<< F / E"},*/
1984   };
1985 
1986   const static uint32_t testdatalen[] = {
1987       4,
1988       8,
1989       3
1990   };
1991 
1992 
1993 
1994   UCollator *credundant = NULL;
1995   UCollator *cresulting = NULL;
1996   UErrorCode status = U_ZERO_ERROR;
1997   UChar rlz[2048] = { 0 };
1998   uint32_t rlen = 0;
1999 
2000   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2001     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2002     rlen = u_unescape(rules[i], rlz, 2048);
2003 
2004     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2005     if(status == U_FILE_ACCESS_ERROR) {
2006       log_data_err("Is your data around?\n");
2007       return;
2008     } else if(U_FAILURE(status)) {
2009       log_err("Error opening collator\n");
2010       return;
2011     }
2012     rlen = u_unescape(expectedRules[i], rlz, 2048);
2013     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2014 
2015     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2016     /* as a hard error test, but only in information mode */
2017     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2018 
2019     ucol_close(credundant);
2020     ucol_close(cresulting);
2021 
2022     log_verbose("testing using data\n");
2023 
2024     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2025   }
2026 }
2027 
TestCase(void)2028 static void TestCase(void)
2029 {
2030     const static UChar gRules[MAX_TOKEN_LEN] =
2031     /*" & 0 < 1,\u2461<a,A"*/
2032     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2033 
2034     const static UChar testCase[][MAX_TOKEN_LEN] =
2035     {
2036         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2037         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2038         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2039         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2040     };
2041 
2042     const static UCollationResult caseTestResults[][9] =
2043     {
2044         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2045         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2046         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2047         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2048     };
2049 
2050     const static UColAttributeValue caseTestAttributes[][2] =
2051     {
2052         { UCOL_LOWER_FIRST, UCOL_OFF},
2053         { UCOL_UPPER_FIRST, UCOL_OFF},
2054         { UCOL_LOWER_FIRST, UCOL_ON},
2055         { UCOL_UPPER_FIRST, UCOL_ON}
2056     };
2057     int32_t i,j,k;
2058     UErrorCode status = U_ZERO_ERROR;
2059     UCollationElements *iter;
2060     UCollator  *myCollation;
2061     myCollation = ucol_open("en_US", &status);
2062 
2063     if(U_FAILURE(status)){
2064         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2065         return;
2066     }
2067     log_verbose("Testing different case settings\n");
2068     ucol_setStrength(myCollation, UCOL_TERTIARY);
2069 
2070     for(k = 0; k<4; k++) {
2071       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2072       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2073       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2074       for (i = 0; i < 3 ; i++) {
2075         for(j = i+1; j<4; j++) {
2076           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2077         }
2078       }
2079     }
2080     ucol_close(myCollation);
2081 
2082     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2083     if(U_FAILURE(status)){
2084         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2085         return;
2086     }
2087     log_verbose("Testing different case settings with custom rules\n");
2088     ucol_setStrength(myCollation, UCOL_TERTIARY);
2089 
2090     for(k = 0; k<4; k++) {
2091       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2092       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2093       for (i = 0; i < 3 ; i++) {
2094         for(j = i+1; j<4; j++) {
2095           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2096           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2097           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2098           backAndForth(iter);
2099           ucol_closeElements(iter);
2100           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2101           backAndForth(iter);
2102           ucol_closeElements(iter);
2103         }
2104       }
2105     }
2106     ucol_close(myCollation);
2107     {
2108       const static char *lowerFirst[] = {
2109         "h",
2110         "H",
2111         "ch",
2112         "Ch",
2113         "CH",
2114         "cha",
2115         "chA",
2116         "Cha",
2117         "ChA",
2118         "CHa",
2119         "CHA",
2120         "i",
2121         "I"
2122       };
2123 
2124       const static char *upperFirst[] = {
2125         "H",
2126         "h",
2127         "CH",
2128         "Ch",
2129         "ch",
2130         "CHA",
2131         "CHa",
2132         "ChA",
2133         "Cha",
2134         "chA",
2135         "cha",
2136         "I",
2137         "i"
2138       };
2139       log_verbose("mixed case test\n");
2140       log_verbose("lower first, case level off\n");
2141       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2142       log_verbose("upper first, case level off\n");
2143       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2144       log_verbose("lower first, case level on\n");
2145       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2146       log_verbose("upper first, case level on\n");
2147       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2148     }
2149 
2150 }
2151 
TestIncrementalNormalize(void)2152 static void TestIncrementalNormalize(void) {
2153 
2154     /*UChar baseA     =0x61;*/
2155     UChar baseA     =0x41;
2156 /*    UChar baseB     = 0x42;*/
2157     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2158     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2159     /*
2160         0x316 is combining grave accent below, cc=220
2161         0x321 is combining palatalized hook below, cc=202
2162         0x300 is combining grave accent, cc=230
2163     */
2164 
2165     int          maxSLen   = 2000;
2166     /*int          maxSLen   = 64000;*/
2167     int          sLen;
2168     int          i;
2169 
2170     UCollator        *coll;
2171     UErrorCode       status = U_ZERO_ERROR;
2172     UCollationResult result;
2173 
2174     int32_t myQ = QUICK;
2175 
2176     if(QUICK < 0) {
2177       QUICK = 1;
2178     }
2179 
2180     {
2181         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2182         /*          most buffers along the way.*/
2183         UChar            *strA;
2184         UChar            *strB;
2185 
2186         strA = malloc((maxSLen+1) * sizeof(UChar));
2187         strB = malloc((maxSLen+1) * sizeof(UChar));
2188 
2189         coll = ucol_open("en_US", &status);
2190         if(status == U_FILE_ACCESS_ERROR) {
2191           log_data_err("Is your data around?\n");
2192           return;
2193         } else if(U_FAILURE(status)) {
2194           log_err("Error opening collator\n");
2195           return;
2196         }
2197         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2198 
2199         /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2200         /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2201         /*for (sLen = 1000; sLen<1001; sLen++) {*/
2202         for (sLen = 500; sLen<501; sLen++) {
2203         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2204             strA[0] = baseA;
2205             strB[0] = baseA;
2206             for (i=1; i<=sLen-1; i++) {
2207                 strA[i] = ccMix[i % 3];
2208                 strB[sLen-i] = ccMix[i % 3];
2209             }
2210             strA[sLen]   = 0;
2211             strB[sLen]   = 0;
2212 
2213             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2214             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2215             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2216             doTest(coll, strA, strB, UCOL_EQUAL);
2217         }
2218         free(strA);
2219         free(strB);
2220     }
2221 
2222     QUICK = myQ;
2223 
2224 
2225     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2226     /*         of the string.  Checks a couple of edge cases.*/
2227 
2228     {
2229         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2230         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2231         ucol_setStrength(coll, UCOL_TERTIARY);
2232         doTest(coll, strA, strB, UCOL_EQUAL);
2233     }
2234 
2235     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2236 
2237     {
2238       /* New UCA  3.1.1.
2239        * test below used a code point from Desseret, which sorts differently
2240        * than d800 dc00
2241        */
2242         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2243         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2244         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2245         ucol_setStrength(coll, UCOL_TERTIARY);
2246         doTest(coll, strA, strB, UCOL_GREATER);
2247     }
2248 
2249     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2250 
2251     {
2252         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2253         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2254         char  sortKeyA[50];
2255         char  sortKeyAz[50];
2256         char  sortKeyB[50];
2257         char  sortKeyBz[50];
2258         int   r;
2259 
2260         /* there used to be -3 here. Hmmmm.... */
2261         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2262         result = ucol_strcoll(coll, strA, 3, strB, 3);
2263         if (result != UCOL_GREATER) {
2264             log_err("ERROR 1 in test 4\n");
2265         }
2266         result = ucol_strcoll(coll, strA, -1, strB, -1);
2267         if (result != UCOL_EQUAL) {
2268             log_err("ERROR 2 in test 4\n");
2269         }
2270 
2271         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2272         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2273         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2274         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2275 
2276         r = strcmp(sortKeyA, sortKeyAz);
2277         if (r <= 0) {
2278             log_err("Error 3 in test 4\n");
2279         }
2280         r = strcmp(sortKeyA, sortKeyB);
2281         if (r <= 0) {
2282             log_err("Error 4 in test 4\n");
2283         }
2284         r = strcmp(sortKeyAz, sortKeyBz);
2285         if (r != 0) {
2286             log_err("Error 5 in test 4\n");
2287         }
2288 
2289         ucol_setStrength(coll, UCOL_IDENTICAL);
2290         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2291         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2292         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2293         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2294 
2295         r = strcmp(sortKeyA, sortKeyAz);
2296         if (r <= 0) {
2297             log_err("Error 6 in test 4\n");
2298         }
2299         r = strcmp(sortKeyA, sortKeyB);
2300         if (r <= 0) {
2301             log_err("Error 7 in test 4\n");
2302         }
2303         r = strcmp(sortKeyAz, sortKeyBz);
2304         if (r != 0) {
2305             log_err("Error 8 in test 4\n");
2306         }
2307         ucol_setStrength(coll, UCOL_TERTIARY);
2308     }
2309 
2310 
2311     /*  Test 5:  Null characters in non-normal source strings.*/
2312 
2313     {
2314         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2315         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2316         char  sortKeyA[50];
2317         char  sortKeyAz[50];
2318         char  sortKeyB[50];
2319         char  sortKeyBz[50];
2320         int   r;
2321 
2322         result = ucol_strcoll(coll, strA, 6, strB, 6);
2323         if (result != UCOL_GREATER) {
2324             log_err("ERROR 1 in test 5\n");
2325         }
2326         result = ucol_strcoll(coll, strA, -1, strB, -1);
2327         if (result != UCOL_EQUAL) {
2328             log_err("ERROR 2 in test 5\n");
2329         }
2330 
2331         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2332         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2333         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2334         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2335 
2336         r = strcmp(sortKeyA, sortKeyAz);
2337         if (r <= 0) {
2338             log_err("Error 3 in test 5\n");
2339         }
2340         r = strcmp(sortKeyA, sortKeyB);
2341         if (r <= 0) {
2342             log_err("Error 4 in test 5\n");
2343         }
2344         r = strcmp(sortKeyAz, sortKeyBz);
2345         if (r != 0) {
2346             log_err("Error 5 in test 5\n");
2347         }
2348 
2349         ucol_setStrength(coll, UCOL_IDENTICAL);
2350         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2351         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2352         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2353         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2354 
2355         r = strcmp(sortKeyA, sortKeyAz);
2356         if (r <= 0) {
2357             log_err("Error 6 in test 5\n");
2358         }
2359         r = strcmp(sortKeyA, sortKeyB);
2360         if (r <= 0) {
2361             log_err("Error 7 in test 5\n");
2362         }
2363         r = strcmp(sortKeyAz, sortKeyBz);
2364         if (r != 0) {
2365             log_err("Error 8 in test 5\n");
2366         }
2367         ucol_setStrength(coll, UCOL_TERTIARY);
2368     }
2369 
2370 
2371     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2372 
2373     {
2374         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2375         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2376 
2377         result = ucol_strcoll(coll, strA, 5, strB, 5);
2378         if (result != UCOL_LESS) {
2379             log_err("Error 1 in test 6\n");
2380         }
2381         result = ucol_strcoll(coll, strA, -1, strB, -1);
2382         if (result != UCOL_EQUAL) {
2383             log_err("Error 2 in test 6\n");
2384         }
2385     }
2386 
2387     ucol_close(coll);
2388 }
2389 
2390 
2391 
2392 #if 0
2393 static void TestGetCaseBit(void) {
2394   static const char *caseBitData[] = {
2395     "a", "A", "ch", "Ch", "CH",
2396       "\\uFF9E", "\\u0009"
2397   };
2398 
2399   static const uint8_t results[] = {
2400     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
2401       UCOL_UPPER_CASE, UCOL_LOWER_CASE
2402   };
2403 
2404   uint32_t i, blen = 0;
2405   UChar b[256] = {0};
2406   UErrorCode status = U_ZERO_ERROR;
2407   UCollator *UCA = ucol_open("", &status);
2408   uint8_t res = 0;
2409 
2410   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
2411     blen = u_unescape(caseBitData[i], b, 256);
2412     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
2413     if(results[i] != res) {
2414       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
2415     }
2416   }
2417 }
2418 #endif
2419 
TestHangulTailoring(void)2420 static void TestHangulTailoring(void) {
2421     static const char *koreanData[] = {
2422         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2423             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2424             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2425             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2426             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2427             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2428     };
2429 
2430     const char *rules =
2431         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2432         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2433         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2434         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2435         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2436         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2437 
2438 
2439   UErrorCode status = U_ZERO_ERROR;
2440   UChar rlz[2048] = { 0 };
2441   uint32_t rlen = u_unescape(rules, rlz, 2048);
2442 
2443   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2444   if(status == U_FILE_ACCESS_ERROR) {
2445     log_data_err("Is your data around?\n");
2446     return;
2447   } else if(U_FAILURE(status)) {
2448     log_err("Error opening collator\n");
2449     return;
2450   }
2451 
2452   log_verbose("Using start of korean rules\n");
2453 
2454   if(U_SUCCESS(status)) {
2455     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2456   } else {
2457     log_err("Unable to open collator with rules %s\n", rules);
2458   }
2459 
2460   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2461   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2462   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2463 
2464   ucol_close(coll);
2465 
2466   log_verbose("Using ko__LOTUS locale\n");
2467   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2468 }
2469 
TestCompressOverlap(void)2470 static void TestCompressOverlap(void) {
2471     UChar       secstr[150];
2472     UChar       tertstr[150];
2473     UErrorCode  status = U_ZERO_ERROR;
2474     UCollator  *coll;
2475     char        result[200];
2476     uint32_t    resultlen;
2477     int         count = 0;
2478     char       *tempptr;
2479 
2480     coll = ucol_open("", &status);
2481 
2482     if (U_FAILURE(status)) {
2483         log_err("Collator can't be created\n");
2484         return;
2485     }
2486     while (count < 149) {
2487         secstr[count] = 0x0020; /* [06, 05, 05] */
2488         tertstr[count] = 0x0020;
2489         count ++;
2490     }
2491 
2492     /* top down compression ----------------------------------- */
2493     secstr[count] = 0x0332; /* [, 87, 05] */
2494     tertstr[count] = 0x3000; /* [06, 05, 07] */
2495 
2496     /* no compression secstr should have 150 secondary bytes, tertstr should
2497     have 150 tertiary bytes.
2498     with correct overlapping compression, secstr should have 4 secondary
2499     bytes, tertstr should have > 2 tertiary bytes */
2500     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2501     tempptr = uprv_strchr(result, 1) + 1;
2502     while (*(tempptr + 1) != 1) {
2503         /* the last secondary collation element is not checked since it is not
2504         part of the compression */
2505         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2506             log_err("Secondary compression overlapped\n");
2507         }
2508         tempptr ++;
2509     }
2510 
2511     /* tertiary top/bottom/common for en_US is similar to the secondary
2512     top/bottom/common */
2513     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2514     tempptr = uprv_strrchr(result, 1) + 1;
2515     while (*(tempptr + 1) != 0) {
2516         /* the last secondary collation element is not checked since it is not
2517         part of the compression */
2518         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2519             log_err("Tertiary compression overlapped\n");
2520         }
2521         tempptr ++;
2522     }
2523 
2524     /* bottom up compression ------------------------------------- */
2525     secstr[count] = 0;
2526     tertstr[count] = 0;
2527     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2528     tempptr = uprv_strchr(result, 1) + 1;
2529     while (*(tempptr + 1) != 1) {
2530         /* the last secondary collation element is not checked since it is not
2531         part of the compression */
2532         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2533             log_err("Secondary compression overlapped\n");
2534         }
2535         tempptr ++;
2536     }
2537 
2538     /* tertiary top/bottom/common for en_US is similar to the secondary
2539     top/bottom/common */
2540     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2541     tempptr = uprv_strrchr(result, 1) + 1;
2542     while (*(tempptr + 1) != 0) {
2543         /* the last secondary collation element is not checked since it is not
2544         part of the compression */
2545         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2546             log_err("Tertiary compression overlapped\n");
2547         }
2548         tempptr ++;
2549     }
2550 
2551     ucol_close(coll);
2552 }
2553 
TestCyrillicTailoring(void)2554 static void TestCyrillicTailoring(void) {
2555   static const char *test[] = {
2556     "\\u0410b",
2557       "\\u0410\\u0306a",
2558       "\\u04d0A"
2559   };
2560 
2561     /* Russian overrides contractions, so this test is not valid anymore */
2562     /*genericLocaleStarter("ru", test, 3);*/
2563 
2564     genericLocaleStarter("root", test, 3);
2565     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2566     genericRulesStarter("&Z < \\u0410", test, 3);
2567     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2568     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2569     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2570     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2571 }
2572 
TestSuppressContractions(void)2573 static void TestSuppressContractions(void) {
2574 
2575   static const char *testNoCont2[] = {
2576       "\\u0410\\u0302a",
2577       "\\u0410\\u0306b",
2578       "\\u0410c"
2579   };
2580   static const char *testNoCont[] = {
2581       "a\\u0410",
2582       "A\\u0410\\u0306",
2583       "\\uFF21\\u0410\\u0302"
2584   };
2585 
2586   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
2587   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
2588 }
2589 
TestContraction(void)2590 static void TestContraction(void) {
2591     const static char *testrules[] = {
2592         "&A = AB / B",
2593         "&A = A\\u0306/\\u0306",
2594         "&c = ch / h"
2595     };
2596     const static UChar testdata[][2] = {
2597         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2598         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2599         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2600     };
2601     const static UChar testdata2[][2] = {
2602         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2603         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2604         {0x0063 /* 'c' */, 0x006C /* 'l' */}
2605     };
2606     const static char *testrules3[] = {
2607         "&z < xyz &xyzw << B",
2608         "&z < xyz &xyz << B / w",
2609         "&z < ch &achm << B",
2610         "&z < ch &a << B / chm",
2611         "&\\ud800\\udc00w << B",
2612         "&\\ud800\\udc00 << B / w",
2613         "&a\\ud800\\udc00m << B",
2614         "&a << B / \\ud800\\udc00m",
2615     };
2616 
2617     UErrorCode  status   = U_ZERO_ERROR;
2618     UCollator  *coll;
2619     UChar       rule[256] = {0};
2620     uint32_t    rlen     = 0;
2621     int         i;
2622 
2623     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2624         UCollationElements *iter1;
2625         int j = 0;
2626         log_verbose("Rule %s for testing\n", testrules[i]);
2627         rlen = u_unescape(testrules[i], rule, 32);
2628         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2629         if (U_FAILURE(status)) {
2630             log_err("Collator creation failed %s\n", testrules[i]);
2631             return;
2632         }
2633         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2634         if (U_FAILURE(status)) {
2635             log_err("Collation iterator creation failed\n");
2636             return;
2637         }
2638         while (j < 2) {
2639             UCollationElements *iter2 = ucol_openElements(coll,
2640                                                          &(testdata[i][j]),
2641                                                          1, &status);
2642             uint32_t ce;
2643             if (U_FAILURE(status)) {
2644                 log_err("Collation iterator creation failed\n");
2645                 return;
2646             }
2647             ce = ucol_next(iter2, &status);
2648             while (ce != UCOL_NULLORDER) {
2649                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2650                     log_err("Collation elements in contraction split does not match\n");
2651                     return;
2652                 }
2653                 ce = ucol_next(iter2, &status);
2654             }
2655             j ++;
2656             ucol_closeElements(iter2);
2657         }
2658         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2659             log_err("Collation elements not exhausted\n");
2660             return;
2661         }
2662         ucol_closeElements(iter1);
2663         ucol_close(coll);
2664     }
2665 
2666     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2667     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2668     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2669         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2670                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2671                 testdata2[1][1]);
2672         return;
2673     }
2674     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2675         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2676                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2677                 testdata2[2][1]);
2678         return;
2679     }
2680     ucol_close(coll);
2681 
2682     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2683         UCollator          *coll1,
2684                            *coll2;
2685         UCollationElements *iter1,
2686                            *iter2;
2687         UChar               ch = 0x0042 /* 'B' */;
2688         uint32_t            ce;
2689         rlen = u_unescape(testrules3[i], rule, 32);
2690         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2691         rlen = u_unescape(testrules3[i + 1], rule, 32);
2692         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2693         if (U_FAILURE(status)) {
2694             log_err("Collator creation failed %s\n", testrules[i]);
2695             return;
2696         }
2697         iter1 = ucol_openElements(coll1, &ch, 1, &status);
2698         iter2 = ucol_openElements(coll2, &ch, 1, &status);
2699         if (U_FAILURE(status)) {
2700             log_err("Collation iterator creation failed\n");
2701             return;
2702         }
2703         ce = ucol_next(iter1, &status);
2704         if (U_FAILURE(status)) {
2705             log_err("Retrieving ces failed\n");
2706             return;
2707         }
2708         while (ce != UCOL_NULLORDER) {
2709             if (ce != (uint32_t)ucol_next(iter2, &status)) {
2710                 log_err("CEs does not match\n");
2711                 return;
2712             }
2713             ce = ucol_next(iter1, &status);
2714             if (U_FAILURE(status)) {
2715                 log_err("Retrieving ces failed\n");
2716                 return;
2717             }
2718         }
2719         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2720             log_err("CEs not exhausted\n");
2721             return;
2722         }
2723         ucol_closeElements(iter1);
2724         ucol_closeElements(iter2);
2725         ucol_close(coll1);
2726         ucol_close(coll2);
2727     }
2728 }
2729 
TestExpansion(void)2730 static void TestExpansion(void) {
2731     const static char *testrules[] = {
2732         "&J << K / B & K << M",
2733         "&J << K / B << M"
2734     };
2735     const static UChar testdata[][3] = {
2736         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2737         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2738         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2739         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2740         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2741         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2742     };
2743 
2744     UErrorCode  status   = U_ZERO_ERROR;
2745     UCollator  *coll;
2746     UChar       rule[256] = {0};
2747     uint32_t    rlen     = 0;
2748     int         i;
2749 
2750     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2751         int j = 0;
2752         log_verbose("Rule %s for testing\n", testrules[i]);
2753         rlen = u_unescape(testrules[i], rule, 32);
2754         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2755         if (U_FAILURE(status)) {
2756             log_err("Collator creation failed %s\n", testrules[i]);
2757             return;
2758         }
2759 
2760         for (j = 0; j < 5; j ++) {
2761             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2762         }
2763         ucol_close(coll);
2764     }
2765 }
2766 
#if 0
/*
 * Exercises current limitations of the collation engine.
 * Every scenario below fails, so the whole function is compiled out.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rules = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(rules, orderA, sizeof(orderA)/sizeof(*orderA));
    genericRulesStarter(rules, orderB, sizeof(orderB)/sizeof(*orderB));
  }

  /* contractions spanning expansions */
  {
    static const char *rules = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rules, orderA, sizeof(orderA)/sizeof(*orderA));
    genericRulesStarter(rules, orderB, sizeof(orderB)/sizeof(*orderB));
  }

  /* normalization: nulls in contractions */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* both orderings, first with decomposition on, then with it off */
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, normOn, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, normOn, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, normOff, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, normOff, sizeof(attrs)/sizeof(*attrs));
  }

  /* normalization: contractions spanning normalization */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normOn[] = { UCOL_ON };
    static const UColAttributeValue normOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    /* both orderings, first with decomposition on, then with it off */
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, normOn, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, normOn, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, normOff, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, normOff, sizeof(attrs)/sizeof(*attrs));
  }

  /* variable top:  */
  {
    /* alternative rules that were tried:                          */
    /*   "&\\u2010<x=[variable top]<z"                             */
    /*   "&' '<x<[variable top]=z"                                 */
    static const char *rules = "&\\u2010<x<[variable top]=z";
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shiftedQuaternary[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorableTertiary[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rules, orderC, sizeof(orderC)/sizeof(*orderC), attrs, shiftedQuaternary, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, shiftedQuaternary, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, shiftedQuaternary, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, nonIgnorableTertiary, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, nonIgnorableTertiary, sizeof(attrs)/sizeof(*attrs));
  }

  /* case level */
  {
    static const char *rules = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/

    log_verbose("case level\n");
    genericRulesStarterWithOptions(rules, orderA, sizeof(orderA)/sizeof(*orderA), attrs, upperFirst, sizeof(attrs)/sizeof(*attrs));
    genericRulesStarterWithOptions(rules, orderB, sizeof(orderB)/sizeof(*orderB), attrs, upperFirst, sizeof(attrs)/sizeof(*attrs));
    /* the same two runs with { UCOL_OFF } as the value set are not executed */
  }
}
#endif
2858 
TestBocsuCoverage(void)2859 static void TestBocsuCoverage(void) {
2860   UErrorCode status = U_ZERO_ERROR;
2861   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2862   UChar       test[256] = {0};
2863   uint32_t    tlen     = u_unescape(testString, test, 32);
2864   uint8_t key[256]     = {0};
2865   uint32_t klen         = 0;
2866 
2867   UCollator *coll = ucol_open("", &status);
2868   if(U_SUCCESS(status)) {
2869   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2870 
2871   klen = ucol_getSortKey(coll, test, tlen, key, 256);
2872 
2873   ucol_close(coll);
2874   } else {
2875     log_data_err("Couldn't open UCA\n");
2876   }
2877 }
2878 
TestVariableTopSetting(void)2879 static void TestVariableTopSetting(void) {
2880   UErrorCode status = U_ZERO_ERROR;
2881   const UChar *current = NULL;
2882   uint32_t varTopOriginal = 0, varTop1, varTop2;
2883   UCollator *coll = ucol_open("", &status);
2884   if(U_SUCCESS(status)) {
2885 
2886   uint32_t strength = 0;
2887   uint16_t specs = 0;
2888   uint32_t chOffset = 0;
2889   uint32_t chLen = 0;
2890   uint32_t exOffset = 0;
2891   uint32_t exLen = 0;
2892   uint32_t oldChOffset = 0;
2893   uint32_t oldChLen = 0;
2894   uint32_t oldExOffset = 0;
2895   uint32_t oldExLen = 0;
2896   uint32_t prefixOffset = 0;
2897   uint32_t prefixLen = 0;
2898 
2899   UBool startOfRules = TRUE;
2900   UColTokenParser src;
2901   UColOptionSet opts;
2902 
2903   UChar *rulesCopy = NULL;
2904   uint32_t rulesLen;
2905 
2906   UCollationResult result;
2907 
2908   UChar first[256] = { 0 };
2909   UChar second[256] = { 0 };
2910   UParseError parseError;
2911   int32_t myQ = QUICK;
2912 
2913   src.opts = &opts;
2914 
2915   if(QUICK <= 0) {
2916     QUICK = 1;
2917   }
2918 
2919   /* this test will fail when normalization is turned on */
2920   /* therefore we always turn off exhaustive mode for it */
2921   { /* QUICK > 0*/
2922     log_verbose("Slide variable top over UCARules\n");
2923     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2924     rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2925     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2926 
2927     if(U_SUCCESS(status) && rulesLen > 0) {
2928       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2929       src.current = src.source = rulesCopy;
2930       src.end = rulesCopy+rulesLen;
2931       src.extraCurrent = src.end;
2932       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2933 
2934       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2935         strength = src.parsedToken.strength;
2936         chOffset = src.parsedToken.charsOffset;
2937         chLen = src.parsedToken.charsLen;
2938         exOffset = src.parsedToken.extensionOffset;
2939         exLen = src.parsedToken.extensionLen;
2940         prefixOffset = src.parsedToken.prefixOffset;
2941         prefixLen = src.parsedToken.prefixLen;
2942         specs = src.parsedToken.flags;
2943 
2944         startOfRules = FALSE;
2945         {
2946           log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2947         }
2948         if(strength == UCOL_PRIMARY) {
2949           status = U_ZERO_ERROR;
2950           varTopOriginal = ucol_getVariableTop(coll, &status);
2951           varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2952           if(U_FAILURE(status)) {
2953             char buffer[256];
2954             char *buf = buffer;
2955             uint32_t i = 0, j;
2956             uint32_t CE = UCOL_NO_MORE_CES;
2957 
2958             /* before we start screaming, let's see if there is a problem with the rules */
2959             collIterate s;
2960             uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2961 
2962             CE = ucol_getNextCE(coll, &s, &status);
2963 
2964             for(i = 0; i < oldChLen; i++) {
2965               j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
2966               buf += j;
2967             }
2968             if(status == U_PRIMARY_TOO_LONG_ERROR) {
2969               log_verbose("= Expected failure for %s =", buffer);
2970             } else {
2971               if(s.pos == s.endp) {
2972                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
2973                   oldChOffset, u_errorName(status), buffer);
2974               } else {
2975                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
2976                   buffer);
2977               }
2978             }
2979           }
2980           varTop2 = ucol_getVariableTop(coll, &status);
2981           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
2982             log_err("cannot retrieve set varTop value!\n");
2983             continue;
2984           }
2985 
2986           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
2987 
2988             u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
2989             u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
2990             u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
2991             first[2*oldChLen+chLen] = 0;
2992 
2993             if(oldExLen == 0) {
2994               u_strncpy(second, rulesCopy+chOffset, chLen);
2995               second[chLen] = 0;
2996             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2997               u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
2998               u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
2999               u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
3000               second[2*oldExLen+chLen] = 0;
3001             }
3002             result = ucol_strcoll(coll, first, -1, second, -1);
3003             if(result == UCOL_EQUAL) {
3004               doTest(coll, first, second, UCOL_EQUAL);
3005             } else {
3006               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
3007             }
3008           }
3009         }
3010         if(strength != UCOL_TOK_RESET) {
3011           oldChOffset = chOffset;
3012           oldChLen = chLen;
3013           oldExOffset = exOffset;
3014           oldExLen = exLen;
3015         }
3016       }
3017       status = U_ZERO_ERROR;
3018     }
3019     else {
3020       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3021       return;
3022     }
3023     if (U_FAILURE(status)) {
3024         log_err("Error parsing rules %s\n", u_errorName(status));
3025         return;
3026     }
3027     status = U_ZERO_ERROR;
3028   }
3029 
3030   QUICK = myQ;
3031 
3032   log_verbose("Testing setting variable top to contractions\n");
3033   {
3034     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3035     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3036     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3037     while(*conts != 0) {
3038       if(*(conts+2) == 0) {
3039         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3040       } else {
3041         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3042       }
3043       if(U_FAILURE(status)) {
3044         log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3045           *conts, *(conts+1), *(conts+2));
3046         status = U_ZERO_ERROR;
3047       }
3048       conts+=3;
3049     }
3050 
3051     status = U_ZERO_ERROR;
3052 
3053     first[0] = 0x0040;
3054     first[1] = 0x0050;
3055     first[2] = 0x0000;
3056 
3057     ucol_setVariableTop(coll, first, -1, &status);
3058 
3059     if(U_SUCCESS(status)) {
3060       log_err("Invalid contraction succeded in setting variable top!\n");
3061     }
3062 
3063   }
3064 
3065   log_verbose("Test restoring variable top\n");
3066 
3067   status = U_ZERO_ERROR;
3068   ucol_restoreVariableTop(coll, varTopOriginal, &status);
3069   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3070     log_err("Couldn't restore old variable top\n");
3071   }
3072 
3073   log_verbose("Testing calling with error set\n");
3074 
3075   status = U_INTERNAL_PROGRAM_ERROR;
3076   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3077   varTop2 = ucol_getVariableTop(coll, &status);
3078   ucol_restoreVariableTop(coll, varTop2, &status);
3079   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3080   varTop2 = ucol_getVariableTop(NULL, &status);
3081   ucol_restoreVariableTop(NULL, varTop2, &status);
3082   if(status != U_INTERNAL_PROGRAM_ERROR) {
3083     log_err("Bad reaction to passed error!\n");
3084   }
3085   free(rulesCopy);
3086   ucol_close(coll);
3087   } else {
3088     log_data_err("Couldn't open UCA collator\n");
3089   }
3090 
3091 }
3092 
TestNonChars(void)3093 static void TestNonChars(void) {
3094   static const char *test[] = {
3095     "\\u0000",
3096     "\\uFFFE", "\\uFFFF",
3097       "\\U0001FFFE", "\\U0001FFFF",
3098       "\\U0002FFFE", "\\U0002FFFF",
3099       "\\U0003FFFE", "\\U0003FFFF",
3100       "\\U0004FFFE", "\\U0004FFFF",
3101       "\\U0005FFFE", "\\U0005FFFF",
3102       "\\U0006FFFE", "\\U0006FFFF",
3103       "\\U0007FFFE", "\\U0007FFFF",
3104       "\\U0008FFFE", "\\U0008FFFF",
3105       "\\U0009FFFE", "\\U0009FFFF",
3106       "\\U000AFFFE", "\\U000AFFFF",
3107       "\\U000BFFFE", "\\U000BFFFF",
3108       "\\U000CFFFE", "\\U000CFFFF",
3109       "\\U000DFFFE", "\\U000DFFFF",
3110       "\\U000EFFFE", "\\U000EFFFF",
3111       "\\U000FFFFE", "\\U000FFFFF",
3112       "\\U0010FFFE", "\\U0010FFFF"
3113   };
3114   UErrorCode status = U_ZERO_ERROR;
3115   UCollator *coll = ucol_open("en_US", &status);
3116 
3117   log_verbose("Test non characters\n");
3118 
3119   if(U_SUCCESS(status)) {
3120     genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3121   } else {
3122     log_err("Unable to open collator\n");
3123   }
3124 
3125   ucol_close(coll);
3126 }
3127 
/*
 * Feeds genericLocaleStarter("en_US", ...) four strings that share a long run
 * of 'a' and differ only in the final character ('a', 'b', 'c', 'd'), for
 * every total length from 20 to 499.
 *
 * The four 2048-byte buffers are heap allocated; if any allocation fails the
 * test reports an error and runs nothing.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  int allocated = 1;

  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
    if(test[i] == NULL) {
      allocated = 0;
    }
  }

  if(allocated) {
    for(j = 20; j < 500; j++) {
      for(i = 0; i<4; i++) {
        /* j-1 copies of 'a', then the distinguishing character, then NUL */
        uprv_memset(test[i], 'a', (j-1)*sizeof(char));
        test[i][j-1] = (char)('a'+i);
        test[i][j] = 0;
      }
      genericLocaleStarter("en_US", (const char **)test, 4);
    }
  } else {
    log_err("Out of memory allocating test strings\n");
  }

  /* free(NULL) is a no-op, so partially failed allocations are handled too */
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
3150 
#if 0
/*
 * Disabled variant of TestExtremeCompression; the surrounding #if 0 keeps it
 * out of the build (it has the same name as the active function).
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  /* NOTE(review): `status` is passed by value here, whereas other ucol_open
     calls in this file pass &status; `coll` is also never used or closed.
     Both need attention before this code is re-enabled. */
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Rewrites the four buffers for every length j; the strings are only handed
     to genericLocaleStarter once, after the loop has finished. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Only test[0] is rewritten here (the inner loop stops at i<1), and nothing
     is run on the result afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
3180 
/*
 * Hands a tailoring that contains surrogate pairs, together with a list of
 * strings using those pairs, to genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *ordered[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* all 14 entries take part */
  genericRulesStarter(rule, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
3201 
3202 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
static void TestPrefix(void) {
  /* Each case: a rule string, the strings to check, and how many of those
     strings are handed to genericRulesStarter(). */
  static const struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } cases[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(cases)/sizeof(cases[0]));
  uint32_t idx = 0;

  while(idx != caseCount) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    ++idx;
  }
}
3227 
/* This test uses data supplied by Masashiko Maedera to test the              */
/* JIS X 4061 collation order implementation.                                 */
TestNewJapanese(void)3230 static void TestNewJapanese(void) {
3231 
3232   static const char * const test1[] = {
3233       "\\u30b7\\u30e3\\u30fc\\u30ec",
3234       "\\u30b7\\u30e3\\u30a4",
3235       "\\u30b7\\u30e4\\u30a3",
3236       "\\u30b7\\u30e3\\u30ec",
3237       "\\u3061\\u3087\\u3053",
3238       "\\u3061\\u3088\\u3053",
3239       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3240       "\\u3066\\u30fc\\u305f",
3241       "\\u30c6\\u30fc\\u30bf",
3242       "\\u30c6\\u30a7\\u30bf",
3243       "\\u3066\\u3048\\u305f",
3244       "\\u3067\\u30fc\\u305f",
3245       "\\u30c7\\u30fc\\u30bf",
3246       "\\u30c7\\u30a7\\u30bf",
3247       "\\u3067\\u3048\\u305f",
3248       "\\u3066\\u30fc\\u305f\\u30fc",
3249       "\\u30c6\\u30fc\\u30bf\\u30a1",
3250       "\\u30c6\\u30a7\\u30bf\\u30fc",
3251       "\\u3066\\u3047\\u305f\\u3041",
3252       "\\u3066\\u3048\\u305f\\u30fc",
3253       "\\u3067\\u30fc\\u305f\\u30fc",
3254       "\\u30c7\\u30fc\\u30bf\\u30a1",
3255       "\\u3067\\u30a7\\u305f\\u30a1",
3256       "\\u30c7\\u3047\\u30bf\\u3041",
3257       "\\u30c7\\u30a8\\u30bf\\u30a2",
3258       "\\u3072\\u3086",
3259       "\\u3073\\u3085\\u3042",
3260       "\\u3074\\u3085\\u3042",
3261       "\\u3073\\u3085\\u3042\\u30fc",
3262       "\\u30d3\\u30e5\\u30a2\\u30fc",
3263       "\\u3074\\u3085\\u3042\\u30fc",
3264       "\\u30d4\\u30e5\\u30a2\\u30fc",
3265       "\\u30d2\\u30e5\\u30a6",
3266       "\\u30d2\\u30e6\\u30a6",
3267       "\\u30d4\\u30e5\\u30a6\\u30a2",
3268       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3269       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3270       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3271       "\\u3072\\u3085\\u3093",
3272       "\\u3074\\u3085\\u3093",
3273       "\\u3075\\u30fc\\u308a",
3274       "\\u30d5\\u30fc\\u30ea",
3275       "\\u3075\\u3045\\u308a",
3276       "\\u3075\\u30a5\\u308a",
3277       "\\u3075\\u30a5\\u30ea",
3278       "\\u30d5\\u30a6\\u30ea",
3279       "\\u3076\\u30fc\\u308a",
3280       "\\u30d6\\u30fc\\u30ea",
3281       "\\u3076\\u3045\\u308a",
3282       "\\u30d6\\u30a5\\u308a",
3283       "\\u3077\\u3046\\u308a",
3284       "\\u30d7\\u30a6\\u30ea",
3285       "\\u3075\\u30fc\\u308a\\u30fc",
3286       "\\u30d5\\u30a5\\u30ea\\u30fc",
3287       "\\u3075\\u30a5\\u308a\\u30a3",
3288       "\\u30d5\\u3045\\u308a\\u3043",
3289       "\\u30d5\\u30a6\\u30ea\\u30fc",
3290       "\\u3075\\u3046\\u308a\\u3043",
3291       "\\u30d6\\u30a6\\u30ea\\u30a4",
3292       "\\u3077\\u30fc\\u308a\\u30fc",
3293       "\\u3077\\u30a5\\u308a\\u30a4",
3294       "\\u3077\\u3046\\u308a\\u30fc",
3295       "\\u30d7\\u30a6\\u30ea\\u30a4",
3296       "\\u30d5\\u30fd",
3297       "\\u3075\\u309e",
3298       "\\u3076\\u309d",
3299       "\\u3076\\u3075",
3300       "\\u3076\\u30d5",
3301       "\\u30d6\\u3075",
3302       "\\u30d6\\u30d5",
3303       "\\u3076\\u309e",
3304       "\\u3076\\u3077",
3305       "\\u30d6\\u3077",
3306       "\\u3077\\u309d",
3307       "\\u30d7\\u30fd",
3308       "\\u3077\\u3075",
3309 };
3310 
3311   static const char *test2[] = {
3312     "\\u306f\\u309d", /* H\\u309d */
3313     "\\u30cf\\u30fd", /* K\\u30fd */
3314     "\\u306f\\u306f", /* HH */
3315     "\\u306f\\u30cf", /* HK */
3316     "\\u30cf\\u30cf", /* KK */
3317     "\\u306f\\u309e", /* H\\u309e */
3318     "\\u30cf\\u30fe", /* K\\u30fe */
3319     "\\u306f\\u3070", /* HH\\u309b */
3320     "\\u30cf\\u30d0", /* KK\\u309b */
3321     "\\u306f\\u3071", /* HH\\u309c */
3322     "\\u30cf\\u3071", /* KH\\u309c */
3323     "\\u30cf\\u30d1", /* KK\\u309c */
3324     "\\u3070\\u309d", /* H\\u309b\\u309d */
3325     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3326     "\\u3070\\u306f", /* H\\u309bH */
3327     "\\u30d0\\u30cf", /* K\\u309bK */
3328     "\\u3070\\u309e", /* H\\u309b\\u309e */
3329     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3330     "\\u3070\\u3070", /* H\\u309bH\\u309b */
3331     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3332     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3333     "\\u3070\\u3071", /* H\\u309bH\\u309c */
3334     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3335     "\\u3071\\u309d", /* H\\u309c\\u309d */
3336     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3337     "\\u3071\\u306f", /* H\\u309cH */
3338     "\\u30d1\\u30cf", /* K\\u309cK */
3339     "\\u3071\\u3070", /* H\\u309cH\\u309b */
3340     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3341     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3342     "\\u3071\\u3071", /* H\\u309cH\\u309c */
3343     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3344   };
3345   /*
3346   static const char *test3[] = {
3347     "\\u221er\\u221e",
3348     "\\u221eR#",
3349     "\\u221et\\u221e",
3350     "#r\\u221e",
3351     "#R#",
3352     "#t%",
3353     "#T%",
3354     "8t\\u221e",
3355     "8T\\u221e",
3356     "8t#",
3357     "8T#",
3358     "8t%",
3359     "8T%",
3360     "8t8",
3361     "8T8",
3362     "\\u03c9r\\u221e",
3363     "\\u03a9R%",
3364     "rr\\u221e",
3365     "rR\\u221e",
3366     "Rr\\u221e",
3367     "RR\\u221e",
3368     "RT%",
3369     "rt8",
3370     "tr\\u221e",
3371     "tr8",
3372     "TR8",
3373     "tt8",
3374     "\\u30b7\\u30e3\\u30fc\\u30ec",
3375   };
3376   */
3377   static const UColAttribute att[] = { UCOL_STRENGTH };
3378   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3379 
3380   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3381   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3382 
3383   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3384   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3385   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3386   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3387   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3388 }
3389 
TestStrCollIdenticalPrefix(void)3390 static void TestStrCollIdenticalPrefix(void) {
3391   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3392   const char* test[] = {
3393     "ab\\ud9b0\\udc70",
3394     "ab\\ud9b0\\udc71"
3395   };
3396   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3397 }
3398 /* Contractions should have all their canonically equivalent */
3399 /* strings included */
TestContractionClosure(void)3400 static void TestContractionClosure(void) {
3401   static const struct {
3402     const char *rules;
3403     const char *data[10];
3404     const uint32_t len;
3405   } tests[] = {
3406     {   "&b=\\u00e4\\u00e4",
3407       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3408     {   "&b=\\u00C5",
3409       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3410   };
3411   uint32_t i;
3412 
3413 
3414   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3415     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3416   }
3417 }
3418 
/* This test also fails */
static void TestBeforePrefixFailure(void) {
  /* Rules using [before 3], each paired with two strings that are handed to
     genericRulesStarter(). */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(cases)/sizeof(cases[0]));
  uint32_t idx;

  for(idx = 0; idx != caseCount; ++idx) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
  }

#if 0
  /* Compiled out: the same kind of check with a prefix (|) in the rule,
     followed by a dump of collation elements. */
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
  /* Debug aid: opens a collator from rule1 and logs, in verbose mode, every
     collation element produced for each test string. */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3496 
TestPrefixCompose(void)3497 static void TestPrefixCompose(void) {
3498   const char* rule1 =
3499         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3500   /*
3501   const char* test[] = {
3502       "\\u30c6\\u30fc\\u30bf",
3503       "\\u30c6\\u30a7\\u30bf",
3504   };
3505   */
3506   {
3507     UErrorCode status = U_ZERO_ERROR;
3508     /*uint32_t i = 0;*/
3509     /*UCollationElements *it = NULL;*/
3510 /*    uint32_t CE;*/
3511     UChar string[256];
3512     uint32_t uStringLen;
3513     UCollator *coll = NULL;
3514 
3515     uStringLen = u_unescape(rule1, string, 256);
3516 
3517     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3518     ucol_close(coll);
3519   }
3520 
3521 
3522 }
3523 
3524 /*
3525 [last variable] last variable value
3526 [last primary ignorable] largest CE for primary ignorable
3527 [last secondary ignorable] largest CE for secondary ignorable
3528 [last tertiary ignorable] largest CE for tertiary ignorable
3529 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3530 */
3531 
/*
 * Exercises the bracketed reset positions ([first ...], [last ...], [top])
 * with and without [before n]. Each case is a rule plus a list of strings
 * handed to genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /* The code points below are hardcoded and match the current UCA; a UCA
   * update may force these values to change (\\u02d0, \\U00010FFFC etc...).
   */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    /* befores that amount to zero: nothing sorts before these positions */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* cannot go before the first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* cannot go before the last tertiary ignorable */

    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* cannot go before the first secondary ignorable */

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* cannot go before the last secondary ignorable */

    /* 'normal' befores */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* there is no code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        {  "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", "\\u02d0", "a", "\\u02d1"}, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): eight strings are listed but len is 7, so the final
     * "u" is not passed on - confirm whether that is intentional.
     */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
    }

  };
  const uint32_t caseCount = LEN(tests);
  uint32_t caseIdx = 0;

  while(caseIdx < caseCount) {
    genericRulesStarter(tests[caseIdx].rules, tests[caseIdx].data, tests[caseIdx].len);
    ++caseIdx;
  }
}
3616 
3617 
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * (requested through an [optimize ...] rule) fails. The resulting ordering
 * cannot really be checked, because the functionality remains the same.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  const uint32_t caseCount = LEN(tests);
  uint32_t caseIdx;

  for(caseIdx = 0; caseIdx != caseCount; ++caseIdx) {
    genericRulesStarter(tests[caseIdx].rules, tests[caseIdx].data, tests[caseIdx].len);
  }
}
3639 
3640 /*
3641 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3642 weiv    ucol_strcollIter?
3643 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3644 weiv    these are the input strings?
3645 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3646 weiv    will check - could be a problem with utf-8 iterator
3647 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3648 weiv    hmmm
3649 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3650 weiv    that doesn't sound right
3651 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3652 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3653 cycheng@ca.ibm.c... yes
3654 weiv    and then do the comparison
3655 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3656 weiv    utf-16 strings look like a little endian ones in the example you sent me
3657 weiv    It could be a bug - let me try to test it out
3658 cycheng@ca.ibm.c... ok
3659 cycheng@ca.ibm.c... we can wait till the conf. call
3660 cycheng@ca.ibm.c... next week
3661 weiv    that would be great
3662 weiv    hmmm
3663 weiv    I might be wrong
3664 weiv    let me play with it some more
3665 cycheng@ca.ibm.c... ok
3666 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3667 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3668 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3669 weiv    ok
3670 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3671 weiv    thanks
3672 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3673 */
#if 0
/*
 * Disabled. Compares the two sample strings from the conversation quoted
 * above once through UTF-16BE iterators and once through UTF-8 iterators
 * (normalization on) and reports an error when the two results differ.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* both encodings are given as raw bytes, four bytes per string */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator u16Left, u16Right;
  UCharIterator u8Left, u8Right;
  UCollationResult cmpU16, cmpU8;

  uiter_setUTF16BE(&u16Left, utf16be[0], 4);
  uiter_setUTF16BE(&u16Right, utf16be[1], 4);
  uiter_setUTF8(&u8Left, utf8[0], 4);
  uiter_setUTF8(&u8Right, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  cmpU16 = ucol_strcollIter(coll, &u16Left, &u16Right, &status);
  cmpU8 = ucol_strcollIter(coll, &u8Left, &u8Right, &status);

  if(cmpU16 != cmpU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3714 
3715 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3716 static void Alexis2(void) {
3717   UErrorCode status = U_ZERO_ERROR;
3718   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3719   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3720   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3721   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3722 
3723   UConverter *conv = NULL;
3724 
3725   UCharIterator U16BEItS, U16BEItT;
3726   UCharIterator U8ItS, U8ItT;
3727 
3728   UCollationResult resU16, resU16BE, resU8;
3729 
3730   static const char* const pairs[][2] = {
3731     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3732     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3733     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3734     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3735     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3736     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3737     { "\\u0020", "\\u0020\\u0000"}
3738 /*
3739 5F20 (my result here)
3740 5F204E008E3F
3741 5F20 (your result here)
3742 */
3743   };
3744 
3745   int32_t i = 0;
3746 
3747   UCollator *coll = ucol_open("", &status);
3748   if(status == U_FILE_ACCESS_ERROR) {
3749     log_data_err("Is your data around?\n");
3750     return;
3751   } else if(U_FAILURE(status)) {
3752     log_err("Error opening collator\n");
3753     return;
3754   }
3755   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3756   conv = ucnv_open("UTF16BE", &status);
3757   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3758     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3759     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3760 
3761     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3762 
3763     log_verbose("Result of strcoll is %i\n", resU16);
3764 
3765     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3766     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3767 
3768     /* use the original sizes, as the result from converter is in bytes */
3769     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3770     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3771 
3772     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3773 
3774     log_verbose("Result of U16BE is %i\n", resU16BE);
3775 
3776     if(resU16 != resU16BE) {
3777       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3778     }
3779 
3780     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3781     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3782 
3783     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3784     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3785 
3786     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3787 
3788     if(resU16 != resU8) {
3789       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3790     }
3791 
3792   }
3793 
3794   ucol_close(coll);
3795   ucnv_close(conv);
3796 }
3797 
TestHebrewUCA(void)3798 static void TestHebrewUCA(void) {
3799   UErrorCode status = U_ZERO_ERROR;
3800   static const char *first[] = {
3801     "d790d6b8d79cd795d6bcd7a9",
3802     "d790d79cd79ed7a7d799d799d7a1",
3803     "d790d6b4d79ed795d6bcd7a9",
3804   };
3805 
3806   char utf8String[3][256];
3807   UChar utf16String[3][256];
3808 
3809   int32_t i = 0, j = 0;
3810   int32_t sizeUTF8[3];
3811   int32_t sizeUTF16[3];
3812 
3813   UCollator *coll = ucol_open("", &status);
3814   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3815 
3816   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3817     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3818     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3819     log_verbose("%i: ");
3820     for(j = 0; j < sizeUTF16[i]; j++) {
3821       /*log_verbose("\\u%04X", utf16String[i][j]);*/
3822       log_verbose("%04X", utf16String[i][j]);
3823     }
3824     log_verbose("\n");
3825   }
3826   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3827     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3828       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3829     }
3830   }
3831 
3832   ucol_close(coll);
3833 
3834 }
3835 
TestPartialSortKeyTermination(void)3836 static void TestPartialSortKeyTermination(void) {
3837   static const char* cases[] = {
3838     "\\u1234\\u1234\\udc00",
3839     "\\udc00\\ud800\\ud800"
3840   };
3841 
3842   int32_t i = sizeof(UCollator);
3843 
3844   UErrorCode status = U_ZERO_ERROR;
3845 
3846   UCollator *coll = ucol_open("", &status);
3847 
3848   UCharIterator iter;
3849 
3850   UChar currCase[256];
3851   int32_t length = 0;
3852   int32_t pKeyLen = 0;
3853 
3854   uint8_t key[256];
3855 
3856   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3857     uint32_t state[2] = {0, 0};
3858     length = u_unescape(cases[i], currCase, 256);
3859     uiter_setString(&iter, currCase, length);
3860     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3861 
3862     log_verbose("Done\n");
3863 
3864   }
3865   ucol_close(coll);
3866 }
3867 
TestSettings(void)3868 static void TestSettings(void) {
3869   static const char* cases[] = {
3870     "apple",
3871       "Apple"
3872   };
3873 
3874   static const char* locales[] = {
3875     "",
3876       "en"
3877   };
3878 
3879   UErrorCode status = U_ZERO_ERROR;
3880 
3881   int32_t i = 0, j = 0;
3882 
3883   UChar source[256], target[256];
3884   int32_t sLen = 0, tLen = 0;
3885 
3886   UCollator *collateObject = NULL;
3887   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3888     collateObject = ucol_open(locales[i], &status);
3889     ucol_setStrength(collateObject, UCOL_PRIMARY);
3890     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3891     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3892       sLen = u_unescape(cases[j-1], source, 256);
3893       source[sLen] = 0;
3894       tLen = u_unescape(cases[j], target, 256);
3895       source[tLen] = 0;
3896       doTest(collateObject, source, target, UCOL_EQUAL);
3897     }
3898     ucol_close(collateObject);
3899   }
3900 }
3901 
/*
 * Checks ucol_equals() on a pair of collators and on a clone of the first.
 *
 * @param locName name of the locale under test; currently not used
 * @param source  collator under test; stays open, the caller closes it
 * @param target  second collator expected to be equal to source; it is
 *                closed by this function
 * @return number of errors that were logged
 */
static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
  UErrorCode status = U_ZERO_ERROR;
  int32_t errorNo = 0;
  UColAttributeValue french = UCOL_OFF;
  int32_t cloneSize = 0;

  (void)locName;

  if(!ucol_equals(source, target)) {
    log_err("Same collators, different address not equal\n");
    errorNo++;
  }
  ucol_close(target);
  /* the clone checks only run when the requested and the actual locale match */
  if(uprv_strcmp(ucol_getLocale(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocale(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
    target = ucol_safeClone(source, NULL, &cloneSize, &status);
    if(U_FAILURE(status)) {
      log_err("Error creating clone\n");
      errorNo++;
      return errorNo;
    }
    if(!ucol_equals(source, target)) {
      log_err("Collator different from it's clone\n");
      errorNo++;
    }
    /* flip French collation on the clone so that it must differ from source */
    french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
    if(french == UCOL_ON) {
      ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
    } else {
      ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    }
    if(U_FAILURE(status)) {
      log_err("Error setting attributes\n");
      errorNo++;
      /* the clone belongs to this function - release it on this path too */
      ucol_close(target);
      return errorNo;
    }
    if(ucol_equals(source, target)) {
      log_err("Collators same even when options changed\n");
      errorNo++;
    }
    ucol_close(target);
    /* A further check that rebuilt the target with ucol_getRules() and
     * ucol_openRules() was disabled, with the note that safeClone uses
     * exactly the same technique.
     */
  }
  return errorNo;
}
3964 
3965 
TestEquals(void)3966 static void TestEquals(void) {
3967   /* ucol_equals is not currently a public API. There is a chance that it will become
3968    * something like this, but currently it is only used by RuleBasedCollator::operator==
3969    */
3970   /* test whether the two collators instantiated from the same locale are equal */
3971   UErrorCode status = U_ZERO_ERROR;
3972   UParseError parseError;
3973   int32_t noOfLoc = uloc_countAvailable();
3974   const char *locName = NULL;
3975   UCollator *source = NULL, *target = NULL;
3976   int32_t i = 0;
3977 
3978   const char* rules[] = {
3979     "&l < lj <<< Lj <<< LJ",
3980       "&n < nj <<< Nj <<< NJ",
3981       "&ae <<< \\u00e4",
3982       "&AE <<< \\u00c4"
3983   };
3984   /*
3985   const char* badRules[] = {
3986     "&l <<< Lj",
3987       "&n < nj <<< nJ <<< NJ",
3988       "&a <<< \\u00e4",
3989       "&AE <<< \\u00c4 <<< x"
3990   };
3991   */
3992 
3993   UChar sourceRules[1024], targetRules[1024];
3994   int32_t sourceRulesSize = 0, targetRulesSize = 0;
3995   int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
3996 
3997   for(i = 0; i < rulesSize; i++) {
3998     sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
3999     targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4000   }
4001 
4002   source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4003   if(status == U_FILE_ACCESS_ERROR) {
4004     log_data_err("Is your data around?\n");
4005     return;
4006   } else if(U_FAILURE(status)) {
4007     log_err("Error opening collator\n");
4008     return;
4009   }
4010   target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4011   if(!ucol_equals(source, target)) {
4012     log_err("Equivalent collators not equal!\n");
4013   }
4014   ucol_close(source);
4015   ucol_close(target);
4016 
4017   source = ucol_open("root", &status);
4018   target = ucol_open("root", &status);
4019   log_verbose("Testing root\n");
4020   if(!ucol_equals(source, source)) {
4021     log_err("Same collator not equal\n");
4022   }
4023   if(TestEqualsForCollator(locName, source, target)) {
4024     log_err("Errors for root\n", locName);
4025   }
4026   ucol_close(source);
4027 
4028   for(i = 0; i<noOfLoc; i++) {
4029     status = U_ZERO_ERROR;
4030     locName = uloc_getAvailable(i);
4031     /*if(hasCollationElements(locName)) {*/
4032       log_verbose("Testing equality for locale %s\n", locName);
4033       source = ucol_open(locName, &status);
4034       target = ucol_open(locName, &status);
4035       if(TestEqualsForCollator(locName, source, target)) {
4036         log_err("Errors for locale %s\n", locName);
4037       }
4038       ucol_close(source);
4039     /*}*/
4040   }
4041 }
4042 
TestJ2726(void)4043 static void TestJ2726(void) {
4044   UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4045   UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4046   UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4047   UErrorCode status = U_ZERO_ERROR;
4048   UCollator *coll = ucol_open("en", &status);
4049   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4050   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4051   doTest(coll, a, aSpace, UCOL_EQUAL);
4052   doTest(coll, aSpace, a, UCOL_EQUAL);
4053   doTest(coll, a, spaceA, UCOL_EQUAL);
4054   doTest(coll, spaceA, a, UCOL_EQUAL);
4055   doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4056   doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4057   ucol_close(coll);
4058 }
4059 
NullRule(void)4060 static void NullRule(void) {
4061   UChar r[3] = {0};
4062   UErrorCode status = U_ZERO_ERROR;
4063   UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4064   if(U_SUCCESS(status)) {
4065     log_err("This should have been an error!\n");
4066     ucol_close(coll);
4067   } else {
4068     status = U_ZERO_ERROR;
4069   }
4070   coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4071   if(U_FAILURE(status)) {
4072     log_err("Empty rules should have produced a valid collator\n");
4073   } else {
4074     ucol_close(coll);
4075   }
4076 }
4077 
4078 /**
4079  * Test for CollationElementIterator previous and next for the whole set of
4080  * unicode characters with normalization on.
4081  */
TestNumericCollation(void)4082 static void TestNumericCollation(void)
4083 {
4084     UErrorCode status = U_ZERO_ERROR;
4085 
4086     const static char *basicTestStrings[]={
4087     "hello1",
4088     "hello2",
4089     "hello2002",
4090     "hello2003",
4091     "hello123456",
4092     "hello1234567",
4093     "hello10000000",
4094     "hello100000000",
4095     "hello1000000000",
4096     "hello10000000000",
4097     };
4098 
4099     const static char *preZeroTestStrings[]={
4100     "avery10000",
4101     "avery010000",
4102     "avery0010000",
4103     "avery00010000",
4104     "avery000010000",
4105     "avery0000010000",
4106     "avery00000010000",
4107     "avery000000010000",
4108     };
4109 
4110     const static char *thirtyTwoBitNumericStrings[]={
4111     "avery42949672960",
4112     "avery42949672961",
4113     "avery42949672962",
4114     "avery429496729610"
4115     };
4116 
4117     const static char *supplementaryDigits[] = {
4118       "\\uD835\\uDFCE", /* 0 */
4119       "\\uD835\\uDFCF", /* 1 */
4120       "\\uD835\\uDFD0", /* 2 */
4121       "\\uD835\\uDFD1", /* 3 */
4122       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4123       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4124       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4125       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4126       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4127       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4128     };
4129 
4130     const static char *foreignDigits[] = {
4131       "\\u0661",
4132         "\\u0662",
4133         "\\u0663",
4134       "\\u0661\\u0660",
4135       "\\u0661\\u0662",
4136       "\\u0661\\u0663",
4137       "\\u0662\\u0660",
4138       "\\u0662\\u0662",
4139       "\\u0662\\u0663",
4140       "\\u0663\\u0660",
4141       "\\u0663\\u0662",
4142       "\\u0663\\u0663"
4143     };
4144 
4145     const static char *evenZeroes[] = {
4146       "2000",
4147       "2001",
4148         "2002",
4149         "2003"
4150     };
4151 
4152     UColAttribute att = UCOL_NUMERIC_COLLATION;
4153     UColAttributeValue val = UCOL_ON;
4154 
4155     /* Open our collator. */
4156     UCollator* coll = ucol_open("root", &status);
4157     if (U_FAILURE(status)){
4158         log_err("ERROR: in using ucol_open()\n %s\n",
4159               myErrorName(status));
4160         return;
4161     }
4162     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4163     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4164     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4165     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4166     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4167 
4168     /* Setting up our collator to do digits. */
4169     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4170     if (U_FAILURE(status)){
4171         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4172               myErrorName(status));
4173         return;
4174     }
4175 
4176     /*
4177        Testing that prepended zeroes still yield the correct collation behavior.
4178        We expect that every element in our strings array will be equal.
4179     */
4180     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4181 
4182     ucol_close(coll);
4183 }
4184 
TestTibetanConformance(void)4185 static void TestTibetanConformance(void)
4186 {
4187     const char* test[] = {
4188         "\\u0FB2\\u0591\\u0F71\\u0061",
4189         "\\u0FB2\\u0F71\\u0061"
4190     };
4191 
4192     UErrorCode status = U_ZERO_ERROR;
4193     UCollator *coll = ucol_open("", &status);
4194     UChar source[100];
4195     UChar target[100];
4196     int result;
4197     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4198     if (U_SUCCESS(status)) {
4199         u_unescape(test[0], source, 100);
4200         u_unescape(test[1], target, 100);
4201         doTest(coll, source, target, UCOL_EQUAL);
4202         result = ucol_strcoll(coll, source, -1,   target, -1);
4203         log_verbose("result %d\n", result);
4204         if (UCOL_EQUAL != result) {
4205             log_err("Tibetan comparison error\n");
4206         }
4207     }
4208     ucol_close(coll);
4209 
4210     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4211 }
4212 
/* Passes two Han strings to genericLocaleStarter() for the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4217 
4218 #define TST_UCOL_MAX_INPUT 0x220001
4219 #define topByte 0xFF000000;
4220 #define bottomByte 0xFF;
4221 #define fourBytes 0xFFFFFFFF;
4222 
4223 
showImplicit(UChar32 i)4224 static void showImplicit(UChar32 i) {
4225     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4226         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4227     }
4228 }
4229 
TestImplicitGeneration(void)4230 static void TestImplicitGeneration(void) {
4231     UErrorCode status = U_ZERO_ERROR;
4232     UChar32 last = 0;
4233     UChar32 current;
4234     UChar32 i = 0, j = 0;
4235     UChar32 roundtrip = 0;
4236     UChar32 lastBottom = 0;
4237     UChar32 currentBottom = 0;
4238     UChar32 lastTop = 0;
4239     UChar32 currentTop = 0;
4240 
4241     UCollator *coll = ucol_open("root", &status);
4242     if(U_FAILURE(status)) {
4243         log_err("Couldn't open UCA\n");
4244         return;
4245     }
4246 
4247     uprv_uca_getRawFromImplicit(0xE20303E7);
4248 
4249     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4250         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4251 
4252         /* check that it round-trips AND that all intervening ones are illegal*/
4253         roundtrip = uprv_uca_getRawFromImplicit(current);
4254         if (roundtrip != i) {
4255             log_err("No roundtrip %08X\n", i);
4256         }
4257         if (last != 0) {
4258             for (j = last + 1; j < current; ++j) {
4259                 roundtrip = uprv_uca_getRawFromImplicit(j);
4260                 /* raise an error if it *doesn't* find an error*/
4261                 if (roundtrip != -1) {
4262                     log_err("Fails to recognize illegal %08X\n", j);
4263                 }
4264             }
4265         }
4266         /* now do other consistency checks*/
4267         lastBottom = last & bottomByte;
4268         currentBottom = current & bottomByte;
4269         lastTop = last & topByte;
4270         currentTop = current & topByte;
4271 
4272         /* print out some values for spot-checking*/
4273         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4274             showImplicit(i-3);
4275             showImplicit(i-2);
4276             showImplicit(i-1);
4277             showImplicit(i);
4278             showImplicit(i+1);
4279             showImplicit(i+2);
4280         }
4281         last = current;
4282 
4283         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4284             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4285         }
4286     }
4287     showImplicit(TST_UCOL_MAX_INPUT-2);
4288     showImplicit(TST_UCOL_MAX_INPUT-1);
4289     showImplicit(TST_UCOL_MAX_INPUT);
4290     ucol_close(coll);
4291 }
4292 
4293 /**
4294  * Iterate through the given iterator, checking to see that all the strings
4295  * in the expected array are present.
4296  * @param expected array of strings we expect to see, or NULL
4297  * @param expectedCount number of elements of expected, or 0
4298  */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4299 static int32_t checkUEnumeration(const char* msg,
4300                                  UEnumeration* iter,
4301                                  const char** expected,
4302                                  int32_t expectedCount) {
4303     UErrorCode ec = U_ZERO_ERROR;
4304     int32_t i = 0, n, j, bit;
4305     int32_t seenMask = 0;
4306 
4307     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4308     n = uenum_count(iter, &ec);
4309     if (!assertSuccess("count", &ec)) return -1;
4310     log_verbose("%s = [", msg);
4311     for (;; ++i) {
4312         const char* s = uenum_next(iter, NULL, &ec);
4313         if (!assertSuccess("snext", &ec) || s == NULL) break;
4314         if (i != 0) log_verbose(",");
4315         log_verbose("%s", s);
4316         /* check expected list */
4317         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4318             if ((seenMask&bit) == 0 &&
4319                 uprv_strcmp(s, expected[j]) == 0) {
4320                 seenMask |= bit;
4321                 break;
4322             }
4323         }
4324     }
4325     log_verbose("] (%d)\n", i);
4326     assertTrue("count verified", i==n);
4327     /* did we see all expected strings? */
4328     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4329         if ((seenMask&bit)!=0) {
4330             log_verbose("Ok: \"%s\" seen\n", expected[j]);
4331         } else {
4332             log_err("FAIL: \"%s\" not seen\n", expected[j]);
4333         }
4334     }
4335     return n;
4336 }
4337 
4338 /**
4339  * Test new API added for separate collation tree.
4340  */
TestSeparateTrees(void)4341 static void TestSeparateTrees(void) {
4342     UErrorCode ec = U_ZERO_ERROR;
4343     UEnumeration *e = NULL;
4344     int32_t n = -1;
4345     UBool isAvailable;
4346     char loc[256];
4347 
4348     static const char* AVAIL[] = { "en", "de" };
4349 
4350     static const char* KW[] = { "collation" };
4351 
4352     static const char* KWVAL[] = { "phonebook", "stroke" };
4353 
4354 #if !UCONFIG_NO_SERVICE
4355     e = ucol_openAvailableLocales(&ec);
4356     assertSuccess("ucol_openAvailableLocales", &ec);
4357     assertTrue("ucol_openAvailableLocales!=0", e!=0);
4358     n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4359     /* Don't need to check n because we check list */
4360     uenum_close(e);
4361 #endif
4362 
4363     e = ucol_getKeywords(&ec);
4364     assertSuccess("ucol_getKeywords", &ec);
4365     assertTrue("ucol_getKeywords!=0", e!=0);
4366     n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4367     /* Don't need to check n because we check list */
4368     uenum_close(e);
4369 
4370     e = ucol_getKeywordValues(KW[0], &ec);
4371     assertSuccess("ucol_getKeywordValues", &ec);
4372     assertTrue("ucol_getKeywordValues!=0", e!=0);
4373     n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4374     /* Don't need to check n because we check list */
4375     uenum_close(e);
4376 
4377     /* Try setting a warning before calling ucol_getKeywordValues */
4378     ec = U_USING_FALLBACK_WARNING;
4379     e = ucol_getKeywordValues(KW[0], &ec);
4380     assertSuccess("ucol_getKeywordValues [with warning code set]", &ec);
4381     assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4382     n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4383     /* Don't need to check n because we check list */
4384     uenum_close(e);
4385 
4386     /*
4387 U_DRAFT int32_t U_EXPORT2
4388 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4389                              const char* locale, UBool* isAvailable,
4390                              UErrorCode* status);
4391 }
4392 */
4393     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4394                                      &isAvailable, &ec);
4395     assertSuccess("getFunctionalEquivalent", &ec);
4396     assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4397     assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4398                isAvailable == TRUE);
4399 
4400     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4401                                      &isAvailable, &ec);
4402     assertSuccess("getFunctionalEquivalent", &ec);
4403     assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4404     assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4405                isAvailable == TRUE);
4406 }
4407 
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through the generic ordering checks, once
 * with a collator built from the [before 2] rules below and once with the
 * "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    const static char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a tone mark, in expected order. */
    const static char *syllables[] = {
        "l\\u0101", "la",
        "l\\u0101n", "lan ",
        "l\\u0113", "le",
        "l\\u0113n", "len"
    };

    /* Case and tone-mark combinations around a/A, in expected order. */
    const static char *caseAndTone[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    genericRulesStarter(pinyinRules, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(pinyinRules, caseAndTone, LEN(caseAndTone));
    genericLocaleStarter("zh", caseAndTone, LEN(caseAndTone));
}
4468 
TestBeforeTightening(void)4469 static void TestBeforeTightening(void) {
4470     static const struct {
4471         const char *rules;
4472         UErrorCode expectedStatus;
4473     } tests[] = {
4474         { "&[before 1]a<x", U_ZERO_ERROR },
4475         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4476         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4477         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4478         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4479         { "&[before 2]a<<x",U_ZERO_ERROR },
4480         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4481         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4482         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4483         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4484         { "&[before 3]a<<<x",U_ZERO_ERROR },
4485         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4486         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4487     };
4488 
4489     int32_t i = 0;
4490 
4491     UErrorCode status = U_ZERO_ERROR;
4492     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4493     uint32_t rlen = 0;
4494 
4495     UCollator *coll = NULL;
4496 
4497 
4498     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4499         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4500         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4501         if(status != tests[i].expectedStatus) {
4502             log_err("Opening a collator with rules %s returned error code %s, expected %s\n",
4503                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4504         }
4505         ucol_close(coll);
4506         status = U_ZERO_ERROR;
4507     }
4508 
4509 }
4510 
4511 #if 0
4512 &m < a
4513 &[before 1] a < x <<< X << q <<< Q < z
4514 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4515 
4516 &m < a
4517 &[before 2] a << x <<< X << q <<< Q < z
4518 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4519 
4520 &m < a
4521 &[before 3] a <<< x <<< X << q <<< Q < z
4522 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4523 
4524 
4525 &m << a
4526 &[before 1] a < x <<< X << q <<< Q < z
4527 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4528 
4529 &m << a
4530 &[before 2] a << x <<< X << q <<< Q < z
4531 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4532 
4533 &m << a
4534 &[before 3] a <<< x <<< X << q <<< Q < z
4535 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4536 
4537 
4538 &m <<< a
4539 &[before 1] a < x <<< X << q <<< Q < z
4540 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4541 
4542 &m <<< a
4543 &[before 2] a << x <<< X << q <<< Q < z
4544 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4545 
4546 &m <<< a
4547 &[before 3] a <<< x <<< X << q <<< Q < z
4548 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4549 
4550 
4551 &[before 1] s < x <<< X << q <<< Q < z
4552 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4553 
4554 &[before 2] s << x <<< X << q <<< Q < z
4555 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4556 
4557 &[before 3] s <<< x <<< X << q <<< Q < z
4558 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4559 
4560 
4561 &[before 1] \u24DC < x <<< X << q <<< Q < z
4562 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4563 
4564 &[before 2] \u24DC << x <<< X << q <<< Q < z
4565 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4566 
4567 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4568 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4569 #endif
4570 
4571 
4572 #if 0
4573 /* requires features not yet supported */
4574 static void TestMoreBefore(void) {
4575     static const struct {
4576         const char* rules;
4577         const char* order[16];
4578         int32_t size;
4579     } tests[] = {
4580         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4581         { "m","M","x","X","q","Q","z","a","n" }, 9},
4582         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4583         { "m","M","x","X","q","Q","a","z","n" }, 9},
4584         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4585         { "m","M","x","X","a","q","Q","z","n" }, 9},
4586         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4587         { "x","X","q","Q","z","m","M","a","n" }, 9},
4588         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4589         { "m","M","x","X","q","Q","a","z","n" }, 9},
4590         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4591         { "m","M","x","X","a","q","Q","z","n" }, 9},
4592         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4593         { "x","X","q","Q","z","n","m","a","M" }, 9},
4594         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4595         { "x","X","q","Q","m","a","M","z","n" }, 9},
4596         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4597         { "m","x","X","a","M","q","Q","z","n" }, 9},
4598         { "&[before 1] s < x <<< X << q <<< Q < z",
4599         { "r","R","x","X","q","Q","z","s","n" }, 9},
4600         { "&[before 2] s << x <<< X << q <<< Q < z",
4601         { "r","R","x","X","q","Q","s","z","n" }, 9},
4602         { "&[before 3] s <<< x <<< X << q <<< Q < z",
4603         { "r","R","x","X","s","q","Q","z","n" }, 9},
4604         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4605         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4606         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4607         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4608         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4609         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4610     };
4611 
4612     int32_t i = 0;
4613 
4614     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4615         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
4616     }
4617 }
4618 #endif
4619 
TestTailorNULL(void)4620 static void TestTailorNULL( void ) {
4621     const static char* rule = "&a <<< '\\u0000'";
4622     UErrorCode status = U_ZERO_ERROR;
4623     UChar rlz[RULE_BUFFER_LEN] = { 0 };
4624     uint32_t rlen = 0;
4625     UChar a = 1, null = 0;
4626     UCollationResult res = UCOL_EQUAL;
4627 
4628     UCollator *coll = NULL;
4629 
4630 
4631     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4632     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4633 
4634     if(U_FAILURE(status)) {
4635         log_err("Could not open default collator!\n");
4636     } else {
4637         res = ucol_strcoll(coll, &a, 1, &null, 1);
4638 
4639         if(res != UCOL_LESS) {
4640             log_err("NULL was not tailored properly!\n");
4641         }
4642     }
4643 
4644     ucol_close(coll);
4645 }
4646 
4647 static void
TestThaiSortKey(void)4648 TestThaiSortKey(void)
4649 {
4650   UChar yamakan = 0x0E4E;
4651   UErrorCode status = U_ZERO_ERROR;
4652   uint8_t key[256];
4653   int32_t keyLen = 0;
4654   /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4655   /* since it stays in the same relative position. This should be addressed in CLDR */
4656   /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4657   /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4658   /* UCA 5.0 moves Yammakan */
4659   uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 };
4660   UCollator *coll = ucol_open("th", &status);
4661   if(U_FAILURE(status)) {
4662     log_err("Could not open a collator, exiting (%s)\n", u_errorName(status));
4663     return;
4664   }
4665 
4666   keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4667   if(strcmp((char *)key, (char *)expectedKey)) {
4668     log_err("Yammakan key is different from ICU 34!\n");
4669   }
4670 
4671   ucol_close(coll);
4672 }
4673 
4674 static void
TestUpperFirstQuaternary(void)4675 TestUpperFirstQuaternary(void)
4676 {
4677   const char* tests[] = { "B", "b", "Bb", "bB" };
4678   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4679   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4680   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4681 }
4682 
4683 static void
TestJ4960(void)4684 TestJ4960(void)
4685 {
4686   const char* tests[] = { "\\u00e2T", "aT" };
4687   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4688   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4689   const char* tests2[] = { "a", "A" };
4690   const char* rule = "&[first tertiary ignorable]=A=a";
4691   UColAttribute att2[] = { UCOL_CASE_LEVEL };
4692   UColAttributeValue attVals2[] = { UCOL_ON };
4693   /* Test whether we correctly ignore primary ignorables on case level when */
4694   /* we have only primary & case level */
4695   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4696   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4697   /* and case level */
4698   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4699   /* Test whether completely ignorable letters have case level info (they shouldn't) */
4700   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4701 }
4702 
4703 static void
TestJ5223(void)4704 TestJ5223(void)
4705 {
4706   static const char *test = "this is a test string";
4707   UChar ustr[256];
4708   int32_t ustr_length = u_unescape(test, ustr, 256);
4709   unsigned char sortkey[256];
4710   int32_t sortkey_length;
4711   UErrorCode status = U_ZERO_ERROR;
4712   static UCollator *coll = NULL;
4713   coll = ucol_open("root", &status);
4714   if(U_FAILURE(status)) {
4715     log_err("Couldn't open UCA\n");
4716     return;
4717   }
4718   ucol_setStrength(coll, UCOL_PRIMARY);
4719   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4720   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4721   if (U_FAILURE(status)) {
4722     log_err("Failed setting atributes\n");
4723     return;
4724   }
4725   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4726   if (sortkey_length > 256) return;
4727 
4728   /* we mark the position where the null byte should be written in advance */
4729   sortkey[sortkey_length-1] = 0xAA;
4730 
4731   /* we set the buffer size one byte higher than needed */
4732   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4733     sortkey_length+1);
4734 
4735   /* no error occurs (for me) */
4736   if (sortkey[sortkey_length-1] == 0xAA) {
4737     log_err("Hit bug at first try\n");
4738   }
4739 
4740   /* we mark the position where the null byte should be written again */
4741   sortkey[sortkey_length-1] = 0xAA;
4742 
4743   /* this time we set the buffer size to the exact amount needed */
4744   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4745     sortkey_length);
4746 
4747   /* now the trailing null byte is not written */
4748   if (sortkey[sortkey_length-1] == 0xAA) {
4749     log_err("Hit bug at second try\n");
4750   }
4751 
4752   ucol_close(coll);
4753 }
4754 
/*
 * Regression test for the Thai partial sort key problem (J5232): two Thai
 * strings that differ only in their last-but-one code unit are run through
 * the generic ordering check for the "th" locale.
 */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
4766 
/*
 * J5367: runs the generic ordering check on "a" and "y" with a rule set that
 * places a after the first secondary ignorable.
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *ordered[] = { "a", "y" };

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
4774 
4775 static void
TestVI5913(void)4776 TestVI5913(void)
4777 {
4778     UErrorCode status = U_ZERO_ERROR;
4779     int32_t i, j;
4780     UCollator *coll =NULL;
4781     uint8_t  resColl[100], expColl[100];
4782     int32_t  rLen, tLen, ruleLen, sLen, kLen;
4783     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4784     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4785     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4786     UChar tData[][20]={
4787         {0x1EAC, 0},
4788         {0x0041, 0x0323, 0x0302, 0},
4789         {0x1EA0, 0x0302, 0},
4790         {0x00C2, 0x0323, 0},
4791         {0x1ED8, 0},  /* O with dot and circumflex */
4792         {0x1ECC, 0x0302, 0},
4793         {0x1EB7, 0},
4794         {0x1EA1, 0x0306, 0},
4795     };
4796     UChar tailorData[][20]={
4797         {0x1FA2, 0},  /* Omega with 3 combining marks */
4798         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4799         {0x1FF3, 0x0313, 0x0300, 0},
4800         {0x1F60, 0x0300, 0x0345, 0},
4801         {0x1F62, 0x0345, 0},
4802         {0x1FA0, 0x0300, 0},
4803     };
4804     UChar tailorData2[][20]={
4805         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4806         {0x0073, 0x0323, 0x030C, 0},
4807         {0x0073, 0x030C, 0x0323, 0},
4808     };
4809     UChar tailorData3[][20]={
4810         {0x007a, 0},  /*  z */
4811         {0x0061, 0x0065, 0},  /*  a + e */
4812         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4813         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4814         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4815         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4816         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4817         {0x00EA, 0},  /* e with circumflex  */
4818     };
4819 
4820     /* Test Vietnamese sort. */
4821     coll = ucol_open("vi", &status);
4822     log_verbose("\n\nVI collation:");
4823     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4824         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4825     }
4826     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4827         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4828     }
4829     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4830         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4831     }
4832     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4833         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4834     }
4835 
4836     for (j=0; j<8; j++) {
4837         tLen = u_strlen(tData[j]);
4838         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4839         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4840         for(i = 0; i<rLen; i++) {
4841             log_verbose(" %02X", resColl[i]);
4842         }
4843     }
4844 
4845     ucol_close(coll);
4846 
4847     /* Test Russian sort. */
4848     coll = ucol_open("ro", &status);
4849     log_verbose("\n\nRO collation:");
4850     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4851         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4852     }
4853     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4854         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4855     }
4856     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4857         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4858     }
4859 
4860     for (j=4; j<8; j++) {
4861         tLen = u_strlen(tData[j]);
4862         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4863         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4864         for(i = 0; i<rLen; i++) {
4865             log_verbose(" %02X", resColl[i]);
4866         }
4867     }
4868     ucol_close(coll);
4869 
4870     /* Test the precomposed Greek character with 3 combining marks. */
4871     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
4872     ruleLen = u_strlen(rule);
4873     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4874     sLen = u_strlen(tailorData[0]);
4875     for (j=1; j<6; j++) {
4876         tLen = u_strlen(tailorData[j]);
4877         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
4878             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
4879         }
4880     }
4881     /* Test getSortKey. */
4882     tLen = u_strlen(tailorData[0]);
4883     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
4884     for (j=0; j<6; j++) {
4885         tLen = u_strlen(tailorData[j]);
4886         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
4887         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4888             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4889             for(i = 0; i<rLen; i++) {
4890                 log_err(" %02X", resColl[i]);
4891             }
4892         }
4893     }
4894     ucol_close(coll);
4895 
4896     log_verbose("\n\nTailoring test for s with caron:");
4897     ruleLen = u_strlen(rule2);
4898     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4899     tLen = u_strlen(tailorData2[0]);
4900     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
4901     for (j=1; j<3; j++) {
4902         tLen = u_strlen(tailorData2[j]);
4903         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
4904         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4905             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4906             for(i = 0; i<rLen; i++) {
4907                 log_err(" %02X", resColl[i]);
4908             }
4909         }
4910     }
4911     ucol_close(coll);
4912 
4913     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
4914      ruleLen = u_strlen(rule3);
4915      coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4916      tLen = u_strlen(tailorData3[3]);
4917      kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
4918      for (j=4; j<6; j++) {
4919          tLen = u_strlen(tailorData3[j]);
4920          rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
4921 
4922          if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4923              log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4924              for(i = 0; i<rLen; i++) {
4925                  log_err(" %02X", resColl[i]);
4926              }
4927          }
4928 
4929          log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
4930           for(i = 0; i<rLen; i++) {
4931               log_verbose(" %02X", resColl[i]);
4932           }
4933      }
4934      ucol_close(coll);
4935 }
4936 
4937 #define TSKC_DATA_SIZE 5
4938 #define TSKC_BUF_SIZE  50
4939 static void
TestSortKeyConsistency(void)4940 TestSortKeyConsistency(void)
4941 {
4942     UErrorCode icuRC = U_ZERO_ERROR;
4943     UCollator* ucol;
4944     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4945 
4946     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4947     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4948 	int32_t i, j, i2;
4949 
4950     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4951     if (U_FAILURE(icuRC))
4952 	{
4953 	    log_err("ucol_openFromShortString failed\n");
4954         return;
4955 	}
4956 
4957     for (i = 0; i < TSKC_DATA_SIZE; i++)
4958     {
4959         UCharIterator uiter;
4960         uint32_t state[2] = { 0, 0 };
4961         int32_t dataLen = i+1;
4962 	    for (j=0; j<TSKC_BUF_SIZE; j++)
4963 	        bufFull[i][j] = bufPart[i][j] = 0;
4964 
4965         /* Full sort key */
4966         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4967 
4968         /* Partial sort key */
4969         uiter_setString(&uiter, data, dataLen);
4970         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4971         if (U_FAILURE(icuRC))
4972 		{
4973 		    log_err("ucol_nextSortKeyPart failed\n");
4974 			ucol_close(ucol);
4975 			return;
4976 		}
4977 
4978 	    for (i2=0; i2<i; i2++)
4979 	    {
4980 	        UBool fullMatch = TRUE;
4981 		    UBool partMatch = TRUE;
4982 		    for (j=0; j<TSKC_BUF_SIZE; j++)
4983 		    {
4984 			    fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4985 			    partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4986 		    }
4987 			if (fullMatch != partMatch) {
4988 		        log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4989 					              : "partial key was consistent, but full key changed\n");
4990 				ucol_close(ucol);
4991 				return;
4992 			}
4993 	    }
4994 
4995     }
4996 
4997     /*=============================================*/
4998    ucol_close(ucol);
4999 }
5000 
5001 
5002 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5003 
addMiscCollTest(TestNode ** root)5004 void addMiscCollTest(TestNode** root)
5005 {
5006     TEST(TestRuleOptions);
5007     TEST(TestBeforePrefixFailure);
5008     TEST(TestContractionClosure);
5009     TEST(TestPrefixCompose);
5010     TEST(TestStrCollIdenticalPrefix);
5011     TEST(TestPrefix);
5012     TEST(TestNewJapanese);
5013     /*TEST(TestLimitations);*/
5014     TEST(TestNonChars);
5015     TEST(TestExtremeCompression);
5016     TEST(TestSurrogates);
5017     TEST(TestVariableTopSetting);
5018     TEST(TestBocsuCoverage);
5019     TEST(TestCyrillicTailoring);
5020     TEST(TestCase);
5021     TEST(IncompleteCntTest);
5022     TEST(BlackBirdTest);
5023     TEST(FunkyATest);
5024     TEST(BillFairmanTest);
5025     TEST(RamsRulesTest);
5026     TEST(IsTailoredTest);
5027     TEST(TestCollations);
5028     TEST(TestChMove);
5029     TEST(TestImplicitTailoring);
5030     TEST(TestFCDProblem);
5031     TEST(TestEmptyRule);
5032     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5033     TEST(TestJ815);
5034     /*TEST(TestJ831);*/ /* we changed lv locale */
5035     TEST(TestBefore);
5036     TEST(TestRedundantRules);
5037     TEST(TestExpansionSyntax);
5038     TEST(TestHangulTailoring);
5039     TEST(TestUCARules);
5040     TEST(TestIncrementalNormalize);
5041     TEST(TestComposeDecompose);
5042     TEST(TestCompressOverlap);
5043     TEST(TestContraction);
5044     TEST(TestExpansion);
5045     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5046     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5047     TEST(TestOptimize);
5048     TEST(TestSuppressContractions);
5049     TEST(Alexis2);
5050     TEST(TestHebrewUCA);
5051     TEST(TestPartialSortKeyTermination);
5052     TEST(TestSettings);
5053     TEST(TestEquals);
5054     TEST(TestJ2726);
5055     TEST(NullRule);
5056     TEST(TestNumericCollation);
5057     TEST(TestTibetanConformance);
5058     TEST(TestPinyinProblem);
5059     TEST(TestImplicitGeneration);
5060     TEST(TestSeparateTrees);
5061     TEST(TestBeforePinyin);
5062     TEST(TestBeforeTightening);
5063     /*TEST(TestMoreBefore);*/
5064     TEST(TestTailorNULL);
5065     TEST(TestThaiSortKey);
5066     TEST(TestUpperFirstQuaternary);
5067     TEST(TestJ4960);
5068     TEST(TestJ5223);
5069     TEST(TestJ5232);
5070     TEST(TestJ5367);
5071     TEST(TestSortKeyConsistency);
5072     TEST(TestVI5913);  /* VI, RO tailored rules */
5073 }
5074 
5075 #endif /* #if !UCONFIG_NO_COLLATION */
5076