1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2007, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File cmsccoll.C
9 *
10 *******************************************************************************/
11 /**
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
13 * to fit.
14 */
15
16 #include <stdio.h>
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
25 #include "cintltst.h"
26 #include "ccolltst.h"
27 #include "callcoll.h"
28 #include "unicode/ustring.h"
29 #include "string.h"
30 #include "ucol_imp.h"
31 #include "ucol_tok.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "uparse.h"
38
/* Number of elements in a true array (must not be used on a pointer). */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-width test-case tables in this file. */
#define MAX_TOKEN_LEN 16
42
/*
 * Signature of the string-comparison callbacks passed to the probing helpers
 * in this file (swampEarlier, swampLater, probeStrength, testSwitch).
 * "collator" is an opaque handle interpreted by the callback, "object" is an
 * extra integer that those helpers forward unchanged, and each string is
 * supplied together with its length.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
46
47
48
/*
 * Strings for IncompleteCntTest, listed in the ascending order expected
 * under the rules " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};
63
/*
 * Strings for IncompleteCntTest, listed in the ascending order expected
 * under the rules " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
75
IncompleteCntTest(void)76 static void IncompleteCntTest(void)
77 {
78 UErrorCode status = U_ZERO_ERROR;
79 UChar temp[90];
80 UChar t1[90];
81 UChar t2[90];
82
83 UCollator *coll = NULL;
84 uint32_t i = 0, j = 0;
85 uint32_t size = 0;
86
87 u_uastrcpy(temp, " & Z < ABC < Q < B");
88
89 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
90
91 if(U_SUCCESS(status)) {
92 size = sizeof(cnt1)/sizeof(cnt1[0]);
93 for(i = 0; i < size-1; i++) {
94 for(j = i+1; j < size; j++) {
95 UCollationElements *iter;
96 u_uastrcpy(t1, cnt1[i]);
97 u_uastrcpy(t2, cnt1[j]);
98 doTest(coll, t1, t2, UCOL_LESS);
99 /* synwee : added collation element iterator test */
100 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
101 if (U_FAILURE(status)) {
102 log_err("Creation of iterator failed\n");
103 break;
104 }
105 backAndForth(iter);
106 ucol_closeElements(iter);
107 }
108 }
109 }
110
111 ucol_close(coll);
112
113
114 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
115 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
116
117 if(U_SUCCESS(status)) {
118 size = sizeof(cnt2)/sizeof(cnt2[0]);
119 for(i = 0; i < size-1; i++) {
120 for(j = i+1; j < size; j++) {
121 UCollationElements *iter;
122 u_uastrcpy(t1, cnt2[i]);
123 u_uastrcpy(t2, cnt2[j]);
124 doTest(coll, t1, t2, UCOL_LESS);
125
126 /* synwee : added collation element iterator test */
127 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
128 if (U_FAILURE(status)) {
129 log_err("Creation of iterator failed\n");
130 break;
131 }
132 backAndForth(iter);
133 ucol_closeElements(iter);
134 }
135 }
136 }
137
138 ucol_close(coll);
139
140
141 }
142
/*
 * Strings for BlackBirdTest, in the ascending order expected with
 * alternate handling UCOL_SHIFTED at quaternary strength.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
154
155 const static UCollationResult shiftedTert[] = {
156 UCOL_EQUAL,
157 UCOL_EQUAL,
158 UCOL_EQUAL,
159 UCOL_LESS,
160 UCOL_EQUAL,
161 UCOL_EQUAL,
162 UCOL_LESS,
163 UCOL_EQUAL,
164 UCOL_EQUAL
165 };
166
/*
 * Strings for BlackBirdTest, in the ascending order expected with
 * alternate handling UCOL_NON_IGNORABLE.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
178
BlackBirdTest(void)179 static void BlackBirdTest(void) {
180 UErrorCode status = U_ZERO_ERROR;
181 UChar t1[90];
182 UChar t2[90];
183
184 uint32_t i = 0, j = 0;
185 uint32_t size = 0;
186 UCollator *coll = ucol_open("en_US", &status);
187
188 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
189 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
190
191 if(U_SUCCESS(status)) {
192 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
193 for(i = 0; i < size-1; i++) {
194 for(j = i+1; j < size; j++) {
195 u_uastrcpy(t1, nonignorable[i]);
196 u_uastrcpy(t2, nonignorable[j]);
197 doTest(coll, t1, t2, UCOL_LESS);
198 }
199 }
200 }
201
202 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
203 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
204
205 if(U_SUCCESS(status)) {
206 size = sizeof(shifted)/sizeof(shifted[0]);
207 for(i = 0; i < size-1; i++) {
208 for(j = i+1; j < size; j++) {
209 u_uastrcpy(t1, shifted[i]);
210 u_uastrcpy(t2, shifted[j]);
211 doTest(coll, t1, t2, UCOL_LESS);
212 }
213 }
214 }
215
216 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
217 if(U_SUCCESS(status)) {
218 size = sizeof(shifted)/sizeof(shifted[0]);
219 for(i = 1; i < size; i++) {
220 u_uastrcpy(t1, shifted[i-1]);
221 u_uastrcpy(t2, shifted[i]);
222 doTest(coll, t1, t2, shiftedTert[i]);
223 }
224 }
225
226 ucol_close(coll);
227 }
228
229 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
230 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
231 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
232 {0x0041/*'A'*/, 0x0300, 0x0000},
233 {0x00C0, 0x0301, 0x0000},
234 /* this would work with forced normalization */
235 {0x00C0, 0x0316, 0x0000}
236 };
237
238 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
239 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
240 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
241 {0x00C0, 0},
242 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243 /* this would work with forced normalization */
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
245 };
246
247 const static UCollationResult results[] = {
248 UCOL_GREATER,
249 UCOL_EQUAL,
250 UCOL_EQUAL,
251 UCOL_GREATER,
252 UCOL_EQUAL
253 };
254
FunkyATest(void)255 static void FunkyATest(void)
256 {
257
258 int32_t i;
259 UErrorCode status = U_ZERO_ERROR;
260 UCollator *myCollation;
261 myCollation = ucol_open("en_US", &status);
262 if(U_FAILURE(status)){
263 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
264 return;
265 }
266 log_verbose("Testing some A letters, for some reason\n");
267 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
268 ucol_setStrength(myCollation, UCOL_TERTIARY);
269 for (i = 0; i < 4 ; i++)
270 {
271 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
272 }
273 ucol_close(myCollation);
274 }
275
/*
 * Attribute value tables. Within the visible part of this file they are
 * only iterated by the disabled PrintMarkDavis() routine.
 */

/* Values tried for UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values tried for UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values tried for UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values tried for UCOL_STRENGTH. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
300
301 #if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-aligned with the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};
326
327 /* not used currently - does not test only prints */
328 static void PrintMarkDavis(void)
329 {
330 UErrorCode status = U_ZERO_ERROR;
331 UChar m[256];
332 uint8_t sortkey[256];
333 UCollator *coll = ucol_open("en_US", &status);
334 uint32_t h,i,j,k, sortkeysize;
335 uint32_t sizem = 0;
336 char buffer[512];
337 uint32_t len = 512;
338
339 log_verbose("PrintMarkDavis");
340
341 u_uastrcpy(m, "Mark Davis");
342 sizem = u_strlen(m);
343
344
345 m[1] = 0xe4;
346
347 for(i = 0; i<sizem; i++) {
348 fprintf(stderr, "\\u%04X ", m[i]);
349 }
350 fprintf(stderr, "\n");
351
352 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
353 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
354 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
355
356 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
357 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
358 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
359
360 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
361 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
362 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
363
364 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
365 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
366 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
367 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
368 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
369 }
370
371 }
372
373 }
374
375 }
376 }
377 #endif
378
BillFairmanTest(void)379 static void BillFairmanTest(void) {
380 /*
381 ** check for actual locale via ICU resource bundles
382 **
383 ** lp points to the original locale ("fr_FR_....")
384 */
385
386 UResourceBundle *lr,*cr;
387 UErrorCode lec = U_ZERO_ERROR;
388 const char *lp = "fr_FR_you_ll_never_find_this_locale";
389
390 log_verbose("BillFairmanTest\n");
391
392 lr = ures_open(NULL,lp,&lec);
393 if (lr) {
394 cr = ures_getByKey(lr,"collations",0,&lec);
395 if (cr) {
396 lp = ures_getLocale(cr,&lec);
397 if (lp) {
398 if (U_SUCCESS(lec)) {
399 if(strcmp(lp, "fr") != 0) {
400 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
401 }
402 }
403 }
404 ures_close(cr);
405 }
406 ures_close(lr);
407 }
408 }
409
/*
 * Verifies a primary difference between p and q.
 *
 * First checks p < q directly. Then repeats the comparison with U+31A3
 * prepended to p and U+310D prepended to q, expecting the result to stay
 * UCOL_LESS (the "primary swamps 2nd" check of the original author).
 *
 * p and q must be NUL-terminated and shorter than 255 code units, since they
 * are copied into fixed 256-unit buffers.
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar prefixedP[256] = { 0 };
    UChar prefixedQ[256] = { 0 };

    doTest(col, p, q, UCOL_LESS);

    prefixedP[0] = 0x31a3;
    u_strcpy(prefixedP + 1, p);

    prefixedQ[0] = 0x310d;
    u_strcpy(prefixedQ + 1, q);

    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
440
/*
 * Verifies a secondary difference between p and q with three comparisons:
 *   1. p < q directly;
 *   2. "S"+p < "s"+q   ("secondary swamps 3rd");
 *   3. p+"b" > q+"a"   ("secondary is swamped by 1", i.e. a later primary
 *      difference wins).
 *
 * p and q must be NUL-terminated and shorter than 255 code units, since they
 * are copied into fixed 256-unit buffers.
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* Prefix with 'S' and 's'. */
    left[0] = 0x0053;
    u_strcpy(left + 1, p);
    right[0] = 0x0073;
    u_strcpy(right + 1, q);
    doTest(col, left, right, UCOL_LESS);

    /* Suffix with 'b' and 'a'. */
    u_strcpy(left, p);
    left[pLen] = 0x62;
    left[pLen + 1] = 0;

    u_strcpy(right, q);
    right[qLen] = 0x61;
    right[qLen + 1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
477
/*
 * Verifies a tertiary difference between p and q with three comparisons:
 *   1. p < q directly;
 *   2. " "+p < "-"+q          ("tertiary swamps 4th");
 *   3. p+U+00E0 > q+"a"       ("tertiary is swamped by 3rd").
 *
 * p and q must be NUL-terminated and shorter than 255 code units, since they
 * are copied into fixed 256-unit buffers.
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* Prefix with space and hyphen-minus. */
    left[0] = 0x0020;
    u_strcpy(left + 1, p);
    right[0] = 0x002D;
    u_strcpy(right + 1, q);
    doTest(col, left, right, UCOL_LESS);

    /* Suffix with U+00E0 and 'a'. */
    u_strcpy(left, p);
    left[pLen] = 0xE0;
    left[pLen + 1] = 0;

    u_strcpy(right, q);
    right[qLen] = 0x61;
    right[qLen + 1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
512
/* Verifies that p and q compare as UCOL_EQUAL under col. */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    const UCollationResult expected = UCOL_EQUAL;

    doTest(col, p, q, expected);
}
524
/*
 * Re-parses the rule string of "coll" token by token and checks that the
 * collator actually orders each pair of consecutive rule elements with the
 * relation the rule states, dispatching to testPrimary / testSecondary /
 * testTertiary / testEquality according to the token strength.
 *
 * "first" holds the previous element (plus any expansion/prefix text) and
 * "second" the element just parsed. A reset token only records state; a
 * reset with [before] causes the next pair to be swapped before testing.
 *
 * Nothing is done if *status already indicates failure or the collator has
 * no rules. On allocation failure *status is set to
 * U_MEMORY_ALLOCATION_ERROR and the problem is logged.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;
    uint32_t strength = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t exOffset = 0; uint32_t exLen = 0;
    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    uint32_t firstEx = 0;
    uint32_t firstLen = 0;
    UBool top_ = TRUE;
    uint16_t specs = 0;
    UBool startOfRules = TRUE;
    UBool lastReset = FALSE;
    UBool before = FALSE;
    uint32_t beforeStrength = 0;
    UColTokenParser src;
    UColOptionSet opts;

    UChar first[256];
    UChar second[256];
    UChar tempB[256];
    uint32_t tempLen;
    UChar *rulesCopy = NULL;
    UParseError parseError;
    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);
    if(U_SUCCESS(*status) && ruleLen > 0) {
        /* The tokenizer needs a writable copy with extra room after the rules. */
        rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            log_err("ERROR: out of memory while copying collation rules\n");
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
        *first = *second = 0;

        while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            exOffset = src.parsedToken.extensionOffset;
            exLen = src.parsedToken.extensionLen;
            prefixOffset = src.parsedToken.prefixOffset;
            prefixLen = src.parsedToken.prefixLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
            if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
                second[0] = 0;
            } else {
                u_strncpy(second,rulesCopy+chOffset, chLen);
                second[chLen] = 0;

                if(exLen > 0 && firstEx == 0) {
                    u_strncat(first, rulesCopy+exOffset, exLen);
                    first[firstLen+exLen] = 0;
                }

                if(lastReset == TRUE && prefixLen != 0) {
                    /*
                     * Shift the current text right to make room for the
                     * prefix. Source and destination overlap, so this must
                     * be a memmove rather than a forward string copy.
                     */
                    uprv_memmove(first+prefixLen, first, firstLen*sizeof(UChar));
                    u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
                    first[firstLen+prefixLen] = 0;
                    firstLen = firstLen+prefixLen;
                }

                if(before == TRUE) { /* swap first and second */
                    u_strcpy(tempB, first);
                    u_strcpy(first, second);
                    u_strcpy(second, tempB);

                    tempLen = firstLen;
                    firstLen = chLen;
                    chLen = tempLen;

                    tempLen = firstEx;
                    firstEx = exLen;
                    exLen = tempLen;
                    if(beforeStrength < strength) {
                        strength = beforeStrength;
                    }
                }
            }
            lastReset = FALSE;

            switch(strength){
            case UCOL_IDENTICAL:
                testEquality(coll,first,second);
                break;
            case UCOL_PRIMARY:
                testPrimary(coll,first,second);
                break;
            case UCOL_SECONDARY:
                testSecondary(coll,first,second);
                break;
            case UCOL_TERTIARY:
                testTertiary(coll,first,second);
                break;
            case UCOL_TOK_RESET:
                lastReset = TRUE;
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(before) {
                    beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
                }
                break;
            default:
                break;
            }

            if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
                before = FALSE;
            } else {
                firstLen = chLen;
                firstEx = exLen;
                u_strcpy(first, second);
            }
        }
        free(rulesCopy);
    }
}
647
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)648 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
649 UCollator *UCA = (UCollator *)collator;
650 return ucol_strcoll(UCA, source, sLen, target, tLen);
651 }
652
653 /*
654 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
655 #ifdef U_WINDOWS
656 LCID lcid = (LCID)collator;
657 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
658 #else
659 return 0;
660 #endif
661 }
662 */
663
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)664 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
665 UChar s1, UChar s2,
666 const UChar *s, const uint32_t sLen,
667 const UChar *t, const uint32_t tLen) {
668 UChar source[256] = {0};
669 UChar target[256] = {0};
670
671 source[0] = s1;
672 u_strcpy(source+1, s);
673 target[0] = s2;
674 u_strcpy(target+1, t);
675
676 return func(collator, opts, source, sLen+1, target, tLen+1);
677 }
678
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)679 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
680 UChar s1, UChar s2,
681 const UChar *s, const uint32_t sLen,
682 const UChar *t, const uint32_t tLen) {
683 UChar source[256] = {0};
684 UChar target[256] = {0};
685
686 u_strcpy(source, s);
687 source[sLen] = s1;
688 u_strcpy(target, t);
689 target[tLen] = s2;
690
691 return func(collator, opts, source, sLen+1, target, tLen+1);
692 }
693
/*
 * Estimates at which strength s and t differ, given that comparing them
 * through "func" produced "result".
 *
 * The strings are compared again with probe code units (U+310D/U+31A3 and
 * U+310F/U+31B7) added in front (swampEarlier) or at the end (swampLater).
 * Which of those perturbed comparisons still yield "result" (or its
 * opposite) selects the returned strength constant:
 * UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, or
 * UCOL_IDENTICAL (also returned immediately when result is UCOL_EQUAL).
 */
static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
    const UChar fSecondary = 0x310d;
    const UChar sSecondary = 0x31a3;
    const UChar fTertiary = 0x310f;
    const UChar sTertiary = 0x31b7;
    UCollationResult opposite;

    if(result == UCOL_EQUAL) {
        return UCOL_IDENTICAL;
    }
    opposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

    if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
        return UCOL_PRIMARY;
    }

    if((swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
       (swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
        return UCOL_SECONDARY;
    }

    if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
       (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
        return UCOL_TERTIARY;
    }

    if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == opposite) &&
       (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == opposite)) {
        return UCOL_QUATERNARY;
    }

    return UCOL_IDENTICAL;
}
729
/*
 * Writes a textual relation symbol into "buffer" and returns "buffer".
 *
 *   - "==" when res is UCOL_EQUAL or strength is the 0xdeadbeef sentinel;
 *   - otherwise strength+1 copies of '>' (res is UCOL_GREATER) or '<'.
 *
 * The caller must provide room for the symbol plus the terminating NUL.
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    char mark;
    uint32_t count;
    uint32_t pos;

    if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
        mark = '=';
        count = 2;
    } else {
        mark = (res == UCOL_GREATER) ? '>' : '<';
        count = strength + 1;
    }

    for(pos = 0; pos < count; pos++) {
        buffer[pos] = mark;
    }
    buffer[count] = '\0';

    return buffer;
}
751
752
753
logFailure(const char * platform,const char * test,const UChar * source,const uint32_t sLen,const UChar * target,const uint32_t tLen,UCollationResult realRes,uint32_t realStrength,UCollationResult expRes,uint32_t expStrength,UBool error)754 static void logFailure (const char *platform, const char *test,
755 const UChar *source, const uint32_t sLen,
756 const UChar *target, const uint32_t tLen,
757 UCollationResult realRes, uint32_t realStrength,
758 UCollationResult expRes, uint32_t expStrength, UBool error) {
759
760 uint32_t i = 0;
761
762 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
763 static int32_t maxOutputLength = 0;
764 int32_t outputLength;
765
766 *sEsc = *tEsc = *s = *t = 0;
767 if(error == TRUE) {
768 log_err("Difference between expected and generated order. Run test with -v for more info\n");
769 } else if(VERBOSITY == 0) {
770 return;
771 }
772 for(i = 0; i<sLen; i++) {
773 sprintf(b, "%04X", source[i]);
774 strcat(sEsc, "\\u");
775 strcat(sEsc, b);
776 strcat(s, b);
777 strcat(s, " ");
778 if(source[i] < 0x80) {
779 sprintf(b, "(%c)", source[i]);
780 strcat(sEsc, b);
781 }
782 }
783 for(i = 0; i<tLen; i++) {
784 sprintf(b, "%04X", target[i]);
785 strcat(tEsc, "\\u");
786 strcat(tEsc, b);
787 strcat(t, b);
788 strcat(t, " ");
789 if(target[i] < 0x80) {
790 sprintf(b, "(%c)", target[i]);
791 strcat(tEsc, b);
792 }
793 }
794 /*
795 strcpy(output, "[[ ");
796 strcat(output, sEsc);
797 strcat(output, getRelationSymbol(expRes, expStrength, relation));
798 strcat(output, tEsc);
799
800 strcat(output, " : ");
801
802 strcat(output, sEsc);
803 strcat(output, getRelationSymbol(realRes, realStrength, relation));
804 strcat(output, tEsc);
805 strcat(output, " ]] ");
806
807 log_verbose("%s", output);
808 */
809
810
811 strcpy(output, "DIFF: ");
812
813 strcat(output, s);
814 strcat(output, " : ");
815 strcat(output, t);
816
817 strcat(output, test);
818 strcat(output, ": ");
819
820 strcat(output, sEsc);
821 strcat(output, getRelationSymbol(expRes, expStrength, relation));
822 strcat(output, tEsc);
823
824 strcat(output, " ");
825
826 strcat(output, platform);
827 strcat(output, ": ");
828
829 strcat(output, sEsc);
830 strcat(output, getRelationSymbol(realRes, realStrength, relation));
831 strcat(output, tEsc);
832
833 outputLength = (int32_t)strlen(output);
834 if(outputLength > maxOutputLength) {
835 maxOutputLength = outputLength;
836 U_ASSERT(outputLength < sizeof(output));
837 }
838
839 log_verbose("%s\n", output);
840
841 }
842
843 /*
844 static void printOutRules(const UChar *rules) {
845 uint32_t len = u_strlen(rules);
846 uint32_t i = 0;
847 char toPrint;
848 uint32_t line = 0;
849
850 fprintf(stdout, "Rules:");
851
852 for(i = 0; i<len; i++) {
853 if(rules[i]<0x7f && rules[i]>=0x20) {
854 toPrint = (char)rules[i];
855 if(toPrint == '&') {
856 line = 1;
857 fprintf(stdout, "\n&");
858 } else if(toPrint == ';') {
859 fprintf(stdout, "<<");
860 line+=2;
861 } else if(toPrint == ',') {
862 fprintf(stdout, "<<<");
863 line+=3;
864 } else {
865 fprintf(stdout, "%c", toPrint);
866 line++;
867 }
868 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
869 fprintf(stdout, "\\u%04X", rules[i]);
870 line+=6;
871 }
872 if(line>72) {
873 fprintf(stdout, "\n");
874 line = 0;
875 }
876 }
877
878 log_verbose("\n");
879
880 }
881 */
882
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)883 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
884 uint32_t diffs = 0;
885 UCollationResult realResult;
886 uint32_t realStrength;
887
888 uint32_t sLen = u_strlen(first);
889 uint32_t tLen = u_strlen(second);
890
891 realResult = func(collator, opts, first, sLen, second, tLen);
892 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
893
894 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
895 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
896 diffs++;
897 } else if(realResult != UCOL_LESS || realStrength != strength) {
898 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
899 diffs++;
900 }
901 return diffs;
902 }
903
904
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)905 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
906 const UChar *rules = NULL, *current = NULL;
907 int32_t ruleLen = 0;
908 uint32_t strength = 0;
909 uint32_t chOffset = 0; uint32_t chLen = 0;
910 uint32_t exOffset = 0; uint32_t exLen = 0;
911 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
912 /* uint32_t rExpsLen = 0; */
913 uint32_t firstLen = 0, secondLen = 0;
914 UBool varT = FALSE; UBool top_ = TRUE;
915 uint16_t specs = 0;
916 UBool startOfRules = TRUE;
917 UColTokenParser src;
918 UColOptionSet opts;
919
920 UChar first[256];
921 UChar second[256];
922 UChar *rulesCopy = NULL;
923
924 uint32_t UCAdiff = 0;
925 uint32_t Windiff = 1;
926 UParseError parseError;
927
928 src.opts = &opts;
929
930 rules = ucol_getRules(coll, &ruleLen);
931
932 /*printOutRules(rules);*/
933
934 if(U_SUCCESS(*status) && ruleLen > 0) {
935 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
936 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
937 src.current = src.source = rulesCopy;
938 src.end = rulesCopy+ruleLen;
939 src.extraCurrent = src.end;
940 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
941 *first = *second = 0;
942
943 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
944 strength = src.parsedToken.strength;
945 chOffset = src.parsedToken.charsOffset;
946 chLen = src.parsedToken.charsLen;
947 exOffset = src.parsedToken.extensionOffset;
948 exLen = src.parsedToken.extensionLen;
949 prefixOffset = src.parsedToken.prefixOffset;
950 prefixLen = src.parsedToken.prefixLen;
951 specs = src.parsedToken.flags;
952
953 startOfRules = FALSE;
954 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
955 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
956
957 u_strncpy(second,rulesCopy+chOffset, chLen);
958 second[chLen] = 0;
959 secondLen = chLen;
960
961 if(exLen > 0) {
962 u_strncat(first, rulesCopy+exOffset, exLen);
963 first[firstLen+exLen] = 0;
964 firstLen += exLen;
965 }
966
967 if(strength != UCOL_TOK_RESET) {
968 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
969 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
970 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
971 }
972 }
973
974
975 firstLen = chLen;
976 u_strcpy(first, second);
977
978 }
979 if(UCAdiff != 0 && Windiff != 0) {
980 log_verbose("\n");
981 }
982 if(UCAdiff == 0) {
983 log_verbose("No immediate difference with %s!\n", refName);
984 }
985 if(Windiff == 0) {
986 log_verbose("No immediate difference with Win32!\n");
987 }
988 free(rulesCopy);
989 }
990 }
991
/*
 * Takes two CEs, each given as a lead value and a continuation value
 * (s1/s2 and t1/t2), and compares them the way CEs should be compared:
 * primary vs. primary, then secondary vs. secondary, then tertiary vs.
 * tertiary. For every level the lead bits are the more significant half of
 * the key and the continuation bits the less significant half.
 *
 * Returns 0 only when both pairs are bit-for-bit equal, -1 when s sorts
 * before t, and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sKey[3];
    uint32_t tKey[3];
    int level;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary: upper 16 bits of lead and continuation */
    sKey[0] = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tKey[0] = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    /* secondary: bits 8..15 */
    sKey[1] = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tKey[1] = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    /* tertiary: bits 0..7 */
    sKey[2] = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tKey[2] = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);

    for(level = 0; level < 2; level++) {
        if(sKey[level] != tKey[level]) {
            return (sKey[level] < tKey[level]) ? -1 : 1;
        }
    }

    /* Last level: anything that is not "less" is reported as "greater". */
    return (sKey[2] < tKey[2]) ? -1 : 1;
}
1028
/*
 * One entry of the indirect-positioning table: a start CE and an optional
 * limit CE, each stored as a lead value plus a continuation value.
 * setIndirectBoundaries() stores zeros in the limit fields when no limit is
 * supplied.
 */
typedef struct {
    uint32_t startCE;      /* lead CE of the start position */
    uint32_t startContCE;  /* continuation CE of the start position */
    uint32_t limitCE;      /* lead CE of the limit, or 0 */
    uint32_t limitContCE;  /* continuation CE of the limit, or 0 */
} indirectBoundaries;
1035
/*
 * These values are used for finding CE values for indirect positioning.
 * Indirect positioning is a mechanism for allowing resets on symbolic
 * values. It only works for resets, and indirect names cannot be tailored.
 * An indirect name can define either an anchor point or a range. An anchor
 * point behaves in exactly the same way as a code point in a reset would,
 * except that it cannot be tailored. A range (currently only the [top]
 * range is known) explicitly sets the upper bound for generated CEs, thus
 * allowing better control over how many CEs can be squeezed into the range
 * without a performance penalty. In that respect, [top] is used for
 * tailoring locales that use CJK characters. Other indirect values are
 * currently a pure convenience: they can be used to ensure that the CEs
 * are always positioned in the same place relative to a point with known
 * properties (e.g. first primary ignorable).
 */
static indirectBoundaries ucolIndirectBoundaries[15];
/* testCEs() fills the table above only while this flag is FALSE. */
static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1052 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1053
1054 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1055 /* to initalize here. */
1056 ucolIndirectBoundaries[indexR].startCE = start[0];
1057 ucolIndirectBoundaries[indexR].startContCE = start[1];
1058 if(end) {
1059 ucolIndirectBoundaries[indexR].limitCE = end[0];
1060 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1061 } else {
1062 ucolIndirectBoundaries[indexR].limitCE = 0;
1063 ucolIndirectBoundaries[indexR].limitContCE = 0;
1064 }
1065 }
1066
/*
 * Re-parses the tailoring rules of coll token by token and checks the CEs
 * that the collator produces for each token against the reset point and
 * against the previous token.
 *
 * coll   - collator whose rules (ucol_getRules) are examined.
 * status - in/out ICU error code. When it already signals a failure, or the
 *          root collator cannot be opened, the function logs an error and
 *          returns without testing anything.
 *
 * Every exit path releases the private copy of the rules and the root
 * collator opened here.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {

    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;

    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE;
    uint32_t lastContCE;

    int32_t result = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t oldOffset = 0;

    uint16_t specs = 0;
    UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate c;
    UCollator *UCA = NULL;
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE = 0;
    uint32_t UCOL_NEXT_TOP_VALUE = 0, UCOL_NEXT_TOP_CONT = 0;

    UCA = ucol_open("root", status);
    if(U_FAILURE(*status) || UCA == NULL) {
        /* The image of the root collator is dereferenced below, so stop here. */
        log_err("Couldn't open the root collator, error: %s\n", u_errorName(*status));
        ucol_close(UCA);
        return;
    }

    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);

    src.invUCA = ucol_initInverseUCA(status);

    if(indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
        indirectBoundariesSet = TRUE;
    }


    if(U_SUCCESS(*status) && ruleLen > 0) {
        /* The parser gets a private copy of the rules followed by extra */
        /* space (extraCurrent..extraEnd). */
        rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            log_err("Out of memory copying the rules\n");
            goto cleanup;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

            uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);

            currCE = ucol_getNextCE(coll, &c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
                currCE = ucol_getNextCE(coll, &c, status);
            }

            /* Keep the second CE only when it is a continuation of the first. */
            currContCE = ucol_getNextCE(coll, &c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }

            /* we need to repack CEs here */

            if(strength == UCOL_TOK_RESET) {
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(top_ == TRUE) {
                    int32_t index = src.parsedToken.indirectIndex;

                    nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
                } else {
                    nextCE = baseCE = currCE;
                    nextContCE = baseContCE = currContCE;
                }
                maxStrength = UCOL_IDENTICAL;
            } else {
                if(strength < maxStrength) {
                    maxStrength = strength;
                    if(baseCE == UCOL_RESET_TOP_VALUE) {
                        log_verbose("Resetting to [top]\n");
                        nextCE = UCOL_NEXT_TOP_VALUE;
                        nextContCE = UCOL_NEXT_TOP_CONT;
                    } else {
                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                    }
                    if(result < 0) {
                        if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
                        } else {
                            log_err("couldn't find the CE\n");
                        }
                        /* Give up on this collator, but release what we own first. */
                        goto cleanup;
                    }
                }

                currCE &= 0xFFFFFF3F;
                currContCE &= 0xFFFFFFBF;

                if(maxStrength == UCOL_IDENTICAL) {
                    if(baseCE != currCE || baseContCE != currContCE) {
                        log_err("current CE  (initial strength UCOL_EQUAL)\n");
                    }
                } else {
                    if(strength == UCOL_IDENTICAL) {
                        if(lastCE != currCE || lastContCE != currContCE) {
                            log_err("current CE  (initial strength UCOL_EQUAL)\n");
                        }
                    } else {
                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                            log_err("current CE is not less than base CE\n");
                        }
                        if(!before) {
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                                log_err("sequence of generated CEs is broken\n");
                            }
                        } else {
                            /* Only the first token after a [before] reset is */
                            /* compared in the opposite direction. */
                            before = FALSE;
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                                log_err("sequence of generated CEs is broken\n");
                            }
                        }
                    }
                }

            }

            oldOffset = chOffset;
            lastCE = currCE & 0xFFFFFF3F;
            lastContCE = currContCE & 0xFFFFFFBF;
        }
    }

cleanup:
    free(rulesCopy);
    ucol_close(UCA);
}
1256
#if 0
/* these locales are now picked from index RB */
/* Disabled with #if 0: this fixed list is not compiled. */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1271
/* Stand-alone tailoring rules, written with escapes that u_unescape() resolves. */
/* RamsRulesTest() builds a collator from each entry and runs testCollator() */
/* and testCEs() on it. The trailing comment on an entry is a disabled */
/* alternative spelling of the rule. */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1285
1286
TestCollations(void)1287 static void TestCollations(void) {
1288 int32_t noOfLoc = uloc_countAvailable();
1289 int32_t i = 0, j = 0;
1290
1291 UErrorCode status = U_ZERO_ERROR;
1292 char cName[256];
1293 UChar name[256];
1294 int32_t nameSize;
1295
1296
1297 const char *locName = NULL;
1298 UCollator *coll = NULL;
1299 UCollator *UCA = ucol_open("", &status);
1300 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1301 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1302
1303 for(i = 0; i<noOfLoc; i++) {
1304 status = U_ZERO_ERROR;
1305 locName = uloc_getAvailable(i);
1306 if(uprv_strcmp("ja", locName) == 0) {
1307 log_verbose("Don't know how to test prefixes\n");
1308 continue;
1309 }
1310 if(hasCollationElements(locName)) {
1311 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1312 for(j = 0; j<nameSize; j++) {
1313 cName[j] = (char)name[j];
1314 }
1315 cName[nameSize] = 0;
1316 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1317 coll = ucol_open(locName, &status);
1318 if(U_SUCCESS(status)) {
1319 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1320 ucol_close(coll);
1321 } else {
1322 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1323 status = U_ZERO_ERROR;
1324 }
1325 }
1326 }
1327 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1328 ucol_close(UCA);
1329 }
1330
RamsRulesTest(void)1331 static void RamsRulesTest(void) {
1332 UErrorCode status = U_ZERO_ERROR;
1333 int32_t i = 0;
1334 UCollator *coll = NULL;
1335 UChar rule[2048];
1336 uint32_t ruleLen;
1337 int32_t noOfLoc = uloc_countAvailable();
1338 const char *locName = NULL;
1339
1340 log_verbose("RamsRulesTest\n");
1341
1342 for(i = 0; i<noOfLoc; i++) {
1343 status = U_ZERO_ERROR;
1344 locName = uloc_getAvailable(i);
1345 if(hasCollationElements(locName)) {
1346 if (uprv_strcmp("ja", locName)==0) {
1347 log_verbose("Don't know how to test Japanese because of prefixes\n");
1348 continue;
1349 }
1350 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1351 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1352 continue;
1353 }
1354 if (uprv_strcmp("km", locName)==0 ||
1355 uprv_strcmp("km_KH", locName)==0 ||
1356 uprv_strcmp("zh", locName)==0 ||
1357 uprv_strcmp("zh_Hant", locName)==0 ) {
1358 continue; /* TODO: enable these locale tests after trac#6040 is fixed. */
1359 }
1360 log_verbose("Testing locale %s\n", locName);
1361 coll = ucol_open(locName, &status);
1362 if(U_SUCCESS(status)) {
1363 if(coll->image->jamoSpecial == TRUE) {
1364 log_err("%s has special JAMOs\n", locName);
1365 }
1366 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1367 testCollator(coll, &status);
1368 testCEs(coll, &status);
1369 ucol_close(coll);
1370 }
1371 }
1372 }
1373
1374 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1375 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1376 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1377 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1378 if(U_SUCCESS(status)) {
1379 testCollator(coll, &status);
1380 testCEs(coll, &status);
1381 ucol_close(coll);
1382 }
1383 }
1384
1385 }
1386
IsTailoredTest(void)1387 static void IsTailoredTest(void) {
1388 UErrorCode status = U_ZERO_ERROR;
1389 uint32_t i = 0;
1390 UCollator *coll = NULL;
1391 UChar rule[2048];
1392 UChar tailored[2048];
1393 UChar notTailored[2048];
1394 uint32_t ruleLen, tailoredLen, notTailoredLen;
1395
1396 log_verbose("IsTailoredTest\n");
1397
1398 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1399 ruleLen = u_strlen(rule);
1400
1401 u_uastrcpy(tailored, "ABCcd");
1402 tailoredLen = u_strlen(tailored);
1403
1404 u_uastrcpy(notTailored, "ZabD");
1405 notTailoredLen = u_strlen(notTailored);
1406
1407 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1408 if(U_SUCCESS(status)) {
1409 for(i = 0; i<tailoredLen; i++) {
1410 if(!ucol_isTailored(coll, tailored[i], &status)) {
1411 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1412 }
1413 }
1414 for(i = 0; i<notTailoredLen; i++) {
1415 if(ucol_isTailored(coll, notTailored[i], &status)) {
1416 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1417 }
1418 }
1419 ucol_close(coll);
1420 }
1421 else {
1422 log_err("Can't tailor rules");
1423 }
1424 /* Code coverage */
1425 status = U_ZERO_ERROR;
1426 coll = ucol_open("ja", &status);
1427 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1428 log_err("0x4E9C should be tailored - it is reported as not\n");
1429 }
1430 ucol_close(coll);
1431 }
1432
1433
/* Input for TestChMove(): it passes every pair (earlier entry, later entry) */
/* to doTest() with UCOL_LESS on the "cs" collator, so the entries are listed */
/* in their expected ascending order. Escapes are resolved with u_unescape(). */
const static char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1453
TestChMove(void)1454 static void TestChMove(void) {
1455 UChar t1[256] = {0};
1456 UChar t2[256] = {0};
1457
1458 uint32_t i = 0, j = 0;
1459 uint32_t size = 0;
1460 UErrorCode status = U_ZERO_ERROR;
1461
1462 UCollator *coll = ucol_open("cs", &status);
1463
1464 if(U_SUCCESS(status)) {
1465 size = sizeof(chTest)/sizeof(chTest[0]);
1466 for(i = 0; i < size-1; i++) {
1467 for(j = i+1; j < size; j++) {
1468 u_unescape(chTest[i], t1, 256);
1469 u_unescape(chTest[j], t2, 256);
1470 doTest(coll, t1, t2, UCOL_LESS);
1471 }
1472 }
1473 }
1474 else {
1475 log_err("Can't open collator");
1476 }
1477 ucol_close(coll);
1478 }
1479
1480
1481
1482
/* The same six strings, in the same order, as the data of the second case in */
/* TestImplicitTailoring() (rule "&\\u4e00 < a <<< A < b <<< B"). */
/* NOTE(review): no compiled code visible in this part of the file reads this */
/* table - confirm before removing it. */
const static char impTest[][20] = {
    "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
1491
1492
TestImplicitTailoring(void)1493 static void TestImplicitTailoring(void) {
1494 static const struct {
1495 const char *rules;
1496 const char *data[10];
1497 const uint32_t len;
1498 } tests[] = {
1499 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1500 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1501 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1502 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1503 };
1504
1505 int32_t i = 0;
1506
1507 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1508 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1509 }
1510
1511 /*
1512 UChar t1[256] = {0};
1513 UChar t2[256] = {0};
1514
1515 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1516
1517 uint32_t i = 0, j = 0;
1518 uint32_t size = 0;
1519 uint32_t ruleLen = 0;
1520 UErrorCode status = U_ZERO_ERROR;
1521 UCollator *coll = NULL;
1522 ruleLen = u_unescape(rule, t1, 256);
1523
1524 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1525
1526 if(U_SUCCESS(status)) {
1527 size = sizeof(impTest)/sizeof(impTest[0]);
1528 for(i = 0; i < size-1; i++) {
1529 for(j = i+1; j < size; j++) {
1530 u_unescape(impTest[i], t1, 256);
1531 u_unescape(impTest[j], t2, 256);
1532 doTest(coll, t1, t2, UCOL_LESS);
1533 }
1534 }
1535 }
1536 else {
1537 log_err("Can't open collator");
1538 }
1539 ucol_close(coll);
1540 */
1541 }
1542
TestFCDProblem(void)1543 static void TestFCDProblem(void) {
1544 UChar t1[256] = {0};
1545 UChar t2[256] = {0};
1546
1547 const char *s1 = "\\u0430\\u0306\\u0325";
1548 const char *s2 = "\\u04D1\\u0325";
1549
1550 UErrorCode status = U_ZERO_ERROR;
1551 UCollator *coll = ucol_open("", &status);
1552 u_unescape(s1, t1, 256);
1553 u_unescape(s2, t2, 256);
1554
1555 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1556 doTest(coll, t1, t2, UCOL_EQUAL);
1557
1558 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1559 doTest(coll, t1, t2, UCOL_EQUAL);
1560
1561 ucol_close(coll);
1562 }
1563
/* Capacity, in UChars, of the normalization buffers used by TestComposeDecompose(). */
#define NORM_BUFFER_TEST_LEN 32
/* One test case of TestComposeDecompose(): a code point and the two strings */
/* that are compared for it. */
typedef struct {
    UChar32 u;                          /* the code point */
    UChar NFC[NORM_BUFFER_TEST_LEN];    /* NFC result, or the code point itself (see TestComposeDecompose) */
    UChar NFD[NORM_BUFFER_TEST_LEN];    /* NFD result */
} tester;
1570
TestComposeDecompose(void)1571 static void TestComposeDecompose(void) {
1572 int32_t noOfLoc;
1573 int32_t i = 0, j = 0;
1574
1575 UErrorCode status = U_ZERO_ERROR;
1576
1577 const char *locName = NULL;
1578
1579 uint32_t nfcSize;
1580 uint32_t nfdSize;
1581 tester **t;
1582 uint32_t noCases = 0;
1583 UCollator *coll = NULL;
1584 UChar32 u = 0;
1585 UChar comp[NORM_BUFFER_TEST_LEN];
1586 uint32_t len = 0;
1587 UCollationElements *iter;
1588
1589 noOfLoc = uloc_countAvailable();
1590
1591 t = malloc(0x30000 * sizeof(tester *));
1592 t[0] = (tester *)malloc(sizeof(tester));
1593 log_verbose("Testing UCA extensively\n");
1594 coll = ucol_open("", &status);
1595 if(status == U_FILE_ACCESS_ERROR) {
1596 log_data_err("Is your data around?\n");
1597 return;
1598 } else if(U_FAILURE(status)) {
1599 log_err("Error opening collator\n");
1600 return;
1601 }
1602
1603
1604 for(u = 0; u < 0x30000; u++) {
1605 len = 0;
1606 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1607 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1608 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1609
1610 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1611 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1612 t[noCases]->u = u;
1613 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1614 u_strncpy(t[noCases]->NFC, comp, len);
1615 t[noCases]->NFC[len] = 0;
1616 }
1617 noCases++;
1618 t[noCases] = (tester *)malloc(sizeof(tester));
1619 uprv_memset(t[noCases], 0, sizeof(tester));
1620 }
1621 }
1622
1623 for(u=0; u<(UChar32)noCases; u++) {
1624 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1625 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1626 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1627 }
1628 }
1629 /*
1630 for(u = 0; u < 0x30000; u++) {
1631 if(!(u&0xFFFF)) {
1632 log_verbose("%08X ", u);
1633 }
1634 uprv_memset(t[noCases], 0, sizeof(tester));
1635 t[noCases]->u = u;
1636 len = 0;
1637 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1638 comp[len] = 0;
1639 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1640 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1641 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1642 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1643 }
1644 */
1645
1646 ucol_close(coll);
1647
1648 log_verbose("Testing locales, number of cases = %i\n", noCases);
1649 for(i = 0; i<noOfLoc; i++) {
1650 status = U_ZERO_ERROR;
1651 locName = uloc_getAvailable(i);
1652 if(hasCollationElements(locName)) {
1653 char cName[256];
1654 UChar name[256];
1655 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1656
1657 for(j = 0; j<nameSize; j++) {
1658 cName[j] = (char)name[j];
1659 }
1660 cName[nameSize] = 0;
1661 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1662
1663 coll = ucol_open(locName, &status);
1664 ucol_setStrength(coll, UCOL_IDENTICAL);
1665 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1666
1667 for(u=0; u<(UChar32)noCases; u++) {
1668 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1669 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1670 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1671 log_verbose("Testing NFC\n");
1672 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1673 backAndForth(iter);
1674 log_verbose("Testing NFD\n");
1675 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1676 backAndForth(iter);
1677 }
1678 }
1679 ucol_closeElements(iter);
1680 ucol_close(coll);
1681 }
1682 }
1683 for(u = 0; u <= (UChar32)noCases; u++) {
1684 free(t[u]);
1685 }
1686 free(t);
1687 }
1688
TestEmptyRule(void)1689 static void TestEmptyRule(void) {
1690 UErrorCode status = U_ZERO_ERROR;
1691 UChar rulez[] = { 0 };
1692 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1693
1694 ucol_close(coll);
1695 }
1696
TestUCARules(void)1697 static void TestUCARules(void) {
1698 UErrorCode status = U_ZERO_ERROR;
1699 UChar b[256];
1700 UChar *rules = b;
1701 uint32_t ruleLen = 0;
1702 UCollator *UCAfromRules = NULL;
1703 UCollator *coll = ucol_open("", &status);
1704 if(status == U_FILE_ACCESS_ERROR) {
1705 log_data_err("Is your data around?\n");
1706 return;
1707 } else if(U_FAILURE(status)) {
1708 log_err("Error opening collator\n");
1709 return;
1710 }
1711 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1712
1713 log_verbose("TestUCARules\n");
1714 if(ruleLen > 256) {
1715 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1716 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1717 }
1718 log_verbose("Rules length is %d\n", ruleLen);
1719 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1720 if(U_SUCCESS(status)) {
1721 ucol_close(UCAfromRules);
1722 } else {
1723 log_verbose("Unable to create a collator from UCARules!\n");
1724 }
1725 /*
1726 u_unescape(blah, b, 256);
1727 ucol_getSortKey(coll, b, 1, res, 256);
1728 */
1729 ucol_close(coll);
1730 if(rules != b) {
1731 free(rules);
1732 }
1733 }
1734
1735
1736 /* Pinyin tonal order */
1737 /*
1738 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1739 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1740 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1741 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1742 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1743 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1744 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1745 .. (\u00fc)
1746
1747 However, in testing we got the following order:
1748 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1749 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1750 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1751 .. (\u0113)
1752 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1753 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1754 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1755 .. (\u01d8)
1756 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1757 */
1758
/*
 * Passes a set of "[before 1]" rules for a, e, i, o, u (plus one plain
 * "&u" reset) and the 30 data strings below to genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *pinyinData[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    static const char pinyinRules[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(pinyinRules, pinyinData, LEN(pinyinData));
}
1777
#if 0
/* superseded by TestBeforePinyin */
/* Disabled with #if 0: this test is not compiled. */
static void TestJ784(void) {
    const static char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1793
#if 0
/* superseded by the changes to the lv locale */
/* Disabled with #if 0: this test is not compiled. */
static void TestJ831(void) {
    const static char *data[] = {
        "I",
        "i",
        "Y",
        "y"
    };
    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1806
/*
 * Runs the same 14 data strings through genericLocaleStarter() for the "fr"
 * locale and through genericRulesStarter() with a "[backwards 2]" rule that
 * tailors \u00e6 and \u00c6 after A with expansions.
 */
static void TestJ815(void) {
    static const char *words[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };
    static const char backwardsRules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

    genericLocaleStarter("fr", words, LEN(words));
    genericRulesStarter(backwardsRules, words, LEN(words));
}
1827
1828
1829 /*
1830 "& a < b < c < d& r < c", "& a < b < d& r < c",
1831 "& a < b < c < d& c < m", "& a < b < c < m < d",
1832 "& a < b < c < d& a < m", "& a < m < b < c < d",
1833 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1834 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1835 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1836 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1837 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1838 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1839 */
TestRedundantRules(void)1840 static void TestRedundantRules(void) {
1841 int32_t i;
1842
1843 static const struct {
1844 const char *rules;
1845 const char *expectedRules;
1846 const char *testdata[8];
1847 uint32_t testdatalen;
1848 } tests[] = {
1849 /* this test conflicts with positioning of CODAN placeholder */
1850 /*{
1851 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1852 "&\\u2089<<<x",
1853 {"\\u2089", "x"}, 2
1854 }, */
1855 /* this test conflicts with the [before x] syntax tightening */
1856 /*{
1857 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1858 "&\\u0252<<<x",
1859 {"\\u0252", "x"}, 2
1860 }, */
1861 /* this test conflicts with the [before x] syntax tightening */
1862 /*{
1863 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1864 "& a <<< x < b <<< c << d <<< e",
1865 {"a", "x", "b", "c", "d", "e"}, 6
1866 }, */
1867 {
1868 "& a < b < c < d& [before 1] c < m",
1869 "& a < b < m < c < d",
1870 {"a", "b", "m", "c", "d"}, 5
1871 },
1872 {
1873 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1874 "& a < b <<< c << d <<< x <<< e",
1875 {"a", "b", "c", "d", "x", "e"}, 6
1876 },
1877 /* this test conflicts with the [before x] syntax tightening */
1878 /* {
1879 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1880 "& a < b <<< c <<< x << d <<< e",
1881 {"a", "b", "c", "x", "d", "e"},, 6
1882 }, */
1883 {
1884 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1885 "& a < b <<< c << d <<< e <<< f < x < g",
1886 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1887 },
1888 {
1889 "& a <<< b << c < d& a < m",
1890 "& a <<< b << c < m < d",
1891 {"a", "b", "c", "m", "d"}, 5
1892 },
1893 {
1894 "&a<b<<b\\u0301 &z<b",
1895 "&a<b\\u0301 &z<b",
1896 {"a", "b\\u0301", "z", "b"}, 4
1897 },
1898 {
1899 "&z<m<<<q<<<m",
1900 "&z<q<<<m",
1901 {"z", "q", "m"},3
1902 },
1903 {
1904 "&z<<<m<q<<<m",
1905 "&z<q<<<m",
1906 {"z", "q", "m"}, 3
1907 },
1908 {
1909 "& a < b < c < d& r < c",
1910 "& a < b < d& r < c",
1911 {"a", "b", "d"}, 3
1912 },
1913 {
1914 "& a < b < c < d& r < c",
1915 "& a < b < d& r < c",
1916 {"r", "c"}, 2
1917 },
1918 {
1919 "& a < b < c < d& c < m",
1920 "& a < b < c < m < d",
1921 {"a", "b", "c", "m", "d"}, 5
1922 },
1923 {
1924 "& a < b < c < d& a < m",
1925 "& a < m < b < c < d",
1926 {"a", "m", "b", "c", "d"}, 5
1927 }
1928 };
1929
1930
1931 UCollator *credundant = NULL;
1932 UCollator *cresulting = NULL;
1933 UErrorCode status = U_ZERO_ERROR;
1934 UChar rlz[2048] = { 0 };
1935 uint32_t rlen = 0;
1936
1937 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1938 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1939 rlen = u_unescape(tests[i].rules, rlz, 2048);
1940
1941 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1942 if(status == U_FILE_ACCESS_ERROR) {
1943 log_data_err("Is your data around?\n");
1944 return;
1945 } else if(U_FAILURE(status)) {
1946 log_err("Error opening collator\n");
1947 return;
1948 }
1949
1950 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1951 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1952
1953 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1954
1955 ucol_close(credundant);
1956 ucol_close(cresulting);
1957
1958 log_verbose("testing using data\n");
1959
1960 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
1961 }
1962
1963 }
1964
TestExpansionSyntax(void)1965 static void TestExpansionSyntax(void) {
1966 int32_t i;
1967
1968 const static char *rules[] = {
1969 "&AE <<< a << b <<< c &d <<< f",
1970 "&AE <<< a <<< b << c << d < e < f <<< g",
1971 "&AE <<< B <<< C / D <<< F"
1972 };
1973
1974 const static char *expectedRules[] = {
1975 "&A <<< a / E << b / E <<< c /E &d <<< f",
1976 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
1977 "&A <<< B / E <<< C / ED <<< F / E"
1978 };
1979
1980 const static char *testdata[][8] = {
1981 {"AE", "a", "b", "c"},
1982 {"AE", "a", "b", "c", "d", "e", "f", "g"},
1983 {"AE", "B", "C"} /* / ED <<< F / E"},*/
1984 };
1985
1986 const static uint32_t testdatalen[] = {
1987 4,
1988 8,
1989 3
1990 };
1991
1992
1993
1994 UCollator *credundant = NULL;
1995 UCollator *cresulting = NULL;
1996 UErrorCode status = U_ZERO_ERROR;
1997 UChar rlz[2048] = { 0 };
1998 uint32_t rlen = 0;
1999
2000 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2001 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2002 rlen = u_unescape(rules[i], rlz, 2048);
2003
2004 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2005 if(status == U_FILE_ACCESS_ERROR) {
2006 log_data_err("Is your data around?\n");
2007 return;
2008 } else if(U_FAILURE(status)) {
2009 log_err("Error opening collator\n");
2010 return;
2011 }
2012 rlen = u_unescape(expectedRules[i], rlz, 2048);
2013 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2014
2015 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2016 /* as a hard error test, but only in information mode */
2017 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2018
2019 ucol_close(credundant);
2020 ucol_close(cresulting);
2021
2022 log_verbose("testing using data\n");
2023
2024 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2025 }
2026 }
2027
TestCase(void)2028 static void TestCase(void)
2029 {
2030 const static UChar gRules[MAX_TOKEN_LEN] =
2031 /*" & 0 < 1,\u2461<a,A"*/
2032 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2033
2034 const static UChar testCase[][MAX_TOKEN_LEN] =
2035 {
2036 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2037 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2038 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2039 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2040 };
2041
2042 const static UCollationResult caseTestResults[][9] =
2043 {
2044 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2045 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2046 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2047 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2048 };
2049
2050 const static UColAttributeValue caseTestAttributes[][2] =
2051 {
2052 { UCOL_LOWER_FIRST, UCOL_OFF},
2053 { UCOL_UPPER_FIRST, UCOL_OFF},
2054 { UCOL_LOWER_FIRST, UCOL_ON},
2055 { UCOL_UPPER_FIRST, UCOL_ON}
2056 };
2057 int32_t i,j,k;
2058 UErrorCode status = U_ZERO_ERROR;
2059 UCollationElements *iter;
2060 UCollator *myCollation;
2061 myCollation = ucol_open("en_US", &status);
2062
2063 if(U_FAILURE(status)){
2064 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2065 return;
2066 }
2067 log_verbose("Testing different case settings\n");
2068 ucol_setStrength(myCollation, UCOL_TERTIARY);
2069
2070 for(k = 0; k<4; k++) {
2071 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2072 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2073 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2074 for (i = 0; i < 3 ; i++) {
2075 for(j = i+1; j<4; j++) {
2076 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2077 }
2078 }
2079 }
2080 ucol_close(myCollation);
2081
2082 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2083 if(U_FAILURE(status)){
2084 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2085 return;
2086 }
2087 log_verbose("Testing different case settings with custom rules\n");
2088 ucol_setStrength(myCollation, UCOL_TERTIARY);
2089
2090 for(k = 0; k<4; k++) {
2091 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2092 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2093 for (i = 0; i < 3 ; i++) {
2094 for(j = i+1; j<4; j++) {
2095 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2096 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2097 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2098 backAndForth(iter);
2099 ucol_closeElements(iter);
2100 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2101 backAndForth(iter);
2102 ucol_closeElements(iter);
2103 }
2104 }
2105 }
2106 ucol_close(myCollation);
2107 {
2108 const static char *lowerFirst[] = {
2109 "h",
2110 "H",
2111 "ch",
2112 "Ch",
2113 "CH",
2114 "cha",
2115 "chA",
2116 "Cha",
2117 "ChA",
2118 "CHa",
2119 "CHA",
2120 "i",
2121 "I"
2122 };
2123
2124 const static char *upperFirst[] = {
2125 "H",
2126 "h",
2127 "CH",
2128 "Ch",
2129 "ch",
2130 "CHA",
2131 "CHa",
2132 "ChA",
2133 "Cha",
2134 "chA",
2135 "cha",
2136 "I",
2137 "i"
2138 };
2139 log_verbose("mixed case test\n");
2140 log_verbose("lower first, case level off\n");
2141 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2142 log_verbose("upper first, case level off\n");
2143 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2144 log_verbose("lower first, case level on\n");
2145 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2146 log_verbose("upper first, case level on\n");
2147 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2148 }
2149
2150 }
2151
TestIncrementalNormalize(void)2152 static void TestIncrementalNormalize(void) {
2153
2154 /*UChar baseA =0x61;*/
2155 UChar baseA =0x41;
2156 /* UChar baseB = 0x42;*/
2157 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2158 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2159 /*
2160 0x316 is combining grave accent below, cc=220
2161 0x321 is combining palatalized hook below, cc=202
2162 0x300 is combining grave accent, cc=230
2163 */
2164
2165 int maxSLen = 2000;
2166 /*int maxSLen = 64000;*/
2167 int sLen;
2168 int i;
2169
2170 UCollator *coll;
2171 UErrorCode status = U_ZERO_ERROR;
2172 UCollationResult result;
2173
2174 int32_t myQ = QUICK;
2175
2176 if(QUICK < 0) {
2177 QUICK = 1;
2178 }
2179
2180 {
2181 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2182 /* most buffers along the way.*/
2183 UChar *strA;
2184 UChar *strB;
2185
2186 strA = malloc((maxSLen+1) * sizeof(UChar));
2187 strB = malloc((maxSLen+1) * sizeof(UChar));
2188
2189 coll = ucol_open("en_US", &status);
2190 if(status == U_FILE_ACCESS_ERROR) {
2191 log_data_err("Is your data around?\n");
2192 return;
2193 } else if(U_FAILURE(status)) {
2194 log_err("Error opening collator\n");
2195 return;
2196 }
2197 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2198
2199 /*for (sLen = 257; sLen<maxSLen; sLen++) {*/
2200 /*for (sLen = 4; sLen<maxSLen; sLen++) {*/
2201 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2202 for (sLen = 500; sLen<501; sLen++) {
2203 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2204 strA[0] = baseA;
2205 strB[0] = baseA;
2206 for (i=1; i<=sLen-1; i++) {
2207 strA[i] = ccMix[i % 3];
2208 strB[sLen-i] = ccMix[i % 3];
2209 }
2210 strA[sLen] = 0;
2211 strB[sLen] = 0;
2212
2213 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2214 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2215 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2216 doTest(coll, strA, strB, UCOL_EQUAL);
2217 }
2218 free(strA);
2219 free(strB);
2220 }
2221
2222 QUICK = myQ;
2223
2224
2225 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2226 /* of the string. Checks a couple of edge cases.*/
2227
2228 {
2229 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2230 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2231 ucol_setStrength(coll, UCOL_TERTIARY);
2232 doTest(coll, strA, strB, UCOL_EQUAL);
2233 }
2234
2235 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2236
2237 {
2238 /* New UCA 3.1.1.
2239 * test below used a code point from Desseret, which sorts differently
2240 * than d800 dc00
2241 */
2242 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2243 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2244 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2245 ucol_setStrength(coll, UCOL_TERTIARY);
2246 doTest(coll, strA, strB, UCOL_GREATER);
2247 }
2248
2249 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2250
2251 {
2252 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2253 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2254 char sortKeyA[50];
2255 char sortKeyAz[50];
2256 char sortKeyB[50];
2257 char sortKeyBz[50];
2258 int r;
2259
2260 /* there used to be -3 here. Hmmmm.... */
2261 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2262 result = ucol_strcoll(coll, strA, 3, strB, 3);
2263 if (result != UCOL_GREATER) {
2264 log_err("ERROR 1 in test 4\n");
2265 }
2266 result = ucol_strcoll(coll, strA, -1, strB, -1);
2267 if (result != UCOL_EQUAL) {
2268 log_err("ERROR 2 in test 4\n");
2269 }
2270
2271 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2272 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2273 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2274 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2275
2276 r = strcmp(sortKeyA, sortKeyAz);
2277 if (r <= 0) {
2278 log_err("Error 3 in test 4\n");
2279 }
2280 r = strcmp(sortKeyA, sortKeyB);
2281 if (r <= 0) {
2282 log_err("Error 4 in test 4\n");
2283 }
2284 r = strcmp(sortKeyAz, sortKeyBz);
2285 if (r != 0) {
2286 log_err("Error 5 in test 4\n");
2287 }
2288
2289 ucol_setStrength(coll, UCOL_IDENTICAL);
2290 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2291 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2292 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2293 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2294
2295 r = strcmp(sortKeyA, sortKeyAz);
2296 if (r <= 0) {
2297 log_err("Error 6 in test 4\n");
2298 }
2299 r = strcmp(sortKeyA, sortKeyB);
2300 if (r <= 0) {
2301 log_err("Error 7 in test 4\n");
2302 }
2303 r = strcmp(sortKeyAz, sortKeyBz);
2304 if (r != 0) {
2305 log_err("Error 8 in test 4\n");
2306 }
2307 ucol_setStrength(coll, UCOL_TERTIARY);
2308 }
2309
2310
2311 /* Test 5: Null characters in non-normal source strings.*/
2312
2313 {
2314 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2315 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2316 char sortKeyA[50];
2317 char sortKeyAz[50];
2318 char sortKeyB[50];
2319 char sortKeyBz[50];
2320 int r;
2321
2322 result = ucol_strcoll(coll, strA, 6, strB, 6);
2323 if (result != UCOL_GREATER) {
2324 log_err("ERROR 1 in test 5\n");
2325 }
2326 result = ucol_strcoll(coll, strA, -1, strB, -1);
2327 if (result != UCOL_EQUAL) {
2328 log_err("ERROR 2 in test 5\n");
2329 }
2330
2331 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2332 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2333 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2334 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2335
2336 r = strcmp(sortKeyA, sortKeyAz);
2337 if (r <= 0) {
2338 log_err("Error 3 in test 5\n");
2339 }
2340 r = strcmp(sortKeyA, sortKeyB);
2341 if (r <= 0) {
2342 log_err("Error 4 in test 5\n");
2343 }
2344 r = strcmp(sortKeyAz, sortKeyBz);
2345 if (r != 0) {
2346 log_err("Error 5 in test 5\n");
2347 }
2348
2349 ucol_setStrength(coll, UCOL_IDENTICAL);
2350 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2351 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2352 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2353 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2354
2355 r = strcmp(sortKeyA, sortKeyAz);
2356 if (r <= 0) {
2357 log_err("Error 6 in test 5\n");
2358 }
2359 r = strcmp(sortKeyA, sortKeyB);
2360 if (r <= 0) {
2361 log_err("Error 7 in test 5\n");
2362 }
2363 r = strcmp(sortKeyAz, sortKeyBz);
2364 if (r != 0) {
2365 log_err("Error 8 in test 5\n");
2366 }
2367 ucol_setStrength(coll, UCOL_TERTIARY);
2368 }
2369
2370
2371 /* Test 6: Null character as base of a non-normal combining sequence.*/
2372
2373 {
2374 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2375 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2376
2377 result = ucol_strcoll(coll, strA, 5, strB, 5);
2378 if (result != UCOL_LESS) {
2379 log_err("Error 1 in test 6\n");
2380 }
2381 result = ucol_strcoll(coll, strA, -1, strB, -1);
2382 if (result != UCOL_EQUAL) {
2383 log_err("Error 2 in test 6\n");
2384 }
2385 }
2386
2387 ucol_close(coll);
2388 }
2389
2390
2391
2392 #if 0
2393 static void TestGetCaseBit(void) {
2394 static const char *caseBitData[] = {
2395 "a", "A", "ch", "Ch", "CH",
2396 "\\uFF9E", "\\u0009"
2397 };
2398
2399 static const uint8_t results[] = {
2400 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
2401 UCOL_UPPER_CASE, UCOL_LOWER_CASE
2402 };
2403
2404 uint32_t i, blen = 0;
2405 UChar b[256] = {0};
2406 UErrorCode status = U_ZERO_ERROR;
2407 UCollator *UCA = ucol_open("", &status);
2408 uint8_t res = 0;
2409
2410 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
2411 blen = u_unescape(caseBitData[i], b, 256);
2412 res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
2413 if(results[i] != res) {
2414 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
2415 }
2416 }
2417 }
2418 #endif
2419
TestHangulTailoring(void)2420 static void TestHangulTailoring(void) {
2421 static const char *koreanData[] = {
2422 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2423 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2424 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2425 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2426 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2427 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2428 };
2429
2430 const char *rules =
2431 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2432 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2433 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2434 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2435 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2436 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2437
2438
2439 UErrorCode status = U_ZERO_ERROR;
2440 UChar rlz[2048] = { 0 };
2441 uint32_t rlen = u_unescape(rules, rlz, 2048);
2442
2443 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2444 if(status == U_FILE_ACCESS_ERROR) {
2445 log_data_err("Is your data around?\n");
2446 return;
2447 } else if(U_FAILURE(status)) {
2448 log_err("Error opening collator\n");
2449 return;
2450 }
2451
2452 log_verbose("Using start of korean rules\n");
2453
2454 if(U_SUCCESS(status)) {
2455 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2456 } else {
2457 log_err("Unable to open collator with rules %s\n", rules);
2458 }
2459
2460 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2461 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2462 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2463
2464 ucol_close(coll);
2465
2466 log_verbose("Using ko__LOTUS locale\n");
2467 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2468 }
2469
TestCompressOverlap(void)2470 static void TestCompressOverlap(void) {
2471 UChar secstr[150];
2472 UChar tertstr[150];
2473 UErrorCode status = U_ZERO_ERROR;
2474 UCollator *coll;
2475 char result[200];
2476 uint32_t resultlen;
2477 int count = 0;
2478 char *tempptr;
2479
2480 coll = ucol_open("", &status);
2481
2482 if (U_FAILURE(status)) {
2483 log_err("Collator can't be created\n");
2484 return;
2485 }
2486 while (count < 149) {
2487 secstr[count] = 0x0020; /* [06, 05, 05] */
2488 tertstr[count] = 0x0020;
2489 count ++;
2490 }
2491
2492 /* top down compression ----------------------------------- */
2493 secstr[count] = 0x0332; /* [, 87, 05] */
2494 tertstr[count] = 0x3000; /* [06, 05, 07] */
2495
2496 /* no compression secstr should have 150 secondary bytes, tertstr should
2497 have 150 tertiary bytes.
2498 with correct overlapping compression, secstr should have 4 secondary
2499 bytes, tertstr should have > 2 tertiary bytes */
2500 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2501 tempptr = uprv_strchr(result, 1) + 1;
2502 while (*(tempptr + 1) != 1) {
2503 /* the last secondary collation element is not checked since it is not
2504 part of the compression */
2505 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2506 log_err("Secondary compression overlapped\n");
2507 }
2508 tempptr ++;
2509 }
2510
2511 /* tertiary top/bottom/common for en_US is similar to the secondary
2512 top/bottom/common */
2513 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2514 tempptr = uprv_strrchr(result, 1) + 1;
2515 while (*(tempptr + 1) != 0) {
2516 /* the last secondary collation element is not checked since it is not
2517 part of the compression */
2518 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2519 log_err("Tertiary compression overlapped\n");
2520 }
2521 tempptr ++;
2522 }
2523
2524 /* bottom up compression ------------------------------------- */
2525 secstr[count] = 0;
2526 tertstr[count] = 0;
2527 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2528 tempptr = uprv_strchr(result, 1) + 1;
2529 while (*(tempptr + 1) != 1) {
2530 /* the last secondary collation element is not checked since it is not
2531 part of the compression */
2532 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2533 log_err("Secondary compression overlapped\n");
2534 }
2535 tempptr ++;
2536 }
2537
2538 /* tertiary top/bottom/common for en_US is similar to the secondary
2539 top/bottom/common */
2540 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2541 tempptr = uprv_strrchr(result, 1) + 1;
2542 while (*(tempptr + 1) != 0) {
2543 /* the last secondary collation element is not checked since it is not
2544 part of the compression */
2545 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2546 log_err("Tertiary compression overlapped\n");
2547 }
2548 tempptr ++;
2549 }
2550
2551 ucol_close(coll);
2552 }
2553
/*
 * Runs one fixed three-string ordering against the "root" locale and against
 * a series of small tailorings involving U+0410 and U+04D0.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings under which the order of test[] has to hold as well. */
    static const char *const tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };
    int32_t idx;

    /* Russian overrides contractions, so "ru" is not a valid locale for this
       test anymore; only root is checked. */
    genericLocaleStarter("root", test, 3);

    for (idx = 0; idx < (int32_t)LEN(tailorings); idx++) {
        genericRulesStarter(tailorings[idx], test, 3);
    }
}
2572
/*
 * Checks two three-string orderings under a tailoring that consists only of
 * a [suppressContractions] option for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
2589
TestContraction(void)2590 static void TestContraction(void) {
2591 const static char *testrules[] = {
2592 "&A = AB / B",
2593 "&A = A\\u0306/\\u0306",
2594 "&c = ch / h"
2595 };
2596 const static UChar testdata[][2] = {
2597 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2598 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2599 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2600 };
2601 const static UChar testdata2[][2] = {
2602 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2603 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2604 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2605 };
2606 const static char *testrules3[] = {
2607 "&z < xyz &xyzw << B",
2608 "&z < xyz &xyz << B / w",
2609 "&z < ch &achm << B",
2610 "&z < ch &a << B / chm",
2611 "&\\ud800\\udc00w << B",
2612 "&\\ud800\\udc00 << B / w",
2613 "&a\\ud800\\udc00m << B",
2614 "&a << B / \\ud800\\udc00m",
2615 };
2616
2617 UErrorCode status = U_ZERO_ERROR;
2618 UCollator *coll;
2619 UChar rule[256] = {0};
2620 uint32_t rlen = 0;
2621 int i;
2622
2623 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2624 UCollationElements *iter1;
2625 int j = 0;
2626 log_verbose("Rule %s for testing\n", testrules[i]);
2627 rlen = u_unescape(testrules[i], rule, 32);
2628 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2629 if (U_FAILURE(status)) {
2630 log_err("Collator creation failed %s\n", testrules[i]);
2631 return;
2632 }
2633 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2634 if (U_FAILURE(status)) {
2635 log_err("Collation iterator creation failed\n");
2636 return;
2637 }
2638 while (j < 2) {
2639 UCollationElements *iter2 = ucol_openElements(coll,
2640 &(testdata[i][j]),
2641 1, &status);
2642 uint32_t ce;
2643 if (U_FAILURE(status)) {
2644 log_err("Collation iterator creation failed\n");
2645 return;
2646 }
2647 ce = ucol_next(iter2, &status);
2648 while (ce != UCOL_NULLORDER) {
2649 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2650 log_err("Collation elements in contraction split does not match\n");
2651 return;
2652 }
2653 ce = ucol_next(iter2, &status);
2654 }
2655 j ++;
2656 ucol_closeElements(iter2);
2657 }
2658 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2659 log_err("Collation elements not exhausted\n");
2660 return;
2661 }
2662 ucol_closeElements(iter1);
2663 ucol_close(coll);
2664 }
2665
2666 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2667 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2668 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2669 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2670 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2671 testdata2[1][1]);
2672 return;
2673 }
2674 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2675 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2676 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2677 testdata2[2][1]);
2678 return;
2679 }
2680 ucol_close(coll);
2681
2682 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2683 UCollator *coll1,
2684 *coll2;
2685 UCollationElements *iter1,
2686 *iter2;
2687 UChar ch = 0x0042 /* 'B' */;
2688 uint32_t ce;
2689 rlen = u_unescape(testrules3[i], rule, 32);
2690 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2691 rlen = u_unescape(testrules3[i + 1], rule, 32);
2692 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2693 if (U_FAILURE(status)) {
2694 log_err("Collator creation failed %s\n", testrules[i]);
2695 return;
2696 }
2697 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2698 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2699 if (U_FAILURE(status)) {
2700 log_err("Collation iterator creation failed\n");
2701 return;
2702 }
2703 ce = ucol_next(iter1, &status);
2704 if (U_FAILURE(status)) {
2705 log_err("Retrieving ces failed\n");
2706 return;
2707 }
2708 while (ce != UCOL_NULLORDER) {
2709 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2710 log_err("CEs does not match\n");
2711 return;
2712 }
2713 ce = ucol_next(iter1, &status);
2714 if (U_FAILURE(status)) {
2715 log_err("Retrieving ces failed\n");
2716 return;
2717 }
2718 }
2719 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2720 log_err("CEs not exhausted\n");
2721 return;
2722 }
2723 ucol_closeElements(iter1);
2724 ucol_closeElements(iter2);
2725 ucol_close(coll1);
2726 ucol_close(coll2);
2727 }
2728 }
2729
TestExpansion(void)2730 static void TestExpansion(void) {
2731 const static char *testrules[] = {
2732 "&J << K / B & K << M",
2733 "&J << K / B << M"
2734 };
2735 const static UChar testdata[][3] = {
2736 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2737 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2738 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2739 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2740 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2741 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2742 };
2743
2744 UErrorCode status = U_ZERO_ERROR;
2745 UCollator *coll;
2746 UChar rule[256] = {0};
2747 uint32_t rlen = 0;
2748 int i;
2749
2750 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2751 int j = 0;
2752 log_verbose("Rule %s for testing\n", testrules[i]);
2753 rlen = u_unescape(testrules[i], rule, 32);
2754 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2755 if (U_FAILURE(status)) {
2756 log_err("Collator creation failed %s\n", testrules[i]);
2757 return;
2758 }
2759
2760 for (j = 0; j < 5; j ++) {
2761 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2762 }
2763 ucol_close(coll);
2764 }
2765 }
2766
2767 #if 0
2768 /* this test tests the current limitations of the engine */
2769 /* it always fail, so it is disabled by default */
2770 static void TestLimitations(void) {
2771 /* recursive expansions */
2772 {
2773 static const char *rule = "&a=b/c&d=c/e";
2774 static const char *tlimit01[] = {"add","b","adf"};
2775 static const char *tlimit02[] = {"aa","b","af"};
2776 log_verbose("recursive expansions\n");
2777 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2778 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2779 }
2780 /* contractions spanning expansions */
2781 {
2782 static const char *rule = "&a<<<c/e&g<<<eh";
2783 static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
2784 static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
2785 log_verbose("contractions spanning expansions\n");
2786 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2787 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2788 }
2789 /* normalization: nulls in contractions */
2790 {
2791 static const char *rule = "&a<<<\\u0000\\u0302";
2792 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2793 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2794 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2795 static const UColAttributeValue valOn[] = { UCOL_ON };
2796 static const UColAttributeValue valOff[] = { UCOL_OFF };
2797
2798 log_verbose("NULL in contractions\n");
2799 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2800 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2801 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2802 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2803
2804 }
2805 /* normalization: contractions spanning normalization */
2806 {
2807 static const char *rule = "&a<<<\\u0000\\u0302";
2808 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2809 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2810 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2811 static const UColAttributeValue valOn[] = { UCOL_ON };
2812 static const UColAttributeValue valOff[] = { UCOL_OFF };
2813
2814 log_verbose("contractions spanning normalization\n");
2815 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2816 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2817 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2818 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2819
2820 }
2821 /* variable top: */
2822 {
2823 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2824 static const char *rule = "&\\u2010<x<[variable top]=z";
2825 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2826 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2827 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2828 static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2829 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
2830 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
2831 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
2832
2833 log_verbose("variable top\n");
2834 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2835 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2836 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2837 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2838 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2839
2840 }
2841 /* case level */
2842 {
2843 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
2844 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
2845 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
2846 static const UColAttribute att[] = { UCOL_CASE_FIRST};
2847 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
2848 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2849 log_verbose("case level\n");
2850 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2851 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2852 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2853 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2854 }
2855
2856 }
2857 #endif
2858
TestBocsuCoverage(void)2859 static void TestBocsuCoverage(void) {
2860 UErrorCode status = U_ZERO_ERROR;
2861 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2862 UChar test[256] = {0};
2863 uint32_t tlen = u_unescape(testString, test, 32);
2864 uint8_t key[256] = {0};
2865 uint32_t klen = 0;
2866
2867 UCollator *coll = ucol_open("", &status);
2868 if(U_SUCCESS(status)) {
2869 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2870
2871 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2872
2873 ucol_close(coll);
2874 } else {
2875 log_data_err("Couldn't open UCA\n");
2876 }
2877 }
2878
TestVariableTopSetting(void)2879 static void TestVariableTopSetting(void) {
2880 UErrorCode status = U_ZERO_ERROR;
2881 const UChar *current = NULL;
2882 uint32_t varTopOriginal = 0, varTop1, varTop2;
2883 UCollator *coll = ucol_open("", &status);
2884 if(U_SUCCESS(status)) {
2885
2886 uint32_t strength = 0;
2887 uint16_t specs = 0;
2888 uint32_t chOffset = 0;
2889 uint32_t chLen = 0;
2890 uint32_t exOffset = 0;
2891 uint32_t exLen = 0;
2892 uint32_t oldChOffset = 0;
2893 uint32_t oldChLen = 0;
2894 uint32_t oldExOffset = 0;
2895 uint32_t oldExLen = 0;
2896 uint32_t prefixOffset = 0;
2897 uint32_t prefixLen = 0;
2898
2899 UBool startOfRules = TRUE;
2900 UColTokenParser src;
2901 UColOptionSet opts;
2902
2903 UChar *rulesCopy = NULL;
2904 uint32_t rulesLen;
2905
2906 UCollationResult result;
2907
2908 UChar first[256] = { 0 };
2909 UChar second[256] = { 0 };
2910 UParseError parseError;
2911 int32_t myQ = QUICK;
2912
2913 src.opts = &opts;
2914
2915 if(QUICK <= 0) {
2916 QUICK = 1;
2917 }
2918
2919 /* this test will fail when normalization is turned on */
2920 /* therefore we always turn off exhaustive mode for it */
2921 { /* QUICK > 0*/
2922 log_verbose("Slide variable top over UCARules\n");
2923 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2924 rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2925 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2926
2927 if(U_SUCCESS(status) && rulesLen > 0) {
2928 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2929 src.current = src.source = rulesCopy;
2930 src.end = rulesCopy+rulesLen;
2931 src.extraCurrent = src.end;
2932 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2933
2934 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2935 strength = src.parsedToken.strength;
2936 chOffset = src.parsedToken.charsOffset;
2937 chLen = src.parsedToken.charsLen;
2938 exOffset = src.parsedToken.extensionOffset;
2939 exLen = src.parsedToken.extensionLen;
2940 prefixOffset = src.parsedToken.prefixOffset;
2941 prefixLen = src.parsedToken.prefixLen;
2942 specs = src.parsedToken.flags;
2943
2944 startOfRules = FALSE;
2945 {
2946 log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2947 }
2948 if(strength == UCOL_PRIMARY) {
2949 status = U_ZERO_ERROR;
2950 varTopOriginal = ucol_getVariableTop(coll, &status);
2951 varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2952 if(U_FAILURE(status)) {
2953 char buffer[256];
2954 char *buf = buffer;
2955 uint32_t i = 0, j;
2956 uint32_t CE = UCOL_NO_MORE_CES;
2957
2958 /* before we start screaming, let's see if there is a problem with the rules */
2959 collIterate s;
2960 uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2961
2962 CE = ucol_getNextCE(coll, &s, &status);
2963
2964 for(i = 0; i < oldChLen; i++) {
2965 j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
2966 buf += j;
2967 }
2968 if(status == U_PRIMARY_TOO_LONG_ERROR) {
2969 log_verbose("= Expected failure for %s =", buffer);
2970 } else {
2971 if(s.pos == s.endp) {
2972 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
2973 oldChOffset, u_errorName(status), buffer);
2974 } else {
2975 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
2976 buffer);
2977 }
2978 }
2979 }
2980 varTop2 = ucol_getVariableTop(coll, &status);
2981 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
2982 log_err("cannot retrieve set varTop value!\n");
2983 continue;
2984 }
2985
2986 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
2987
2988 u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
2989 u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
2990 u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
2991 first[2*oldChLen+chLen] = 0;
2992
2993 if(oldExLen == 0) {
2994 u_strncpy(second, rulesCopy+chOffset, chLen);
2995 second[chLen] = 0;
2996 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
2997 u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
2998 u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
2999 u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
3000 second[2*oldExLen+chLen] = 0;
3001 }
3002 result = ucol_strcoll(coll, first, -1, second, -1);
3003 if(result == UCOL_EQUAL) {
3004 doTest(coll, first, second, UCOL_EQUAL);
3005 } else {
3006 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
3007 }
3008 }
3009 }
3010 if(strength != UCOL_TOK_RESET) {
3011 oldChOffset = chOffset;
3012 oldChLen = chLen;
3013 oldExOffset = exOffset;
3014 oldExLen = exLen;
3015 }
3016 }
3017 status = U_ZERO_ERROR;
3018 }
3019 else {
3020 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3021 return;
3022 }
3023 if (U_FAILURE(status)) {
3024 log_err("Error parsing rules %s\n", u_errorName(status));
3025 return;
3026 }
3027 status = U_ZERO_ERROR;
3028 }
3029
3030 QUICK = myQ;
3031
3032 log_verbose("Testing setting variable top to contractions\n");
3033 {
3034 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3035 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3036 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3037 while(*conts != 0) {
3038 if(*(conts+2) == 0) {
3039 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3040 } else {
3041 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3042 }
3043 if(U_FAILURE(status)) {
3044 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3045 *conts, *(conts+1), *(conts+2));
3046 status = U_ZERO_ERROR;
3047 }
3048 conts+=3;
3049 }
3050
3051 status = U_ZERO_ERROR;
3052
3053 first[0] = 0x0040;
3054 first[1] = 0x0050;
3055 first[2] = 0x0000;
3056
3057 ucol_setVariableTop(coll, first, -1, &status);
3058
3059 if(U_SUCCESS(status)) {
3060 log_err("Invalid contraction succeded in setting variable top!\n");
3061 }
3062
3063 }
3064
3065 log_verbose("Test restoring variable top\n");
3066
3067 status = U_ZERO_ERROR;
3068 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3069 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3070 log_err("Couldn't restore old variable top\n");
3071 }
3072
3073 log_verbose("Testing calling with error set\n");
3074
3075 status = U_INTERNAL_PROGRAM_ERROR;
3076 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3077 varTop2 = ucol_getVariableTop(coll, &status);
3078 ucol_restoreVariableTop(coll, varTop2, &status);
3079 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3080 varTop2 = ucol_getVariableTop(NULL, &status);
3081 ucol_restoreVariableTop(NULL, varTop2, &status);
3082 if(status != U_INTERNAL_PROGRAM_ERROR) {
3083 log_err("Bad reaction to passed error!\n");
3084 }
3085 free(rulesCopy);
3086 ucol_close(coll);
3087 } else {
3088 log_data_err("Couldn't open UCA collator\n");
3089 }
3090
3091 }
3092
TestNonChars(void)3093 static void TestNonChars(void) {
3094 static const char *test[] = {
3095 "\\u0000",
3096 "\\uFFFE", "\\uFFFF",
3097 "\\U0001FFFE", "\\U0001FFFF",
3098 "\\U0002FFFE", "\\U0002FFFF",
3099 "\\U0003FFFE", "\\U0003FFFF",
3100 "\\U0004FFFE", "\\U0004FFFF",
3101 "\\U0005FFFE", "\\U0005FFFF",
3102 "\\U0006FFFE", "\\U0006FFFF",
3103 "\\U0007FFFE", "\\U0007FFFF",
3104 "\\U0008FFFE", "\\U0008FFFF",
3105 "\\U0009FFFE", "\\U0009FFFF",
3106 "\\U000AFFFE", "\\U000AFFFF",
3107 "\\U000BFFFE", "\\U000BFFFF",
3108 "\\U000CFFFE", "\\U000CFFFF",
3109 "\\U000DFFFE", "\\U000DFFFF",
3110 "\\U000EFFFE", "\\U000EFFFF",
3111 "\\U000FFFFE", "\\U000FFFFF",
3112 "\\U0010FFFE", "\\U0010FFFF"
3113 };
3114 UErrorCode status = U_ZERO_ERROR;
3115 UCollator *coll = ucol_open("en_US", &status);
3116
3117 log_verbose("Test non characters\n");
3118
3119 if(U_SUCCESS(status)) {
3120 genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3121 } else {
3122 log_err("Unable to open collator\n");
3123 }
3124
3125 ucol_close(coll);
3126 }
3127
/*
 * Feeds genericLocaleStarter() ("en_US") four strings that share a long run
 * of identical characters and differ only in their last character.
 *
 * For every length j in [20, 500) the four strings consist of j-1 'a'
 * characters followed by 'a', 'b', 'c' and 'd' respectively.
 *
 * The buffers live in automatic storage: the previous version obtained them
 * from malloc() and wrote through the result without checking it for NULL.
 */
static void TestExtremeCompression(void) {
    /* kMaxLength is exclusive; the longest string (499 chars) plus its
     * terminator fits exactly into kMaxLength bytes. */
    enum { kStringCount = 4, kMinLength = 20, kMaxLength = 500 };

    char storage[kStringCount][kMaxLength];
    char *test[kStringCount];
    int32_t j = 0, i = 0;

    for(i = 0; i < kStringCount; i++) {
        test[i] = storage[i];
    }

    for(j = kMinLength; j < kMaxLength; j++) {
        for(i = 0; i < kStringCount; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j-1] = (char)('a'+i);   /* distinguishing last character */
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, kStringCount);
    }
}
3150
#if 0
/*
 * Disabled variant of TestExtremeCompression; the surrounding #if 0 keeps it
 * out of the build.
 *
 * NOTE(review): it would not compile if re-enabled as-is - ucol_open() is
 * given `status` where the live tests in this file pass `&status` - and the
 * collator it opens is neither used nor closed.
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    /* only the strings built by the final iteration reach the call below */
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    /* rewrites test[0] only; nothing consumes the result */
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
3180
/*
 * Builds a tailoring whose elements are supplementary code points written as
 * surrogate pairs (alone and followed by ASCII letters) and passes the rule
 * plus the list of test strings to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    static const char *ordered[] = {
        "z","\\ud900\\udc25", "\\ud805\\udc50",
        "\\ud800\\udc00y", "\\ud800\\udc00r",
        "\\ud800\\udc00f", "\\ud800\\udc00",
        "\\ud800\\udc00c", "\\ud800\\udc00b",
        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c", "b"
    };

    /* 14 strings; count derived from the table */
    genericRulesStarter(rule, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
3201
/*
 * Test for the prefix ("context|element") rule syntax, which the JIS X 4061
 * collation rules rely on.  Each case pairs a rule string with the strings
 * handed to genericRulesStarter().
 */
static void TestPrefix(void) {
    static const struct PrefixCase {
        const char *rules;
        const char *data[50];
        const uint32_t len;
    } tests[] = {
        { "&z <<< z|a",
            {"zz", "za"}, 2 },

        { "&z <<< z| a",
            {"zz", "za"}, 2 },
        /* NOTE(review): five strings are listed below but len is 4, so the
         * trailing "zz" is not passed on - confirm this is intended. */
        { "[strength I]"
          "&a=\\ud900\\udc25"
          "&z<<<\\ud900\\udc25|a",
            {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
    };

    const struct PrefixCase *cur = tests;
    const struct PrefixCase * const end = tests + sizeof(tests)/sizeof(tests[0]);

    for(; cur != end; ++cur) {
        genericRulesStarter(cur->rules, cur->data, cur->len);
    }
}
3227
3228 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3229 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)3230 static void TestNewJapanese(void) {
3231
3232 static const char * const test1[] = {
3233 "\\u30b7\\u30e3\\u30fc\\u30ec",
3234 "\\u30b7\\u30e3\\u30a4",
3235 "\\u30b7\\u30e4\\u30a3",
3236 "\\u30b7\\u30e3\\u30ec",
3237 "\\u3061\\u3087\\u3053",
3238 "\\u3061\\u3088\\u3053",
3239 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3240 "\\u3066\\u30fc\\u305f",
3241 "\\u30c6\\u30fc\\u30bf",
3242 "\\u30c6\\u30a7\\u30bf",
3243 "\\u3066\\u3048\\u305f",
3244 "\\u3067\\u30fc\\u305f",
3245 "\\u30c7\\u30fc\\u30bf",
3246 "\\u30c7\\u30a7\\u30bf",
3247 "\\u3067\\u3048\\u305f",
3248 "\\u3066\\u30fc\\u305f\\u30fc",
3249 "\\u30c6\\u30fc\\u30bf\\u30a1",
3250 "\\u30c6\\u30a7\\u30bf\\u30fc",
3251 "\\u3066\\u3047\\u305f\\u3041",
3252 "\\u3066\\u3048\\u305f\\u30fc",
3253 "\\u3067\\u30fc\\u305f\\u30fc",
3254 "\\u30c7\\u30fc\\u30bf\\u30a1",
3255 "\\u3067\\u30a7\\u305f\\u30a1",
3256 "\\u30c7\\u3047\\u30bf\\u3041",
3257 "\\u30c7\\u30a8\\u30bf\\u30a2",
3258 "\\u3072\\u3086",
3259 "\\u3073\\u3085\\u3042",
3260 "\\u3074\\u3085\\u3042",
3261 "\\u3073\\u3085\\u3042\\u30fc",
3262 "\\u30d3\\u30e5\\u30a2\\u30fc",
3263 "\\u3074\\u3085\\u3042\\u30fc",
3264 "\\u30d4\\u30e5\\u30a2\\u30fc",
3265 "\\u30d2\\u30e5\\u30a6",
3266 "\\u30d2\\u30e6\\u30a6",
3267 "\\u30d4\\u30e5\\u30a6\\u30a2",
3268 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3269 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3270 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3271 "\\u3072\\u3085\\u3093",
3272 "\\u3074\\u3085\\u3093",
3273 "\\u3075\\u30fc\\u308a",
3274 "\\u30d5\\u30fc\\u30ea",
3275 "\\u3075\\u3045\\u308a",
3276 "\\u3075\\u30a5\\u308a",
3277 "\\u3075\\u30a5\\u30ea",
3278 "\\u30d5\\u30a6\\u30ea",
3279 "\\u3076\\u30fc\\u308a",
3280 "\\u30d6\\u30fc\\u30ea",
3281 "\\u3076\\u3045\\u308a",
3282 "\\u30d6\\u30a5\\u308a",
3283 "\\u3077\\u3046\\u308a",
3284 "\\u30d7\\u30a6\\u30ea",
3285 "\\u3075\\u30fc\\u308a\\u30fc",
3286 "\\u30d5\\u30a5\\u30ea\\u30fc",
3287 "\\u3075\\u30a5\\u308a\\u30a3",
3288 "\\u30d5\\u3045\\u308a\\u3043",
3289 "\\u30d5\\u30a6\\u30ea\\u30fc",
3290 "\\u3075\\u3046\\u308a\\u3043",
3291 "\\u30d6\\u30a6\\u30ea\\u30a4",
3292 "\\u3077\\u30fc\\u308a\\u30fc",
3293 "\\u3077\\u30a5\\u308a\\u30a4",
3294 "\\u3077\\u3046\\u308a\\u30fc",
3295 "\\u30d7\\u30a6\\u30ea\\u30a4",
3296 "\\u30d5\\u30fd",
3297 "\\u3075\\u309e",
3298 "\\u3076\\u309d",
3299 "\\u3076\\u3075",
3300 "\\u3076\\u30d5",
3301 "\\u30d6\\u3075",
3302 "\\u30d6\\u30d5",
3303 "\\u3076\\u309e",
3304 "\\u3076\\u3077",
3305 "\\u30d6\\u3077",
3306 "\\u3077\\u309d",
3307 "\\u30d7\\u30fd",
3308 "\\u3077\\u3075",
3309 };
3310
3311 static const char *test2[] = {
3312 "\\u306f\\u309d", /* H\\u309d */
3313 "\\u30cf\\u30fd", /* K\\u30fd */
3314 "\\u306f\\u306f", /* HH */
3315 "\\u306f\\u30cf", /* HK */
3316 "\\u30cf\\u30cf", /* KK */
3317 "\\u306f\\u309e", /* H\\u309e */
3318 "\\u30cf\\u30fe", /* K\\u30fe */
3319 "\\u306f\\u3070", /* HH\\u309b */
3320 "\\u30cf\\u30d0", /* KK\\u309b */
3321 "\\u306f\\u3071", /* HH\\u309c */
3322 "\\u30cf\\u3071", /* KH\\u309c */
3323 "\\u30cf\\u30d1", /* KK\\u309c */
3324 "\\u3070\\u309d", /* H\\u309b\\u309d */
3325 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3326 "\\u3070\\u306f", /* H\\u309bH */
3327 "\\u30d0\\u30cf", /* K\\u309bK */
3328 "\\u3070\\u309e", /* H\\u309b\\u309e */
3329 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3330 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3331 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3332 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3333 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3334 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3335 "\\u3071\\u309d", /* H\\u309c\\u309d */
3336 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3337 "\\u3071\\u306f", /* H\\u309cH */
3338 "\\u30d1\\u30cf", /* K\\u309cK */
3339 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3340 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3341 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3342 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3343 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3344 };
3345 /*
3346 static const char *test3[] = {
3347 "\\u221er\\u221e",
3348 "\\u221eR#",
3349 "\\u221et\\u221e",
3350 "#r\\u221e",
3351 "#R#",
3352 "#t%",
3353 "#T%",
3354 "8t\\u221e",
3355 "8T\\u221e",
3356 "8t#",
3357 "8T#",
3358 "8t%",
3359 "8T%",
3360 "8t8",
3361 "8T8",
3362 "\\u03c9r\\u221e",
3363 "\\u03a9R%",
3364 "rr\\u221e",
3365 "rR\\u221e",
3366 "Rr\\u221e",
3367 "RR\\u221e",
3368 "RT%",
3369 "rt8",
3370 "tr\\u221e",
3371 "tr8",
3372 "TR8",
3373 "tt8",
3374 "\\u30b7\\u30e3\\u30fc\\u30ec",
3375 };
3376 */
3377 static const UColAttribute att[] = { UCOL_STRENGTH };
3378 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3379
3380 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3381 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3382
3383 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3384 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3385 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3386 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3387 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3388 }
3389
TestStrCollIdenticalPrefix(void)3390 static void TestStrCollIdenticalPrefix(void) {
3391 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3392 const char* test[] = {
3393 "ab\\ud9b0\\udc70",
3394 "ab\\ud9b0\\udc71"
3395 };
3396 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3397 }
3398 /* Contractions should have all their canonically equivalent */
3399 /* strings included */
TestContractionClosure(void)3400 static void TestContractionClosure(void) {
3401 static const struct {
3402 const char *rules;
3403 const char *data[10];
3404 const uint32_t len;
3405 } tests[] = {
3406 { "&b=\\u00e4\\u00e4",
3407 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3408 { "&b=\\u00C5",
3409 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3410 };
3411 uint32_t i;
3412
3413
3414 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3415 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3416 }
3417 }
3418
/*
 * Rules that combine [before 3] with further tailoring of the same or
 * neighbouring characters.  (The original note on this test reads "this test
 * also fails".)  Each case is run through genericRulesStarter().
 */
static void TestBeforePrefixFailure(void) {
    static const struct BeforeCase {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "&g <<< a"
          "&[before 3]\\uff41 <<< x",
            {"x", "\\uff41"}, 2 },
        { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74"
          "&[before 3]\\u30a7<<<\\u30a9",
            {"\\u30a9", "\\u30a7"}, 2 },
        { "&[before 3]\\u30a7<<<\\u30a9"
          "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74",
            {"\\u30a9", "\\u30a7"}, 2 },
    };
    const uint32_t caseCount = sizeof(tests)/sizeof(tests[0]);
    uint32_t idx;

    for(idx = 0; idx != caseCount; ++idx) {
        const struct BeforeCase *c = &tests[idx];
        genericRulesStarter(c->rules, c->data, c->len);
    }

#if 0
    const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
    const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
    const char* test[] = {
        "\\u30c6\\u30fc\\u30bf",
        "\\u30c6\\u30a7\\u30bf",
    };
    genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
    genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
    /* this piece of code should be in some sort of verbose mode */
    /* it gets the collation elements for elements and prints them */
    /* This is useful when trying to see whether the problem is */
    {
        UErrorCode status = U_ZERO_ERROR;
        uint32_t i = 0;
        UCollationElements *it = NULL;
        uint32_t CE;
        UChar string[256];
        uint32_t uStringLen;
        UCollator *coll = NULL;

        uStringLen = u_unescape(rule1, string, 256);

        coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

        /*coll = ucol_open("ja_JP_JIS", &status);*/
        it = ucol_openElements(coll, string, 0, &status);

        for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
            log_verbose("%s\n", test[i]);
            uStringLen = u_unescape(test[i], string, 256);
            ucol_setText(it, string, uStringLen, &status);

            while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
                log_verbose("%08X\n", CE);
            }
            log_verbose("\n");

        }

        ucol_closeElements(it);
        ucol_close(coll);
    }
#endif
}
3496
TestPrefixCompose(void)3497 static void TestPrefixCompose(void) {
3498 const char* rule1 =
3499 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3500 /*
3501 const char* test[] = {
3502 "\\u30c6\\u30fc\\u30bf",
3503 "\\u30c6\\u30a7\\u30bf",
3504 };
3505 */
3506 {
3507 UErrorCode status = U_ZERO_ERROR;
3508 /*uint32_t i = 0;*/
3509 /*UCollationElements *it = NULL;*/
3510 /* uint32_t CE;*/
3511 UChar string[256];
3512 uint32_t uStringLen;
3513 UCollator *coll = NULL;
3514
3515 uStringLen = u_unescape(rule1, string, 256);
3516
3517 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3518 ucol_close(coll);
3519 }
3520
3521
3522 }
3523
/*
 * Symbolic reset positions used by the rules below:
 *
 * [last variable]              last variable value
 * [last primary ignorable]     largest CE for primary ignorable
 * [last secondary ignorable]   largest CE for secondary ignorable
 * [last tertiary ignorable]    largest CE for tertiary ignorable
 * [top]                        guaranteed to be above all implicit CEs, for
 *                              now and in the future (in 1.8)
 *
 * The expected strings are hard-coded and match the current UCA; when the UCA
 * changes these values (\\u02d0, \\U00010FFFC etc...) may need to change too.
 */
static void TestRuleOptions(void) {
    static const struct OptionCase {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        /* - all befores here amount to zero */

        /* you cannot go before first tertiary ignorable */
        { "&[before 3][first tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* you cannot go before last tertiary ignorable */
        { "&[before 3][last tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* you cannot go before first secondary ignorable */
        { "&[before 3][first secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* you cannot go before last secondary ignorable */
        { "&[before 3][last secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* 'normal' befores */

        { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
            { "c", "b", "\\u0332", "a" }, 4
        },

        /* we don't have a code point that corresponds to
         * the last primary ignorable
         */
        { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
            { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
        },

        { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
            { "c", "b", "\\u0009", "a", "\\u000a" }, 5
        },

        { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
            { "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
        },

        { "&[first regular]<a"
          "&[before 1][first regular]<b",
            { "b", "\\u02d0", "a", "\\u02d1"}, 4
        },

        { "&[before 1][last regular]<b"
          "&[last regular]<a",
            { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
        },

        { "&[before 1][first implicit]<b"
          "&[first implicit]<a",
            { "b", "\\u4e00", "a", "\\u4e01"}, 4
        },

        { "&[before 1][last implicit]<b"
          "&[last implicit]<a",
            { "b", "\\U0010FFFD", "a" }, 3
        },

        /* NOTE(review): eight strings are listed but len is 7, so the final
         * "u" is not passed on - confirm this is intended. */
        { "&[last variable]<z"
          "&[last primary ignorable]<x"
          "&[last secondary ignorable]<<y"
          "&[last tertiary ignorable]<<<w"
          "&[top]<u",
            {"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
        }

    };

    const struct OptionCase *cur = tests;
    const struct OptionCase * const end = tests + sizeof(tests)/sizeof(tests[0]);

    for(; cur != end; ++cur) {
        genericRulesStarter(cur->rules, cur->data, cur->len);
    }
}
3616
3617
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * (triggered by an [optimize ...] rule) fails.  The functionality itself
 * stays the same, so there is nothing more specific to verify.
 */
static void TestOptimize(void) {
    static const struct OptimizeCase {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "[optimize [\\uAC00-\\uD7FF]]",
            { "a", "b"}, 2}
    };
    const uint32_t caseCount = sizeof(tests)/sizeof(tests[0]);
    uint32_t idx = 0;

    while(idx < caseCount) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        idx++;
    }
}
3639
3640 /*
3641 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3642 weiv ucol_strcollIter?
3643 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3644 weiv these are the input strings?
3645 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3646 weiv will check - could be a problem with utf-8 iterator
3647 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3648 weiv hmmm
3649 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3650 weiv that doesn't sound right
3651 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3652 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3653 cycheng@ca.ibm.c... yes
3654 weiv and then do the comparison
3655 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3656 weiv utf-16 strings look like a little endian ones in the example you sent me
3657 weiv It could be a bug - let me try to test it out
3658 cycheng@ca.ibm.c... ok
3659 cycheng@ca.ibm.c... we can wait till the conf. call
3660 cycheng@ca.ibm.c... next weke
3661 weiv that would be great
3662 weiv hmmm
3663 weiv I might be wrong
3664 weiv let me play with it some more
3665 cycheng@ca.ibm.c... ok
3666 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3667 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3668 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3669 weiv ok
3670 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3671 weiv thanks
3672 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3673 */
#if 0
/*
 * Disabled (kept out of the build by #if 0): reproduction of the report
 * quoted in the comment above.  It collates the same two strings once through
 * UTF-16BE iterators and once through UTF-8 iterators, with normalization
 * on, and logs an error if the two results differ.
 */
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* the two strings as UTF-16BE bytes */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* the same two strings as UTF-8 bytes */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
3714
3715 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3716 static void Alexis2(void) {
3717 UErrorCode status = U_ZERO_ERROR;
3718 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3719 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3720 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3721 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3722
3723 UConverter *conv = NULL;
3724
3725 UCharIterator U16BEItS, U16BEItT;
3726 UCharIterator U8ItS, U8ItT;
3727
3728 UCollationResult resU16, resU16BE, resU8;
3729
3730 static const char* const pairs[][2] = {
3731 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3732 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3733 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3734 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3735 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3736 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3737 { "\\u0020", "\\u0020\\u0000"}
3738 /*
3739 5F20 (my result here)
3740 5F204E008E3F
3741 5F20 (your result here)
3742 */
3743 };
3744
3745 int32_t i = 0;
3746
3747 UCollator *coll = ucol_open("", &status);
3748 if(status == U_FILE_ACCESS_ERROR) {
3749 log_data_err("Is your data around?\n");
3750 return;
3751 } else if(U_FAILURE(status)) {
3752 log_err("Error opening collator\n");
3753 return;
3754 }
3755 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3756 conv = ucnv_open("UTF16BE", &status);
3757 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3758 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3759 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3760
3761 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3762
3763 log_verbose("Result of strcoll is %i\n", resU16);
3764
3765 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3766 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3767
3768 /* use the original sizes, as the result from converter is in bytes */
3769 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3770 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3771
3772 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3773
3774 log_verbose("Result of U16BE is %i\n", resU16BE);
3775
3776 if(resU16 != resU16BE) {
3777 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3778 }
3779
3780 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3781 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3782
3783 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3784 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3785
3786 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3787
3788 if(resU16 != resU8) {
3789 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3790 }
3791
3792 }
3793
3794 ucol_close(coll);
3795 ucnv_close(conv);
3796 }
3797
TestHebrewUCA(void)3798 static void TestHebrewUCA(void) {
3799 UErrorCode status = U_ZERO_ERROR;
3800 static const char *first[] = {
3801 "d790d6b8d79cd795d6bcd7a9",
3802 "d790d79cd79ed7a7d799d799d7a1",
3803 "d790d6b4d79ed795d6bcd7a9",
3804 };
3805
3806 char utf8String[3][256];
3807 UChar utf16String[3][256];
3808
3809 int32_t i = 0, j = 0;
3810 int32_t sizeUTF8[3];
3811 int32_t sizeUTF16[3];
3812
3813 UCollator *coll = ucol_open("", &status);
3814 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3815
3816 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3817 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3818 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3819 log_verbose("%i: ");
3820 for(j = 0; j < sizeUTF16[i]; j++) {
3821 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3822 log_verbose("%04X", utf16String[i][j]);
3823 }
3824 log_verbose("\n");
3825 }
3826 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3827 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3828 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3829 }
3830 }
3831
3832 ucol_close(coll);
3833
3834 }
3835
TestPartialSortKeyTermination(void)3836 static void TestPartialSortKeyTermination(void) {
3837 static const char* cases[] = {
3838 "\\u1234\\u1234\\udc00",
3839 "\\udc00\\ud800\\ud800"
3840 };
3841
3842 int32_t i = sizeof(UCollator);
3843
3844 UErrorCode status = U_ZERO_ERROR;
3845
3846 UCollator *coll = ucol_open("", &status);
3847
3848 UCharIterator iter;
3849
3850 UChar currCase[256];
3851 int32_t length = 0;
3852 int32_t pKeyLen = 0;
3853
3854 uint8_t key[256];
3855
3856 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3857 uint32_t state[2] = {0, 0};
3858 length = u_unescape(cases[i], currCase, 256);
3859 uiter_setString(&iter, currCase, length);
3860 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3861
3862 log_verbose("Done\n");
3863
3864 }
3865 ucol_close(coll);
3866 }
3867
TestSettings(void)3868 static void TestSettings(void) {
3869 static const char* cases[] = {
3870 "apple",
3871 "Apple"
3872 };
3873
3874 static const char* locales[] = {
3875 "",
3876 "en"
3877 };
3878
3879 UErrorCode status = U_ZERO_ERROR;
3880
3881 int32_t i = 0, j = 0;
3882
3883 UChar source[256], target[256];
3884 int32_t sLen = 0, tLen = 0;
3885
3886 UCollator *collateObject = NULL;
3887 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3888 collateObject = ucol_open(locales[i], &status);
3889 ucol_setStrength(collateObject, UCOL_PRIMARY);
3890 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3891 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3892 sLen = u_unescape(cases[j-1], source, 256);
3893 source[sLen] = 0;
3894 tLen = u_unescape(cases[j], target, 256);
3895 source[tLen] = 0;
3896 doTest(collateObject, source, target, UCOL_EQUAL);
3897 }
3898 ucol_close(collateObject);
3899 }
3900 }
3901
/*
 * Runs the ucol_equals() checks for one pair of collators.
 *
 * locName - locale name supplied by the caller (not used here).
 * source  - reference collator; this function never closes it.
 * target  - second collator, expected to be equal to source; this function
 *           closes it.
 *
 * If the requested and actual locale names of source are identical, a clone
 * of source is additionally checked: it must equal source, and must stop
 * being equal once its French-collation attribute is flipped.
 *
 * Returns the number of errors logged.
 */
static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    UColAttributeValue french = UCOL_OFF;
    int32_t cloneSize = 0;

    if(!ucol_equals(source, target)) {
        log_err("Same collators, different address not equal\n");
        errorNo++;
    }
    ucol_close(target);
    if(uprv_strcmp(ucol_getLocale(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocale(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
        /* Currently, safeClone is implemented through getRules/openRules, so
         * a separate getRules/openRules round trip would repeat this check
         * and is not performed.
         */
        target = ucol_safeClone(source, NULL, &cloneSize, &status);
        if(U_FAILURE(status)) {
            log_err("Error creating clone\n");
            errorNo++;
            return errorNo;
        }
        if(!ucol_equals(source, target)) {
            log_err("Collator different from it's clone\n");
            errorNo++;
        }
        /* flip French collation on the clone so that it must differ */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if(french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if(U_FAILURE(status)) {
            log_err("Error setting attributes\n");
            errorNo++;
            /* release the clone; this path used to return without closing it */
            ucol_close(target);
            return errorNo;
        }
        if(ucol_equals(source, target)) {
            log_err("Collators same even when options changed\n");
            errorNo++;
        }
        ucol_close(target);
    }
    return errorNo;
}
3964
3965
TestEquals(void)3966 static void TestEquals(void) {
3967 /* ucol_equals is not currently a public API. There is a chance that it will become
3968 * something like this, but currently it is only used by RuleBasedCollator::operator==
3969 */
3970 /* test whether the two collators instantiated from the same locale are equal */
3971 UErrorCode status = U_ZERO_ERROR;
3972 UParseError parseError;
3973 int32_t noOfLoc = uloc_countAvailable();
3974 const char *locName = NULL;
3975 UCollator *source = NULL, *target = NULL;
3976 int32_t i = 0;
3977
3978 const char* rules[] = {
3979 "&l < lj <<< Lj <<< LJ",
3980 "&n < nj <<< Nj <<< NJ",
3981 "&ae <<< \\u00e4",
3982 "&AE <<< \\u00c4"
3983 };
3984 /*
3985 const char* badRules[] = {
3986 "&l <<< Lj",
3987 "&n < nj <<< nJ <<< NJ",
3988 "&a <<< \\u00e4",
3989 "&AE <<< \\u00c4 <<< x"
3990 };
3991 */
3992
3993 UChar sourceRules[1024], targetRules[1024];
3994 int32_t sourceRulesSize = 0, targetRulesSize = 0;
3995 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
3996
3997 for(i = 0; i < rulesSize; i++) {
3998 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
3999 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4000 }
4001
4002 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4003 if(status == U_FILE_ACCESS_ERROR) {
4004 log_data_err("Is your data around?\n");
4005 return;
4006 } else if(U_FAILURE(status)) {
4007 log_err("Error opening collator\n");
4008 return;
4009 }
4010 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4011 if(!ucol_equals(source, target)) {
4012 log_err("Equivalent collators not equal!\n");
4013 }
4014 ucol_close(source);
4015 ucol_close(target);
4016
4017 source = ucol_open("root", &status);
4018 target = ucol_open("root", &status);
4019 log_verbose("Testing root\n");
4020 if(!ucol_equals(source, source)) {
4021 log_err("Same collator not equal\n");
4022 }
4023 if(TestEqualsForCollator(locName, source, target)) {
4024 log_err("Errors for root\n", locName);
4025 }
4026 ucol_close(source);
4027
4028 for(i = 0; i<noOfLoc; i++) {
4029 status = U_ZERO_ERROR;
4030 locName = uloc_getAvailable(i);
4031 /*if(hasCollationElements(locName)) {*/
4032 log_verbose("Testing equality for locale %s\n", locName);
4033 source = ucol_open(locName, &status);
4034 target = ucol_open(locName, &status);
4035 if(TestEqualsForCollator(locName, source, target)) {
4036 log_err("Errors for locale %s\n", locName);
4037 }
4038 ucol_close(source);
4039 /*}*/
4040 }
4041 }
4042
TestJ2726(void)4043 static void TestJ2726(void) {
4044 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4045 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4046 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4047 UErrorCode status = U_ZERO_ERROR;
4048 UCollator *coll = ucol_open("en", &status);
4049 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4050 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4051 doTest(coll, a, aSpace, UCOL_EQUAL);
4052 doTest(coll, aSpace, a, UCOL_EQUAL);
4053 doTest(coll, a, spaceA, UCOL_EQUAL);
4054 doTest(coll, spaceA, a, UCOL_EQUAL);
4055 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4056 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4057 ucol_close(coll);
4058 }
4059
NullRule(void)4060 static void NullRule(void) {
4061 UChar r[3] = {0};
4062 UErrorCode status = U_ZERO_ERROR;
4063 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4064 if(U_SUCCESS(status)) {
4065 log_err("This should have been an error!\n");
4066 ucol_close(coll);
4067 } else {
4068 status = U_ZERO_ERROR;
4069 }
4070 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4071 if(U_FAILURE(status)) {
4072 log_err("Empty rules should have produced a valid collator\n");
4073 } else {
4074 ucol_close(coll);
4075 }
4076 }
4077
4078 /**
4079 * Test for CollationElementIterator previous and next for the whole set of
4080 * unicode characters with normalization on.
4081 */
TestNumericCollation(void)4082 static void TestNumericCollation(void)
4083 {
4084 UErrorCode status = U_ZERO_ERROR;
4085
4086 const static char *basicTestStrings[]={
4087 "hello1",
4088 "hello2",
4089 "hello2002",
4090 "hello2003",
4091 "hello123456",
4092 "hello1234567",
4093 "hello10000000",
4094 "hello100000000",
4095 "hello1000000000",
4096 "hello10000000000",
4097 };
4098
4099 const static char *preZeroTestStrings[]={
4100 "avery10000",
4101 "avery010000",
4102 "avery0010000",
4103 "avery00010000",
4104 "avery000010000",
4105 "avery0000010000",
4106 "avery00000010000",
4107 "avery000000010000",
4108 };
4109
4110 const static char *thirtyTwoBitNumericStrings[]={
4111 "avery42949672960",
4112 "avery42949672961",
4113 "avery42949672962",
4114 "avery429496729610"
4115 };
4116
4117 const static char *supplementaryDigits[] = {
4118 "\\uD835\\uDFCE", /* 0 */
4119 "\\uD835\\uDFCF", /* 1 */
4120 "\\uD835\\uDFD0", /* 2 */
4121 "\\uD835\\uDFD1", /* 3 */
4122 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4123 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4124 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4125 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4126 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4127 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4128 };
4129
4130 const static char *foreignDigits[] = {
4131 "\\u0661",
4132 "\\u0662",
4133 "\\u0663",
4134 "\\u0661\\u0660",
4135 "\\u0661\\u0662",
4136 "\\u0661\\u0663",
4137 "\\u0662\\u0660",
4138 "\\u0662\\u0662",
4139 "\\u0662\\u0663",
4140 "\\u0663\\u0660",
4141 "\\u0663\\u0662",
4142 "\\u0663\\u0663"
4143 };
4144
4145 const static char *evenZeroes[] = {
4146 "2000",
4147 "2001",
4148 "2002",
4149 "2003"
4150 };
4151
4152 UColAttribute att = UCOL_NUMERIC_COLLATION;
4153 UColAttributeValue val = UCOL_ON;
4154
4155 /* Open our collator. */
4156 UCollator* coll = ucol_open("root", &status);
4157 if (U_FAILURE(status)){
4158 log_err("ERROR: in using ucol_open()\n %s\n",
4159 myErrorName(status));
4160 return;
4161 }
4162 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4163 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4164 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4165 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4166 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4167
4168 /* Setting up our collator to do digits. */
4169 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4170 if (U_FAILURE(status)){
4171 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4172 myErrorName(status));
4173 return;
4174 }
4175
4176 /*
4177 Testing that prepended zeroes still yield the correct collation behavior.
4178 We expect that every element in our strings array will be equal.
4179 */
4180 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4181
4182 ucol_close(coll);
4183 }
4184
TestTibetanConformance(void)4185 static void TestTibetanConformance(void)
4186 {
4187 const char* test[] = {
4188 "\\u0FB2\\u0591\\u0F71\\u0061",
4189 "\\u0FB2\\u0F71\\u0061"
4190 };
4191
4192 UErrorCode status = U_ZERO_ERROR;
4193 UCollator *coll = ucol_open("", &status);
4194 UChar source[100];
4195 UChar target[100];
4196 int result;
4197 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4198 if (U_SUCCESS(status)) {
4199 u_unescape(test[0], source, 100);
4200 u_unescape(test[1], target, 100);
4201 doTest(coll, source, target, UCOL_EQUAL);
4202 result = ucol_strcoll(coll, source, -1, target, -1);
4203 log_verbose("result %d\n", result);
4204 if (UCOL_EQUAL != result) {
4205 log_err("Tibetan comparison error\n");
4206 }
4207 }
4208 ucol_close(coll);
4209
4210 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4211 }
4212
/* Runs two Han strings through the generic locale test for "zh__PINYIN". */
static void TestPinyinProblem(void) {
    static const char *hanStrings[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanStrings,
                         sizeof(hanStrings)/sizeof(hanStrings[0]));
}
4217
/* Upper bound (inclusive) of the raw values exercised by the implicit CE tests. */
#define TST_UCOL_MAX_INPUT 0x220001
/* Bit masks for 32-bit values. They carry no trailing semicolon, so they can be
 * used inside larger expressions as well as at the end of a statement. */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4222
4223
/* Logs (verbose only) a raw value next to the implicit value computed for it;
 * values outside 0..TST_UCOL_MAX_INPUT are silently skipped. */
static void showImplicit(UChar32 i) {
    if (i < 0 || i > TST_UCOL_MAX_INPUT) {
        return;
    }
    log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
}
4229
TestImplicitGeneration(void)4230 static void TestImplicitGeneration(void) {
4231 UErrorCode status = U_ZERO_ERROR;
4232 UChar32 last = 0;
4233 UChar32 current;
4234 UChar32 i = 0, j = 0;
4235 UChar32 roundtrip = 0;
4236 UChar32 lastBottom = 0;
4237 UChar32 currentBottom = 0;
4238 UChar32 lastTop = 0;
4239 UChar32 currentTop = 0;
4240
4241 UCollator *coll = ucol_open("root", &status);
4242 if(U_FAILURE(status)) {
4243 log_err("Couldn't open UCA\n");
4244 return;
4245 }
4246
4247 uprv_uca_getRawFromImplicit(0xE20303E7);
4248
4249 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4250 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4251
4252 /* check that it round-trips AND that all intervening ones are illegal*/
4253 roundtrip = uprv_uca_getRawFromImplicit(current);
4254 if (roundtrip != i) {
4255 log_err("No roundtrip %08X\n", i);
4256 }
4257 if (last != 0) {
4258 for (j = last + 1; j < current; ++j) {
4259 roundtrip = uprv_uca_getRawFromImplicit(j);
4260 /* raise an error if it *doesn't* find an error*/
4261 if (roundtrip != -1) {
4262 log_err("Fails to recognize illegal %08X\n", j);
4263 }
4264 }
4265 }
4266 /* now do other consistency checks*/
4267 lastBottom = last & bottomByte;
4268 currentBottom = current & bottomByte;
4269 lastTop = last & topByte;
4270 currentTop = current & topByte;
4271
4272 /* print out some values for spot-checking*/
4273 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4274 showImplicit(i-3);
4275 showImplicit(i-2);
4276 showImplicit(i-1);
4277 showImplicit(i);
4278 showImplicit(i+1);
4279 showImplicit(i+2);
4280 }
4281 last = current;
4282
4283 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4284 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4285 }
4286 }
4287 showImplicit(TST_UCOL_MAX_INPUT-2);
4288 showImplicit(TST_UCOL_MAX_INPUT-1);
4289 showImplicit(TST_UCOL_MAX_INPUT);
4290 ucol_close(coll);
4291 }
4292
4293 /**
4294 * Iterate through the given iterator, checking to see that all the strings
4295 * in the expected array are present.
4296 * @param expected array of strings we expect to see, or NULL
4297 * @param expectedCount number of elements of expected, or 0
4298 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4299 static int32_t checkUEnumeration(const char* msg,
4300 UEnumeration* iter,
4301 const char** expected,
4302 int32_t expectedCount) {
4303 UErrorCode ec = U_ZERO_ERROR;
4304 int32_t i = 0, n, j, bit;
4305 int32_t seenMask = 0;
4306
4307 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4308 n = uenum_count(iter, &ec);
4309 if (!assertSuccess("count", &ec)) return -1;
4310 log_verbose("%s = [", msg);
4311 for (;; ++i) {
4312 const char* s = uenum_next(iter, NULL, &ec);
4313 if (!assertSuccess("snext", &ec) || s == NULL) break;
4314 if (i != 0) log_verbose(",");
4315 log_verbose("%s", s);
4316 /* check expected list */
4317 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4318 if ((seenMask&bit) == 0 &&
4319 uprv_strcmp(s, expected[j]) == 0) {
4320 seenMask |= bit;
4321 break;
4322 }
4323 }
4324 }
4325 log_verbose("] (%d)\n", i);
4326 assertTrue("count verified", i==n);
4327 /* did we see all expected strings? */
4328 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4329 if ((seenMask&bit)!=0) {
4330 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4331 } else {
4332 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4333 }
4334 }
4335 return n;
4336 }
4337
4338 /**
4339 * Test new API added for separate collation tree.
4340 */
TestSeparateTrees(void)4341 static void TestSeparateTrees(void) {
4342 UErrorCode ec = U_ZERO_ERROR;
4343 UEnumeration *e = NULL;
4344 int32_t n = -1;
4345 UBool isAvailable;
4346 char loc[256];
4347
4348 static const char* AVAIL[] = { "en", "de" };
4349
4350 static const char* KW[] = { "collation" };
4351
4352 static const char* KWVAL[] = { "phonebook", "stroke" };
4353
4354 #if !UCONFIG_NO_SERVICE
4355 e = ucol_openAvailableLocales(&ec);
4356 assertSuccess("ucol_openAvailableLocales", &ec);
4357 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4358 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4359 /* Don't need to check n because we check list */
4360 uenum_close(e);
4361 #endif
4362
4363 e = ucol_getKeywords(&ec);
4364 assertSuccess("ucol_getKeywords", &ec);
4365 assertTrue("ucol_getKeywords!=0", e!=0);
4366 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4367 /* Don't need to check n because we check list */
4368 uenum_close(e);
4369
4370 e = ucol_getKeywordValues(KW[0], &ec);
4371 assertSuccess("ucol_getKeywordValues", &ec);
4372 assertTrue("ucol_getKeywordValues!=0", e!=0);
4373 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4374 /* Don't need to check n because we check list */
4375 uenum_close(e);
4376
4377 /* Try setting a warning before calling ucol_getKeywordValues */
4378 ec = U_USING_FALLBACK_WARNING;
4379 e = ucol_getKeywordValues(KW[0], &ec);
4380 assertSuccess("ucol_getKeywordValues [with warning code set]", &ec);
4381 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4382 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4383 /* Don't need to check n because we check list */
4384 uenum_close(e);
4385
4386 /*
4387 U_DRAFT int32_t U_EXPORT2
4388 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4389 const char* locale, UBool* isAvailable,
4390 UErrorCode* status);
4391 }
4392 */
4393 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4394 &isAvailable, &ec);
4395 assertSuccess("getFunctionalEquivalent", &ec);
4396 assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4397 assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4398 isAvailable == TRUE);
4399
4400 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4401 &isAvailable, &ec);
4402 assertSuccess("getFunctionalEquivalent", &ec);
4403 assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4404 assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4405 isAvailable == TRUE);
4406 }
4407
/* supercedes TestJ784 */
/* Runs two lists of tone-marked strings through the generic test helpers,
 * once with the [before 2] rules below and once with the "zh" locale. */
static void TestBeforePinyin(void) {
    static const char beforeRules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* Syllables starting with "l". */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Case and tone variants of "xa" and "xax". */
    static const char *caseVariants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(beforeRules, syllables,
                        sizeof(syllables)/sizeof(syllables[0]));
    genericLocaleStarter("zh", syllables,
                         sizeof(syllables)/sizeof(syllables[0]));
    genericRulesStarter(beforeRules, caseVariants,
                        sizeof(caseVariants)/sizeof(caseVariants[0]));
    genericLocaleStarter("zh", caseVariants,
                         sizeof(caseVariants)/sizeof(caseVariants[0]));
}
4468
TestBeforeTightening(void)4469 static void TestBeforeTightening(void) {
4470 static const struct {
4471 const char *rules;
4472 UErrorCode expectedStatus;
4473 } tests[] = {
4474 { "&[before 1]a<x", U_ZERO_ERROR },
4475 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4476 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4477 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4478 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4479 { "&[before 2]a<<x",U_ZERO_ERROR },
4480 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4481 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4482 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4483 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4484 { "&[before 3]a<<<x",U_ZERO_ERROR },
4485 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4486 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4487 };
4488
4489 int32_t i = 0;
4490
4491 UErrorCode status = U_ZERO_ERROR;
4492 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4493 uint32_t rlen = 0;
4494
4495 UCollator *coll = NULL;
4496
4497
4498 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4499 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4500 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4501 if(status != tests[i].expectedStatus) {
4502 log_err("Opening a collator with rules %s returned error code %s, expected %s\n",
4503 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4504 }
4505 ucol_close(coll);
4506 status = U_ZERO_ERROR;
4507 }
4508
4509 }
4510
4511 #if 0
4512 &m < a
4513 &[before 1] a < x <<< X << q <<< Q < z
4514 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4515
4516 &m < a
4517 &[before 2] a << x <<< X << q <<< Q < z
4518 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4519
4520 &m < a
4521 &[before 3] a <<< x <<< X << q <<< Q < z
4522 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4523
4524
4525 &m << a
4526 &[before 1] a < x <<< X << q <<< Q < z
4527 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4528
4529 &m << a
4530 &[before 2] a << x <<< X << q <<< Q < z
4531 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4532
4533 &m << a
4534 &[before 3] a <<< x <<< X << q <<< Q < z
4535 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4536
4537
4538 &m <<< a
4539 &[before 1] a < x <<< X << q <<< Q < z
4540 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4541
4542 &m <<< a
4543 &[before 2] a << x <<< X << q <<< Q < z
4544 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4545
4546 &m <<< a
4547 &[before 3] a <<< x <<< X << q <<< Q < z
4548 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4549
4550
4551 &[before 1] s < x <<< X << q <<< Q < z
4552 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4553
4554 &[before 2] s << x <<< X << q <<< Q < z
4555 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4556
4557 &[before 3] s <<< x <<< X << q <<< Q < z
4558 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4559
4560
4561 &[before 1] \u24DC < x <<< X << q <<< Q < z
4562 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4563
4564 &[before 2] \u24DC << x <<< X << q <<< Q < z
4565 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4566
4567 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4568 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4569 #endif
4570
4571
#if 0
/* requires features not yet supported */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each entry pairs a rule string that combines a reset with a [before N]
 * reset with a list of strings and the number of strings in that list; the
 * loop passes every entry to genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;      /* tailoring rules */
        const char* order[16];  /* strings handed to genericRulesStarter */
        int32_t size;           /* number of used entries in order */
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/string-list pair through the generic rules test. */
    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
#endif
4619
TestTailorNULL(void)4620 static void TestTailorNULL( void ) {
4621 const static char* rule = "&a <<< '\\u0000'";
4622 UErrorCode status = U_ZERO_ERROR;
4623 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4624 uint32_t rlen = 0;
4625 UChar a = 1, null = 0;
4626 UCollationResult res = UCOL_EQUAL;
4627
4628 UCollator *coll = NULL;
4629
4630
4631 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4632 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4633
4634 if(U_FAILURE(status)) {
4635 log_err("Could not open default collator!\n");
4636 } else {
4637 res = ucol_strcoll(coll, &a, 1, &null, 1);
4638
4639 if(res != UCOL_LESS) {
4640 log_err("NULL was not tailored properly!\n");
4641 }
4642 }
4643
4644 ucol_close(coll);
4645 }
4646
4647 static void
TestThaiSortKey(void)4648 TestThaiSortKey(void)
4649 {
4650 UChar yamakan = 0x0E4E;
4651 UErrorCode status = U_ZERO_ERROR;
4652 uint8_t key[256];
4653 int32_t keyLen = 0;
4654 /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4655 /* since it stays in the same relative position. This should be addressed in CLDR */
4656 /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4657 /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4658 /* UCA 5.0 moves Yammakan */
4659 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 };
4660 UCollator *coll = ucol_open("th", &status);
4661 if(U_FAILURE(status)) {
4662 log_err("Could not open a collator, exiting (%s)\n", u_errorName(status));
4663 return;
4664 }
4665
4666 keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4667 if(strcmp((char *)key, (char *)expectedKey)) {
4668 log_err("Yammakan key is different from ICU 34!\n");
4669 }
4670
4671 ucol_close(coll);
4672 }
4673
4674 static void
TestUpperFirstQuaternary(void)4675 TestUpperFirstQuaternary(void)
4676 {
4677 const char* tests[] = { "B", "b", "Bb", "bB" };
4678 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4679 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4680 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4681 }
4682
4683 static void
TestJ4960(void)4684 TestJ4960(void)
4685 {
4686 const char* tests[] = { "\\u00e2T", "aT" };
4687 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4688 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4689 const char* tests2[] = { "a", "A" };
4690 const char* rule = "&[first tertiary ignorable]=A=a";
4691 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4692 UColAttributeValue attVals2[] = { UCOL_ON };
4693 /* Test whether we correctly ignore primary ignorables on case level when */
4694 /* we have only primary & case level */
4695 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4696 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4697 /* and case level */
4698 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4699 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4700 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4701 }
4702
4703 static void
TestJ5223(void)4704 TestJ5223(void)
4705 {
4706 static const char *test = "this is a test string";
4707 UChar ustr[256];
4708 int32_t ustr_length = u_unescape(test, ustr, 256);
4709 unsigned char sortkey[256];
4710 int32_t sortkey_length;
4711 UErrorCode status = U_ZERO_ERROR;
4712 static UCollator *coll = NULL;
4713 coll = ucol_open("root", &status);
4714 if(U_FAILURE(status)) {
4715 log_err("Couldn't open UCA\n");
4716 return;
4717 }
4718 ucol_setStrength(coll, UCOL_PRIMARY);
4719 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4720 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4721 if (U_FAILURE(status)) {
4722 log_err("Failed setting atributes\n");
4723 return;
4724 }
4725 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4726 if (sortkey_length > 256) return;
4727
4728 /* we mark the position where the null byte should be written in advance */
4729 sortkey[sortkey_length-1] = 0xAA;
4730
4731 /* we set the buffer size one byte higher than needed */
4732 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4733 sortkey_length+1);
4734
4735 /* no error occurs (for me) */
4736 if (sortkey[sortkey_length-1] == 0xAA) {
4737 log_err("Hit bug at first try\n");
4738 }
4739
4740 /* we mark the position where the null byte should be written again */
4741 sortkey[sortkey_length-1] = 0xAA;
4742
4743 /* this time we set the buffer size to the exact amount needed */
4744 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4745 sortkey_length);
4746
4747 /* now the trailing null byte is not written */
4748 if (sortkey[sortkey_length-1] == 0xAA) {
4749 log_err("Hit bug at second try\n");
4750 }
4751
4752 ucol_close(coll);
4753 }
4754
/* Regression test for Thai partial sort key problem: two Thai strings that
 * differ only in their second-to-last character go through the generic "th"
 * locale test. */
static void
TestJ5232(void)
{
    static const char *thaiStrings[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiStrings,
                         sizeof(thaiStrings)/sizeof(thaiStrings[0]));
}
4766
/* Regression test J5367: runs "a" and "y" through the generic rules test with
 * a tailoring that resets on "Ny" and then on [first secondary ignorable]. */
static void
TestJ5367(void)
{
    static const char *strings[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, strings,
                        sizeof(strings)/sizeof(strings[0]));
}
4774
4775 static void
TestVI5913(void)4776 TestVI5913(void)
4777 {
4778 UErrorCode status = U_ZERO_ERROR;
4779 int32_t i, j;
4780 UCollator *coll =NULL;
4781 uint8_t resColl[100], expColl[100];
4782 int32_t rLen, tLen, ruleLen, sLen, kLen;
4783 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4784 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4785 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4786 UChar tData[][20]={
4787 {0x1EAC, 0},
4788 {0x0041, 0x0323, 0x0302, 0},
4789 {0x1EA0, 0x0302, 0},
4790 {0x00C2, 0x0323, 0},
4791 {0x1ED8, 0}, /* O with dot and circumflex */
4792 {0x1ECC, 0x0302, 0},
4793 {0x1EB7, 0},
4794 {0x1EA1, 0x0306, 0},
4795 };
4796 UChar tailorData[][20]={
4797 {0x1FA2, 0}, /* Omega with 3 combining marks */
4798 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4799 {0x1FF3, 0x0313, 0x0300, 0},
4800 {0x1F60, 0x0300, 0x0345, 0},
4801 {0x1F62, 0x0345, 0},
4802 {0x1FA0, 0x0300, 0},
4803 };
4804 UChar tailorData2[][20]={
4805 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4806 {0x0073, 0x0323, 0x030C, 0},
4807 {0x0073, 0x030C, 0x0323, 0},
4808 };
4809 UChar tailorData3[][20]={
4810 {0x007a, 0}, /* z */
4811 {0x0061, 0x0065, 0}, /* a + e */
4812 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4813 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4814 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4815 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4816 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4817 {0x00EA, 0}, /* e with circumflex */
4818 };
4819
4820 /* Test Vietnamese sort. */
4821 coll = ucol_open("vi", &status);
4822 log_verbose("\n\nVI collation:");
4823 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4824 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4825 }
4826 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4827 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4828 }
4829 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4830 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4831 }
4832 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4833 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4834 }
4835
4836 for (j=0; j<8; j++) {
4837 tLen = u_strlen(tData[j]);
4838 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4839 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4840 for(i = 0; i<rLen; i++) {
4841 log_verbose(" %02X", resColl[i]);
4842 }
4843 }
4844
4845 ucol_close(coll);
4846
4847 /* Test Russian sort. */
4848 coll = ucol_open("ro", &status);
4849 log_verbose("\n\nRO collation:");
4850 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4851 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4852 }
4853 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4854 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4855 }
4856 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4857 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4858 }
4859
4860 for (j=4; j<8; j++) {
4861 tLen = u_strlen(tData[j]);
4862 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4863 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4864 for(i = 0; i<rLen; i++) {
4865 log_verbose(" %02X", resColl[i]);
4866 }
4867 }
4868 ucol_close(coll);
4869
4870 /* Test the precomposed Greek character with 3 combining marks. */
4871 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
4872 ruleLen = u_strlen(rule);
4873 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4874 sLen = u_strlen(tailorData[0]);
4875 for (j=1; j<6; j++) {
4876 tLen = u_strlen(tailorData[j]);
4877 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
4878 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
4879 }
4880 }
4881 /* Test getSortKey. */
4882 tLen = u_strlen(tailorData[0]);
4883 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
4884 for (j=0; j<6; j++) {
4885 tLen = u_strlen(tailorData[j]);
4886 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
4887 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4888 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4889 for(i = 0; i<rLen; i++) {
4890 log_err(" %02X", resColl[i]);
4891 }
4892 }
4893 }
4894 ucol_close(coll);
4895
4896 log_verbose("\n\nTailoring test for s with caron:");
4897 ruleLen = u_strlen(rule2);
4898 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4899 tLen = u_strlen(tailorData2[0]);
4900 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
4901 for (j=1; j<3; j++) {
4902 tLen = u_strlen(tailorData2[j]);
4903 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
4904 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4905 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4906 for(i = 0; i<rLen; i++) {
4907 log_err(" %02X", resColl[i]);
4908 }
4909 }
4910 }
4911 ucol_close(coll);
4912
4913 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
4914 ruleLen = u_strlen(rule3);
4915 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4916 tLen = u_strlen(tailorData3[3]);
4917 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
4918 for (j=4; j<6; j++) {
4919 tLen = u_strlen(tailorData3[j]);
4920 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
4921
4922 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4923 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4924 for(i = 0; i<rLen; i++) {
4925 log_err(" %02X", resColl[i]);
4926 }
4927 }
4928
4929 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4930 for(i = 0; i<rLen; i++) {
4931 log_verbose(" %02X", resColl[i]);
4932 }
4933 }
4934 ucol_close(coll);
4935 }
4936
4937 #define TSKC_DATA_SIZE 5
4938 #define TSKC_BUF_SIZE 50
4939 static void
TestSortKeyConsistency(void)4940 TestSortKeyConsistency(void)
4941 {
4942 UErrorCode icuRC = U_ZERO_ERROR;
4943 UCollator* ucol;
4944 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4945
4946 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4947 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4948 int32_t i, j, i2;
4949
4950 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4951 if (U_FAILURE(icuRC))
4952 {
4953 log_err("ucol_openFromShortString failed\n");
4954 return;
4955 }
4956
4957 for (i = 0; i < TSKC_DATA_SIZE; i++)
4958 {
4959 UCharIterator uiter;
4960 uint32_t state[2] = { 0, 0 };
4961 int32_t dataLen = i+1;
4962 for (j=0; j<TSKC_BUF_SIZE; j++)
4963 bufFull[i][j] = bufPart[i][j] = 0;
4964
4965 /* Full sort key */
4966 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4967
4968 /* Partial sort key */
4969 uiter_setString(&uiter, data, dataLen);
4970 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4971 if (U_FAILURE(icuRC))
4972 {
4973 log_err("ucol_nextSortKeyPart failed\n");
4974 ucol_close(ucol);
4975 return;
4976 }
4977
4978 for (i2=0; i2<i; i2++)
4979 {
4980 UBool fullMatch = TRUE;
4981 UBool partMatch = TRUE;
4982 for (j=0; j<TSKC_BUF_SIZE; j++)
4983 {
4984 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4985 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4986 }
4987 if (fullMatch != partMatch) {
4988 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4989 : "partial key was consistent, but full key changed\n");
4990 ucol_close(ucol);
4991 return;
4992 }
4993 }
4994
4995 }
4996
4997 /*=============================================*/
4998 ucol_close(ucol);
4999 }
5000
5001
5002 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5003
addMiscCollTest(TestNode ** root)5004 void addMiscCollTest(TestNode** root)
5005 {
5006 TEST(TestRuleOptions);
5007 TEST(TestBeforePrefixFailure);
5008 TEST(TestContractionClosure);
5009 TEST(TestPrefixCompose);
5010 TEST(TestStrCollIdenticalPrefix);
5011 TEST(TestPrefix);
5012 TEST(TestNewJapanese);
5013 /*TEST(TestLimitations);*/
5014 TEST(TestNonChars);
5015 TEST(TestExtremeCompression);
5016 TEST(TestSurrogates);
5017 TEST(TestVariableTopSetting);
5018 TEST(TestBocsuCoverage);
5019 TEST(TestCyrillicTailoring);
5020 TEST(TestCase);
5021 TEST(IncompleteCntTest);
5022 TEST(BlackBirdTest);
5023 TEST(FunkyATest);
5024 TEST(BillFairmanTest);
5025 TEST(RamsRulesTest);
5026 TEST(IsTailoredTest);
5027 TEST(TestCollations);
5028 TEST(TestChMove);
5029 TEST(TestImplicitTailoring);
5030 TEST(TestFCDProblem);
5031 TEST(TestEmptyRule);
5032 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5033 TEST(TestJ815);
5034 /*TEST(TestJ831);*/ /* we changed lv locale */
5035 TEST(TestBefore);
5036 TEST(TestRedundantRules);
5037 TEST(TestExpansionSyntax);
5038 TEST(TestHangulTailoring);
5039 TEST(TestUCARules);
5040 TEST(TestIncrementalNormalize);
5041 TEST(TestComposeDecompose);
5042 TEST(TestCompressOverlap);
5043 TEST(TestContraction);
5044 TEST(TestExpansion);
5045 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5046 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5047 TEST(TestOptimize);
5048 TEST(TestSuppressContractions);
5049 TEST(Alexis2);
5050 TEST(TestHebrewUCA);
5051 TEST(TestPartialSortKeyTermination);
5052 TEST(TestSettings);
5053 TEST(TestEquals);
5054 TEST(TestJ2726);
5055 TEST(NullRule);
5056 TEST(TestNumericCollation);
5057 TEST(TestTibetanConformance);
5058 TEST(TestPinyinProblem);
5059 TEST(TestImplicitGeneration);
5060 TEST(TestSeparateTrees);
5061 TEST(TestBeforePinyin);
5062 TEST(TestBeforeTightening);
5063 /*TEST(TestMoreBefore);*/
5064 TEST(TestTailorNULL);
5065 TEST(TestThaiSortKey);
5066 TEST(TestUpperFirstQuaternary);
5067 TEST(TestJ4960);
5068 TEST(TestJ5223);
5069 TEST(TestJ5232);
5070 TEST(TestJ5367);
5071 TEST(TestSortKeyConsistency);
5072 TEST(TestVI5913); /* VI, RO tailored rules */
5073 }
5074
5075 #endif /* #if !UCONFIG_NO_COLLATION */
5076