1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2001-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
7 *
8 * File cmsccoll.C
9 *
10 *******************************************************************************/
11 /**
12 * These are the tests specific to ICU 1.8 and above, that I didn't know where
13 * to fit.
14 */
15
16 #include <stdio.h>
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_COLLATION
21
22 #include "unicode/ucol.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/uloc.h"
25 #include "cintltst.h"
26 #include "ccolltst.h"
27 #include "callcoll.h"
28 #include "unicode/ustring.h"
29 #include "string.h"
30 #include "ucol_imp.h"
31 #include "ucol_tok.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "uassert.h"
35 #include "unicode/parseerr.h"
36 #include "unicode/ucnv.h"
37 #include "unicode/ures.h"
38 #include "uparse.h"
39 #include "putilimp.h"
40
41
/*
 * Number of elements in a true array (NOT a pointer or an array parameter).
 * The argument is parenthesized before indexing so that any array-valued
 * expression can be passed safely.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))
43
44 #define MAX_TOKEN_LEN 16
45
46 typedef UCollationResult tst_strcoll(void *collator, const int object,
47 const UChar *source, const int sLen,
48 const UChar *target, const int tLen);
49
50
51
/*
 * Strings for the first half of IncompleteCntTest, which builds a collator
 * from the rule " & Z < ABC < Q < B". The test expects every entry to sort
 * before every entry that follows it in this table.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};
66
/*
 * Strings for the second half of IncompleteCntTest, which builds a collator
 * from the rule " & Z < DAVIS < MARK <DAV". The test expects every entry to
 * sort before every entry that follows it in this table.
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR",
    "Z", "DAVIS", "MARK",
    "DAV", "DAVI"
};
78
IncompleteCntTest(void)79 static void IncompleteCntTest(void)
80 {
81 UErrorCode status = U_ZERO_ERROR;
82 UChar temp[90];
83 UChar t1[90];
84 UChar t2[90];
85
86 UCollator *coll = NULL;
87 uint32_t i = 0, j = 0;
88 uint32_t size = 0;
89
90 u_uastrcpy(temp, " & Z < ABC < Q < B");
91
92 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
93
94 if(U_SUCCESS(status)) {
95 size = sizeof(cnt1)/sizeof(cnt1[0]);
96 for(i = 0; i < size-1; i++) {
97 for(j = i+1; j < size; j++) {
98 UCollationElements *iter;
99 u_uastrcpy(t1, cnt1[i]);
100 u_uastrcpy(t2, cnt1[j]);
101 doTest(coll, t1, t2, UCOL_LESS);
102 /* synwee : added collation element iterator test */
103 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
104 if (U_FAILURE(status)) {
105 log_err("Creation of iterator failed\n");
106 break;
107 }
108 backAndForth(iter);
109 ucol_closeElements(iter);
110 }
111 }
112 }
113
114 ucol_close(coll);
115
116
117 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
118 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
119
120 if(U_SUCCESS(status)) {
121 size = sizeof(cnt2)/sizeof(cnt2[0]);
122 for(i = 0; i < size-1; i++) {
123 for(j = i+1; j < size; j++) {
124 UCollationElements *iter;
125 u_uastrcpy(t1, cnt2[i]);
126 u_uastrcpy(t2, cnt2[j]);
127 doTest(coll, t1, t2, UCOL_LESS);
128
129 /* synwee : added collation element iterator test */
130 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
131 if (U_FAILURE(status)) {
132 log_err("Creation of iterator failed\n");
133 break;
134 }
135 backAndForth(iter);
136 ucol_closeElements(iter);
137 }
138 }
139 }
140
141 ucol_close(coll);
142
143
144 }
145
/*
 * Strings used by BlackBirdTest once alternate handling is set to
 * UCOL_SHIFTED. At quaternary strength the test expects each entry to sort
 * before every later entry; at tertiary strength adjacent entries are
 * compared against the shiftedTert table.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
157
158 const static UCollationResult shiftedTert[] = {
159 UCOL_EQUAL,
160 UCOL_EQUAL,
161 UCOL_EQUAL,
162 UCOL_LESS,
163 UCOL_EQUAL,
164 UCOL_EQUAL,
165 UCOL_LESS,
166 UCOL_EQUAL,
167 UCOL_EQUAL
168 };
169
/*
 * Strings used by BlackBirdTest while alternate handling is
 * UCOL_NON_IGNORABLE; the test expects each entry to sort before every
 * later entry.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
181
BlackBirdTest(void)182 static void BlackBirdTest(void) {
183 UErrorCode status = U_ZERO_ERROR;
184 UChar t1[90];
185 UChar t2[90];
186
187 uint32_t i = 0, j = 0;
188 uint32_t size = 0;
189 UCollator *coll = ucol_open("en_US", &status);
190
191 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
192 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
193
194 if(U_SUCCESS(status)) {
195 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
196 for(i = 0; i < size-1; i++) {
197 for(j = i+1; j < size; j++) {
198 u_uastrcpy(t1, nonignorable[i]);
199 u_uastrcpy(t2, nonignorable[j]);
200 doTest(coll, t1, t2, UCOL_LESS);
201 }
202 }
203 }
204
205 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
206 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
207
208 if(U_SUCCESS(status)) {
209 size = sizeof(shifted)/sizeof(shifted[0]);
210 for(i = 0; i < size-1; i++) {
211 for(j = i+1; j < size; j++) {
212 u_uastrcpy(t1, shifted[i]);
213 u_uastrcpy(t2, shifted[j]);
214 doTest(coll, t1, t2, UCOL_LESS);
215 }
216 }
217 }
218
219 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
220 if(U_SUCCESS(status)) {
221 size = sizeof(shifted)/sizeof(shifted[0]);
222 for(i = 1; i < size; i++) {
223 u_uastrcpy(t1, shifted[i-1]);
224 u_uastrcpy(t2, shifted[i]);
225 doTest(coll, t1, t2, shiftedTert[i]);
226 }
227 }
228
229 ucol_close(coll);
230 }
231
232 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
233 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
234 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0000},
236 {0x00C0, 0x0301, 0x0000},
237 /* this would work with forced normalization */
238 {0x00C0, 0x0316, 0x0000}
239 };
240
241 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
242 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
243 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
244 {0x00C0, 0},
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 /* this would work with forced normalization */
247 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
248 };
249
250 const static UCollationResult results[] = {
251 UCOL_GREATER,
252 UCOL_EQUAL,
253 UCOL_EQUAL,
254 UCOL_GREATER,
255 UCOL_EQUAL
256 };
257
FunkyATest(void)258 static void FunkyATest(void)
259 {
260
261 int32_t i;
262 UErrorCode status = U_ZERO_ERROR;
263 UCollator *myCollation;
264 myCollation = ucol_open("en_US", &status);
265 if(U_FAILURE(status)){
266 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
267 return;
268 }
269 log_verbose("Testing some A letters, for some reason\n");
270 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
271 ucol_setStrength(myCollation, UCOL_TERTIARY);
272 for (i = 0; i < 4 ; i++)
273 {
274 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
275 }
276 ucol_close(myCollation);
277 }
278
279 UColAttributeValue caseFirst[] = {
280 UCOL_OFF,
281 UCOL_LOWER_FIRST,
282 UCOL_UPPER_FIRST
283 };
284
285
286 UColAttributeValue alternateHandling[] = {
287 UCOL_NON_IGNORABLE,
288 UCOL_SHIFTED
289 };
290
291 UColAttributeValue caseLevel[] = {
292 UCOL_OFF,
293 UCOL_ON
294 };
295
296 UColAttributeValue strengths[] = {
297 UCOL_PRIMARY,
298 UCOL_SECONDARY,
299 UCOL_TERTIARY,
300 UCOL_QUATERNARY,
301 UCOL_IDENTICAL
302 };
303
#if 0
/* Printable names, parallel to the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, parallel to the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, parallel to the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, parallel to the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "Mark Davis" (with its second character
 * replaced by U+00E4) for every combination of case-first, alternate
 * handling, case level and strength.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);


    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* index with h: i still holds sizem from the dump loop above */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    /* release the collator opened at the top of the routine */
    ucol_close(coll);
}
#endif
381
BillFairmanTest(void)382 static void BillFairmanTest(void) {
383 /*
384 ** check for actual locale via ICU resource bundles
385 **
386 ** lp points to the original locale ("fr_FR_....")
387 */
388
389 UResourceBundle *lr,*cr;
390 UErrorCode lec = U_ZERO_ERROR;
391 const char *lp = "fr_FR_you_ll_never_find_this_locale";
392
393 log_verbose("BillFairmanTest\n");
394
395 lr = ures_open(NULL,lp,&lec);
396 if (lr) {
397 cr = ures_getByKey(lr,"collations",0,&lec);
398 if (cr) {
399 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
400 if (lp) {
401 if (U_SUCCESS(lec)) {
402 if(strcmp(lp, "fr") != 0) {
403 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
404 }
405 }
406 }
407 ures_close(cr);
408 }
409 ures_close(lr);
410 }
411 }
412
/*
 * Checks a primary-level relation p < q.
 *
 * First p and q are compared directly. Then U+31A3 is put in front of p and
 * U+310D in front of q, and the prefixed strings must still compare
 * UCOL_LESS.
 *
 * col  - collator under test
 * p, q - NUL-terminated strings; each must fit, together with the one-unit
 *        prefix, into a 256-UChar buffer
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};

    doTest(col, p, q, UCOL_LESS);

    source[0] = 0x31a3;
    u_strcpy(&source[1], p);

    target[0] = 0x310d;
    u_strcpy(&target[1], q);

    doTest(col, source, target, UCOL_LESS);
}
443
/*
 * Checks a secondary-level relation p < q with three comparisons:
 *   1. p against q directly (expected UCOL_LESS);
 *   2. 'S' + p against 's' + q (expected UCOL_LESS);
 *   3. p + 'b' against q + 'a' (expected UCOL_GREATER).
 *
 * col  - collator under test
 * p, q - NUL-terminated strings; each must fit, together with one extra
 *        code unit, into a 256-UChar buffer
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* same strings behind prefixes 'S' and 's' */
    source[0] = 0x0053;
    u_strcpy(&source[1], p);
    target[0] = 0x0073;
    u_strcpy(&target[1], q);
    doTest(col, source, target, UCOL_LESS);

    /* same strings followed by 'b' and 'a' respectively */
    u_strcpy(source, p);
    source[pLen] = 0x62;
    source[pLen+1] = 0;

    u_strcpy(target, q);
    target[qLen] = 0x61;
    target[qLen+1] = 0;

    doTest(col, source, target, UCOL_GREATER);
}
480
/*
 * Checks a tertiary-level relation p < q with three comparisons:
 *   1. p against q directly (expected UCOL_LESS);
 *   2. U+0020 + p against U+002D + q (expected UCOL_LESS);
 *   3. p + U+00E0 against q + U+0061 (expected UCOL_GREATER).
 *
 * col  - collator under test
 * p, q - NUL-terminated strings; each must fit, together with one extra
 *        code unit, into a 256-UChar buffer
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    int32_t pLen = u_strlen(p);
    int32_t qLen = u_strlen(q);

    doTest(col, p, q, UCOL_LESS);

    /* same strings behind a space and a hyphen-minus */
    source[0] = 0x0020;
    u_strcpy(&source[1], p);
    target[0] = 0x002D;
    u_strcpy(&target[1], q);
    doTest(col, source, target, UCOL_LESS);

    /* same strings followed by U+00E0 and U+0061 respectively */
    u_strcpy(source, p);
    source[pLen] = 0xE0;
    source[pLen+1] = 0;

    u_strcpy(target, q);
    target[qLen] = 0x61;
    target[qLen+1] = 0;

    doTest(col, source, target, UCOL_GREATER);
}
515
/*
 * Checks an identity relation: p and q must compare UCOL_EQUAL under col.
 */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    doTest(col, p, q, UCOL_EQUAL);
}
527
/*
 * Verifies a collator against its own tailoring rules.
 *
 * The rule string of coll is copied and tokenized with
 * ucol_tok_parseNextToken(). For each relation token, the previous
 * character sequence ("first") and the current one ("second") are handed to
 * testEquality / testPrimary / testSecondary / testTertiary according to
 * the token's strength. Reset tokens only start a new sequence; a
 * [before] reset swaps the roles of first and second for the next relation.
 *
 * coll   - collator whose rules are checked
 * status - in/out ICU error code; nothing is done if it already indicates
 *          failure or if the collator has no rules. Set to
 *          U_MEMORY_ALLOCATION_ERROR if the rule copy cannot be allocated.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;
    uint32_t strength = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t exOffset = 0; uint32_t exLen = 0;
    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    uint32_t firstEx = 0;
    /* uint32_t rExpsLen = 0; */
    uint32_t firstLen = 0;
    UBool top_ = TRUE;
    uint16_t specs = 0;
    UBool startOfRules = TRUE;
    UBool lastReset = FALSE;
    UBool before = FALSE;
    uint32_t beforeStrength = 0;
    UColTokenParser src;
    UColOptionSet opts;

    UChar first[256];
    UChar second[256];
    UChar tempB[256];
    uint32_t tempLen;
    UChar *rulesCopy = NULL;
    UParseError parseError;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);
    if(U_SUCCESS(*status) && ruleLen > 0) {
        /* the tokenizer needs a writable copy with extra room at the end */
        rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
        *first = *second = 0;

        while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            exOffset = src.parsedToken.extensionOffset;
            exLen = src.parsedToken.extensionLen;
            prefixOffset = src.parsedToken.prefixOffset;
            prefixLen = src.parsedToken.prefixLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
            if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
                second[0] = 0;
            } else {
                u_strncpy(second,rulesCopy+chOffset, chLen);
                second[chLen] = 0;

                if(exLen > 0 && firstEx == 0) {
                    u_strncat(first, rulesCopy+exOffset, exLen);
                    first[firstLen+exLen] = 0;
                }

                if(lastReset == TRUE && prefixLen != 0) {
                    /*
                     * Make room for the prefix by shifting the existing
                     * contents right. Source and destination overlap, so a
                     * forward string copy would overwrite characters before
                     * reading them; memmove handles the overlap.
                     */
                    uprv_memmove(first+prefixLen, first, firstLen*sizeof(UChar));
                    u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
                    first[firstLen+prefixLen] = 0;
                    firstLen = firstLen+prefixLen;
                }

                if(before == TRUE) { /* swap first and second */
                    u_strcpy(tempB, first);
                    u_strcpy(first, second);
                    u_strcpy(second, tempB);

                    tempLen = firstLen;
                    firstLen = chLen;
                    chLen = tempLen;

                    tempLen = firstEx;
                    firstEx = exLen;
                    exLen = tempLen;
                    if(beforeStrength < strength) {
                        strength = beforeStrength;
                    }
                }
            }
            lastReset = FALSE;

            switch(strength){
            case UCOL_IDENTICAL:
                testEquality(coll,first,second);
                break;
            case UCOL_PRIMARY:
                testPrimary(coll,first,second);
                break;
            case UCOL_SECONDARY:
                testSecondary(coll,first,second);
                break;
            case UCOL_TERTIARY:
                testTertiary(coll,first,second);
                break;
            case UCOL_TOK_RESET:
                lastReset = TRUE;
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(before) {
                    beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
                }
                break;
            default:
                break;
            }

            if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
                before = FALSE;
            } else {
                /* the current sequence becomes the left side of the next relation */
                firstLen = chLen;
                firstEx = exLen;
                u_strcpy(first, second);
            }
        }
        free(rulesCopy);
    }
}
651
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)652 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
653 UCollator *UCA = (UCollator *)collator;
654 return ucol_strcoll(UCA, source, sLen, target, tLen);
655 }
656
657 /*
658 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
659 #ifdef U_WINDOWS
660 LCID lcid = (LCID)collator;
661 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
662 #else
663 return 0;
664 #endif
665 }
666 */
667
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)668 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
669 UChar s1, UChar s2,
670 const UChar *s, const uint32_t sLen,
671 const UChar *t, const uint32_t tLen) {
672 UChar source[256] = {0};
673 UChar target[256] = {0};
674
675 source[0] = s1;
676 u_strcpy(source+1, s);
677 target[0] = s2;
678 u_strcpy(target+1, t);
679
680 return func(collator, opts, source, sLen+1, target, tLen+1);
681 }
682
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)683 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
684 UChar s1, UChar s2,
685 const UChar *s, const uint32_t sLen,
686 const UChar *t, const uint32_t tLen) {
687 UChar source[256] = {0};
688 UChar target[256] = {0};
689
690 u_strcpy(source, s);
691 source[sLen] = s1;
692 u_strcpy(target, t);
693 target[tLen] = s2;
694
695 return func(collator, opts, source, sLen+1, target, tLen+1);
696 }
697
probeStrength(tst_strcoll * func,void * collator,int opts,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen,UCollationResult result)698 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
699 const UChar *s, const uint32_t sLen,
700 const UChar *t, const uint32_t tLen,
701 UCollationResult result) {
702 /*UChar fPrimary = 0x6d;*/
703 /*UChar sPrimary = 0x6e;*/
704 UChar fSecondary = 0x310d;
705 UChar sSecondary = 0x31a3;
706 UChar fTertiary = 0x310f;
707 UChar sTertiary = 0x31b7;
708
709 UCollationResult oposite;
710 if(result == UCOL_EQUAL) {
711 return UCOL_IDENTICAL;
712 } else if(result == UCOL_GREATER) {
713 oposite = UCOL_LESS;
714 } else {
715 oposite = UCOL_GREATER;
716 }
717
718 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
719 return UCOL_PRIMARY;
720 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
721 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
722 return UCOL_SECONDARY;
723 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
724 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
725 return UCOL_TERTIARY;
726 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
727 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
728 return UCOL_QUATERNARY;
729 } else {
730 return UCOL_IDENTICAL;
731 }
732 }
733
/*
 * Writes a textual relation symbol into buffer and returns buffer.
 *
 * "==" is produced when res is UCOL_EQUAL or strength is 0xdeadbeef.
 * Otherwise the symbol is '>' (for UCOL_GREATER) or '<' (for any other
 * result) repeated strength+1 times. buffer must be large enough for the
 * symbol and its terminating NUL.
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    char symbol;
    uint32_t count;
    uint32_t pos;

    if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
        symbol = '=';
        count = 2;
    } else {
        symbol = (res == UCOL_GREATER) ? '>' : '<';
        count = strength+1;
    }

    for(pos = 0; pos < count; pos++) {
        buffer[pos] = symbol;
    }
    buffer[count] = '\0';

    return buffer;
}
755
756
757
logFailure(const char * platform,const char * test,const UChar * source,const uint32_t sLen,const UChar * target,const uint32_t tLen,UCollationResult realRes,uint32_t realStrength,UCollationResult expRes,uint32_t expStrength,UBool error)758 static void logFailure (const char *platform, const char *test,
759 const UChar *source, const uint32_t sLen,
760 const UChar *target, const uint32_t tLen,
761 UCollationResult realRes, uint32_t realStrength,
762 UCollationResult expRes, uint32_t expStrength, UBool error) {
763
764 uint32_t i = 0;
765
766 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
767 static int32_t maxOutputLength = 0;
768 int32_t outputLength;
769
770 *sEsc = *tEsc = *s = *t = 0;
771 if(error == TRUE) {
772 log_err("Difference between expected and generated order. Run test with -v for more info\n");
773 } else if(VERBOSITY == 0) {
774 return;
775 }
776 for(i = 0; i<sLen; i++) {
777 sprintf(b, "%04X", source[i]);
778 strcat(sEsc, "\\u");
779 strcat(sEsc, b);
780 strcat(s, b);
781 strcat(s, " ");
782 if(source[i] < 0x80) {
783 sprintf(b, "(%c)", source[i]);
784 strcat(sEsc, b);
785 }
786 }
787 for(i = 0; i<tLen; i++) {
788 sprintf(b, "%04X", target[i]);
789 strcat(tEsc, "\\u");
790 strcat(tEsc, b);
791 strcat(t, b);
792 strcat(t, " ");
793 if(target[i] < 0x80) {
794 sprintf(b, "(%c)", target[i]);
795 strcat(tEsc, b);
796 }
797 }
798 /*
799 strcpy(output, "[[ ");
800 strcat(output, sEsc);
801 strcat(output, getRelationSymbol(expRes, expStrength, relation));
802 strcat(output, tEsc);
803
804 strcat(output, " : ");
805
806 strcat(output, sEsc);
807 strcat(output, getRelationSymbol(realRes, realStrength, relation));
808 strcat(output, tEsc);
809 strcat(output, " ]] ");
810
811 log_verbose("%s", output);
812 */
813
814
815 strcpy(output, "DIFF: ");
816
817 strcat(output, s);
818 strcat(output, " : ");
819 strcat(output, t);
820
821 strcat(output, test);
822 strcat(output, ": ");
823
824 strcat(output, sEsc);
825 strcat(output, getRelationSymbol(expRes, expStrength, relation));
826 strcat(output, tEsc);
827
828 strcat(output, " ");
829
830 strcat(output, platform);
831 strcat(output, ": ");
832
833 strcat(output, sEsc);
834 strcat(output, getRelationSymbol(realRes, realStrength, relation));
835 strcat(output, tEsc);
836
837 outputLength = (int32_t)strlen(output);
838 if(outputLength > maxOutputLength) {
839 maxOutputLength = outputLength;
840 U_ASSERT(outputLength < sizeof(output));
841 }
842
843 log_verbose("%s\n", output);
844
845 }
846
847 /*
848 static void printOutRules(const UChar *rules) {
849 uint32_t len = u_strlen(rules);
850 uint32_t i = 0;
851 char toPrint;
852 uint32_t line = 0;
853
854 fprintf(stdout, "Rules:");
855
856 for(i = 0; i<len; i++) {
857 if(rules[i]<0x7f && rules[i]>=0x20) {
858 toPrint = (char)rules[i];
859 if(toPrint == '&') {
860 line = 1;
861 fprintf(stdout, "\n&");
862 } else if(toPrint == ';') {
863 fprintf(stdout, "<<");
864 line+=2;
865 } else if(toPrint == ',') {
866 fprintf(stdout, "<<<");
867 line+=3;
868 } else {
869 fprintf(stdout, "%c", toPrint);
870 line++;
871 }
872 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
873 fprintf(stdout, "\\u%04X", rules[i]);
874 line+=6;
875 }
876 if(line>72) {
877 fprintf(stdout, "\n");
878 line = 0;
879 }
880 }
881
882 log_verbose("\n");
883
884 }
885 */
886
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)887 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
888 uint32_t diffs = 0;
889 UCollationResult realResult;
890 uint32_t realStrength;
891
892 uint32_t sLen = u_strlen(first);
893 uint32_t tLen = u_strlen(second);
894
895 realResult = func(collator, opts, first, sLen, second, tLen);
896 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
897
898 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
899 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
900 diffs++;
901 } else if(realResult != UCOL_LESS || realStrength != strength) {
902 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
903 diffs++;
904 }
905 return diffs;
906 }
907
908
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)909 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
910 const UChar *rules = NULL, *current = NULL;
911 int32_t ruleLen = 0;
912 uint32_t strength = 0;
913 uint32_t chOffset = 0; uint32_t chLen = 0;
914 uint32_t exOffset = 0; uint32_t exLen = 0;
915 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
916 /* uint32_t rExpsLen = 0; */
917 uint32_t firstLen = 0, secondLen = 0;
918 UBool varT = FALSE; UBool top_ = TRUE;
919 uint16_t specs = 0;
920 UBool startOfRules = TRUE;
921 UColTokenParser src;
922 UColOptionSet opts;
923
924 UChar first[256];
925 UChar second[256];
926 UChar *rulesCopy = NULL;
927
928 uint32_t UCAdiff = 0;
929 uint32_t Windiff = 1;
930 UParseError parseError;
931
932 src.opts = &opts;
933
934 rules = ucol_getRules(coll, &ruleLen);
935
936 /*printOutRules(rules);*/
937
938 if(U_SUCCESS(*status) && ruleLen > 0) {
939 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
940 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
941 src.current = src.source = rulesCopy;
942 src.end = rulesCopy+ruleLen;
943 src.extraCurrent = src.end;
944 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
945 *first = *second = 0;
946
947 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
948 strength = src.parsedToken.strength;
949 chOffset = src.parsedToken.charsOffset;
950 chLen = src.parsedToken.charsLen;
951 exOffset = src.parsedToken.extensionOffset;
952 exLen = src.parsedToken.extensionLen;
953 prefixOffset = src.parsedToken.prefixOffset;
954 prefixLen = src.parsedToken.prefixLen;
955 specs = src.parsedToken.flags;
956
957 startOfRules = FALSE;
958 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
959 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
960
961 u_strncpy(second,rulesCopy+chOffset, chLen);
962 second[chLen] = 0;
963 secondLen = chLen;
964
965 if(exLen > 0) {
966 u_strncat(first, rulesCopy+exOffset, exLen);
967 first[firstLen+exLen] = 0;
968 firstLen += exLen;
969 }
970
971 if(strength != UCOL_TOK_RESET) {
972 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
973 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
974 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
975 }
976 }
977
978
979 firstLen = chLen;
980 u_strcpy(first, second);
981
982 }
983 if(UCAdiff != 0 && Windiff != 0) {
984 log_verbose("\n");
985 }
986 if(UCAdiff == 0) {
987 log_verbose("No immediate difference with %s!\n", refName);
988 }
989 if(Windiff == 0) {
990 log_verbose("No immediate difference with Win32!\n");
991 }
992 free(rulesCopy);
993 }
994 }
995
/*
 * Compares two collation elements, each given as a lead CE and its
 * continuation CE, level by level:
 *   primary   - upper 16 bits of the lead, then upper 16 bits of the continuation
 *   secondary - bits 8..15 of the lead, then bits 8..15 of the continuation
 *   tertiary  - low 8 bits of the lead, then low 8 bits of the continuation
 *
 * s1, s2 - lead and continuation CE of the first element
 * t1, t2 - lead and continuation CE of the second element
 *
 * Returns 0 when both pairs are identical, -1 when the first element sorts
 * lower, and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t srcKey;
    uint32_t tgtKey;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary weights */
    srcKey = (s1 & 0xFFFF0000) | (s2 >> 16);
    tgtKey = (t1 & 0xFFFF0000) | (t2 >> 16);
    if(srcKey != tgtKey) {
        return (srcKey < tgtKey) ? -1 : 1;
    }

    /* secondary weights */
    srcKey = (s1 & 0x0000FF00) | ((s2 >> 8) & 0x000000FF);
    tgtKey = (t1 & 0x0000FF00) | ((t2 >> 8) & 0x000000FF);
    if(srcKey != tgtKey) {
        return (srcKey < tgtKey) ? -1 : 1;
    }

    /* tertiary weights: the pairs differ somewhere, so anything not lower is higher */
    srcKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tgtKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (srcKey < tgtKey) ? -1 : 1;
}
1032
/*
 * CE boundaries recorded for one indirect position: a start CE and an
 * optional limit CE, each stored as a lead/continuation pair. The limit
 * pair is zero when no limit was supplied.
 */
typedef struct {
    uint32_t startCE;      /* lead CE of the start of the position */
    uint32_t startContCE;  /* continuation CE of the start */
    uint32_t limitCE;      /* lead CE of the limit, or 0 */
    uint32_t limitContCE;  /* continuation CE of the limit, or 0 */
} indirectBoundaries;
1039
/* These values are used for finding CE values for indirect positioning.   */
/* Indirect positioning is a mechanism for allowing resets on symbolic     */
/* values. It only works for resets, and you cannot tailor indirect names. */
/* An indirect name can define either an anchor point or a range. An       */
/* anchor point behaves in exactly the same way as a code point in a reset */
/* would, except that it cannot be tailored. A range (currently we only    */
/* know of the [top] range) explicitly sets the upper bound for generated  */
/* CEs, thus allowing better control over how many CEs can be squeezed     */
/* into the range without a performance penalty.                           */
/* In that respect, we use [top] for tailoring of locales that use CJK     */
/* characters. Other indirect values are currently a pure convenience:     */
/* they can be used to ensure that the CEs will always be positioned in    */
/* the same place relative to a point with known properties (e.g. first    */
/* primary ignorable).                                                     */
1054 static indirectBoundaries ucolIndirectBoundaries[15];
1055 static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1056 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1057 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1058 /* to initalize here. */
1059 ucolIndirectBoundaries[indexR].startCE = start[0];
1060 ucolIndirectBoundaries[indexR].startContCE = start[1];
1061 if(end) {
1062 ucolIndirectBoundaries[indexR].limitCE = end[0];
1063 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1064 } else {
1065 ucolIndirectBoundaries[indexR].limitCE = 0;
1066 ucolIndirectBoundaries[indexR].limitContCE = 0;
1067 }
1068 }
1069
/*
 * Walks the tailoring rules of coll token by token and checks the first CE
 * (plus its continuation, if any) that coll produces for each token's
 * characters:
 *   - while only identical-strength tokens have followed the last reset,
 *     the CE must equal the reset's base CE;
 *   - otherwise an identical-strength token must repeat the previous CE;
 *   - any other token is compared (via compareCEs) against the "next" CE
 *     obtained from the inverse UCA for the base, and against the previous
 *     token's CE (with the direction flipped once after a [before] reset).
 * Problems are reported with log_err; the function returns nothing.
 *
 * coll   - collator whose rules are examined
 * status - in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *          working copy of the rules cannot be allocated
 *
 * The root collator opened here and the rules copy are released on every
 * exit path.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL;
    int32_t ruleLen = 0;

    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE;
    uint32_t lastContCE;

    int32_t result = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t oldOffset = 0;

    uint16_t specs = 0;
    UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate c;
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
    const char *colLoc;
    UCollator *UCA = ucol_open("root", status);

    if (U_FAILURE(*status)) {
        log_err("Could not open root collator %s\n", u_errorName(*status));
        return;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err("Could not get collator name: %s\n", u_errorName(*status));
        goto cleanup;
    }

    /* The UCA constants live inside the root collator's image. */
    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);

    src.invUCA = ucol_initInverseUCA(status);

    /* One-time setup of the indirect boundary table shared by all calls. */
    if(indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
        indirectBoundariesSet = TRUE;
    }


    if(U_SUCCESS(*status) && ruleLen > 0) {
        /* The token parser works on a private copy with extra room at the end. */
        rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            log_err("%s: out of memory while copying the rules\n", colLoc);
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

        while (ucol_tok_parseNextToken(&src, startOfRules, &parseError,status) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

            uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);

            currCE = ucol_getNextCE(coll, &c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
                currCE = ucol_getNextCE(coll, &c, status);
            }

            /* Keep the second CE only when it is a continuation of the first. */
            currContCE = ucol_getNextCE(coll, &c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }

            /* we need to repack CEs here */

            if(strength == UCOL_TOK_RESET) {
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(top_ == TRUE) {
                    /* Indirect reset: take the base from the boundary table. */
                    int32_t indirectIdx = src.parsedToken.indirectIndex;

                    nextCE = baseCE = currCE = ucolIndirectBoundaries[indirectIdx].startCE;
                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[indirectIdx].startContCE;
                } else {
                    nextCE = baseCE = currCE;
                    nextContCE = baseContCE = currContCE;
                }
                maxStrength = UCOL_IDENTICAL;
            } else {
                if(strength < maxStrength) {
                    /* A stronger difference than seen so far: recompute the upper bound. */
                    maxStrength = strength;
                    if(baseCE == UCOL_RESET_TOP_VALUE) {
                        log_verbose("Resetting to [top]\n");
                        nextCE = UCOL_NEXT_TOP_VALUE;
                        nextContCE = UCOL_NEXT_TOP_CONT;
                    } else {
                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                    }
                    if(result < 0) {
                        if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
                        } else {
                            log_err("%s: couldn't find the CE\n", colLoc);
                        }
                        goto cleanup;
                    }
                }

                /* Mask out bits that are not part of the comparison (presumably the case bits - TODO confirm). */
                currCE &= 0xFFFFFF3F;
                currContCE &= 0xFFFFFFBF;

                if(maxStrength == UCOL_IDENTICAL) {
                    if(baseCE != currCE || baseContCE != currContCE) {
                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                    }
                } else {
                    if(strength == UCOL_IDENTICAL) {
                        if(lastCE != currCE || lastContCE != currContCE) {
                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                        }
                    } else {
                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
                            log_err("%s: current CE is not less than base CE\n", colLoc);
                        }
                        if(!before) {
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        } else {
                            /* First token after a [before] reset: the direction is reversed once. */
                            before = FALSE;
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        }
                    }
                }

            }

            oldOffset = chOffset;
            lastCE = currCE & 0xFFFFFF3F;
            lastContCE = currContCE & 0xFFFFFFBF;
        }
    }

cleanup:
    free(rulesCopy);
    ucol_close(UCA);
}
1276
#if 0
/* Compiled out: these locales are now picked from index RB */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1291
/*
 * Escaped rule strings that RamsRulesTest() unescapes, opens a collator
 * from, and passes to testCollator() and testCEs().
 * The commented-out string after each "[top]" entry is kept as found;
 * presumably an alternative formulation of the same rule - TODO confirm.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1305
1306
TestCollations(void)1307 static void TestCollations(void) {
1308 int32_t noOfLoc = uloc_countAvailable();
1309 int32_t i = 0, j = 0;
1310
1311 UErrorCode status = U_ZERO_ERROR;
1312 char cName[256];
1313 UChar name[256];
1314 int32_t nameSize;
1315
1316
1317 const char *locName = NULL;
1318 UCollator *coll = NULL;
1319 UCollator *UCA = ucol_open("", &status);
1320 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1321 if (U_FAILURE(status)) {
1322 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1323 return;
1324 }
1325 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1326
1327 for(i = 0; i<noOfLoc; i++) {
1328 status = U_ZERO_ERROR;
1329 locName = uloc_getAvailable(i);
1330 if(uprv_strcmp("ja", locName) == 0) {
1331 log_verbose("Don't know how to test prefixes\n");
1332 continue;
1333 }
1334 if(hasCollationElements(locName)) {
1335 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1336 for(j = 0; j<nameSize; j++) {
1337 cName[j] = (char)name[j];
1338 }
1339 cName[nameSize] = 0;
1340 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1341 coll = ucol_open(locName, &status);
1342 if(U_SUCCESS(status)) {
1343 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1344 ucol_close(coll);
1345 } else {
1346 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1347 status = U_ZERO_ERROR;
1348 }
1349 }
1350 }
1351 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1352 ucol_close(UCA);
1353 }
1354
RamsRulesTest(void)1355 static void RamsRulesTest(void) {
1356 UErrorCode status = U_ZERO_ERROR;
1357 int32_t i = 0;
1358 UCollator *coll = NULL;
1359 UChar rule[2048];
1360 uint32_t ruleLen;
1361 int32_t noOfLoc = uloc_countAvailable();
1362 const char *locName = NULL;
1363
1364 log_verbose("RamsRulesTest\n");
1365
1366 for(i = 0; i<noOfLoc; i++) {
1367 status = U_ZERO_ERROR;
1368 locName = uloc_getAvailable(i);
1369 if(hasCollationElements(locName)) {
1370 if (uprv_strcmp("ja", locName)==0) {
1371 log_verbose("Don't know how to test Japanese because of prefixes\n");
1372 continue;
1373 }
1374 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1375 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1376 continue;
1377 }
1378 if (uprv_strcmp("km", locName)==0 ||
1379 uprv_strcmp("km_KH", locName)==0 ||
1380 uprv_strcmp("si", locName)==0 ||
1381 uprv_strcmp("si_LK", locName)==0 ||
1382 uprv_strcmp("zh", locName)==0 ||
1383 uprv_strcmp("zh_Hant", locName)==0 ) {
1384 continue; /* TODO: enable these locale tests after trac#6040 is fixed. */
1385 }
1386 log_verbose("Testing locale %s\n", locName);
1387 coll = ucol_open(locName, &status);
1388 if(U_SUCCESS(status)) {
1389 if(coll->image->jamoSpecial == TRUE) {
1390 log_err("%s has special JAMOs\n", locName);
1391 }
1392 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1393 testCollator(coll, &status);
1394 testCEs(coll, &status);
1395 ucol_close(coll);
1396 }
1397 }
1398 }
1399
1400 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1401 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1402 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1403 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1404 if(U_SUCCESS(status)) {
1405 testCollator(coll, &status);
1406 testCEs(coll, &status);
1407 ucol_close(coll);
1408 }
1409 }
1410
1411 }
1412
IsTailoredTest(void)1413 static void IsTailoredTest(void) {
1414 UErrorCode status = U_ZERO_ERROR;
1415 uint32_t i = 0;
1416 UCollator *coll = NULL;
1417 UChar rule[2048];
1418 UChar tailored[2048];
1419 UChar notTailored[2048];
1420 uint32_t ruleLen, tailoredLen, notTailoredLen;
1421
1422 log_verbose("IsTailoredTest\n");
1423
1424 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1425 ruleLen = u_strlen(rule);
1426
1427 u_uastrcpy(tailored, "ABCcd");
1428 tailoredLen = u_strlen(tailored);
1429
1430 u_uastrcpy(notTailored, "ZabD");
1431 notTailoredLen = u_strlen(notTailored);
1432
1433 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1434 if(U_SUCCESS(status)) {
1435 for(i = 0; i<tailoredLen; i++) {
1436 if(!ucol_isTailored(coll, tailored[i], &status)) {
1437 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1438 }
1439 }
1440 for(i = 0; i<notTailoredLen; i++) {
1441 if(ucol_isTailored(coll, notTailored[i], &status)) {
1442 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1443 }
1444 }
1445 ucol_close(coll);
1446 }
1447 else {
1448 log_err_status(status, "Can't tailor rules\n");
1449 }
1450 /* Code coverage */
1451 status = U_ZERO_ERROR;
1452 coll = ucol_open("ja", &status);
1453 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1454 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1455 }
1456 ucol_close(coll);
1457 }
1458
1459
/*
 * Escaped test strings for TestChMove(), listed in the order they are
 * expected to sort with the "cs" collator: TestChMove() checks that every
 * entry compares UCOL_LESS than every entry after it.
 */
const static char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1479
TestChMove(void)1480 static void TestChMove(void) {
1481 UChar t1[256] = {0};
1482 UChar t2[256] = {0};
1483
1484 uint32_t i = 0, j = 0;
1485 uint32_t size = 0;
1486 UErrorCode status = U_ZERO_ERROR;
1487
1488 UCollator *coll = ucol_open("cs", &status);
1489
1490 if(U_SUCCESS(status)) {
1491 size = sizeof(chTest)/sizeof(chTest[0]);
1492 for(i = 0; i < size-1; i++) {
1493 for(j = i+1; j < size; j++) {
1494 u_unescape(chTest[i], t1, 256);
1495 u_unescape(chTest[j], t2, 256);
1496 doTest(coll, t1, t2, UCOL_LESS);
1497 }
1498 }
1499 }
1500 else {
1501 log_err("Can't open collator");
1502 }
1503 ucol_close(coll);
1504 }
1505
1506
1507
1508
/*
 * Escaped strings in expected order for the rule "&\u4e00 < a <<< A < b <<< B".
 * In the part of the file around TestImplicitTailoring() this table is only
 * referenced from commented-out code.
 */
const static char impTest[][20] = {
    "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
1517
1518
TestImplicitTailoring(void)1519 static void TestImplicitTailoring(void) {
1520 static const struct {
1521 const char *rules;
1522 const char *data[10];
1523 const uint32_t len;
1524 } tests[] = {
1525 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1526 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1527 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1528 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1529 };
1530
1531 int32_t i = 0;
1532
1533 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
1534 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
1535 }
1536
1537 /*
1538 UChar t1[256] = {0};
1539 UChar t2[256] = {0};
1540
1541 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1542
1543 uint32_t i = 0, j = 0;
1544 uint32_t size = 0;
1545 uint32_t ruleLen = 0;
1546 UErrorCode status = U_ZERO_ERROR;
1547 UCollator *coll = NULL;
1548 ruleLen = u_unescape(rule, t1, 256);
1549
1550 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1551
1552 if(U_SUCCESS(status)) {
1553 size = sizeof(impTest)/sizeof(impTest[0]);
1554 for(i = 0; i < size-1; i++) {
1555 for(j = i+1; j < size; j++) {
1556 u_unescape(impTest[i], t1, 256);
1557 u_unescape(impTest[j], t2, 256);
1558 doTest(coll, t1, t2, UCOL_LESS);
1559 }
1560 }
1561 }
1562 else {
1563 log_err("Can't open collator");
1564 }
1565 ucol_close(coll);
1566 */
1567 }
1568
TestFCDProblem(void)1569 static void TestFCDProblem(void) {
1570 UChar t1[256] = {0};
1571 UChar t2[256] = {0};
1572
1573 const char *s1 = "\\u0430\\u0306\\u0325";
1574 const char *s2 = "\\u04D1\\u0325";
1575
1576 UErrorCode status = U_ZERO_ERROR;
1577 UCollator *coll = ucol_open("", &status);
1578 u_unescape(s1, t1, 256);
1579 u_unescape(s2, t2, 256);
1580
1581 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1582 doTest(coll, t1, t2, UCOL_EQUAL);
1583
1584 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1585 doTest(coll, t1, t2, UCOL_EQUAL);
1586
1587 ucol_close(coll);
1588 }
1589
1590 /*
1591 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1592 We're only using NFC/NFD in this test.
1593 */
/* Capacity, in UChars, of each normalization buffer in tester. */
#define NORM_BUFFER_TEST_LEN 18
/* One test case collected by TestComposeDecompose(). */
typedef struct {
    UChar32 u;                          /* code point the case was built from */
    UChar NFC[NORM_BUFFER_TEST_LEN];    /* NFC output; TestComposeDecompose() overwrites it with the raw code point when that differs from NFD */
    UChar NFD[NORM_BUFFER_TEST_LEN];    /* NFD output */
} tester;
1600
TestComposeDecompose(void)1601 static void TestComposeDecompose(void) {
1602 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1603 static const UChar UNICODESET_STR[] = {
1604 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1605 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1606 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1607 };
1608 int32_t noOfLoc;
1609 int32_t i = 0, j = 0;
1610
1611 UErrorCode status = U_ZERO_ERROR;
1612 const char *locName = NULL;
1613 uint32_t nfcSize;
1614 uint32_t nfdSize;
1615 tester **t;
1616 uint32_t noCases = 0;
1617 UCollator *coll = NULL;
1618 UChar32 u = 0;
1619 UChar comp[NORM_BUFFER_TEST_LEN];
1620 uint32_t len = 0;
1621 UCollationElements *iter;
1622 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1623 int32_t charsToTestSize;
1624
1625 noOfLoc = uloc_countAvailable();
1626
1627 coll = ucol_open("", &status);
1628 if (U_FAILURE(status)) {
1629 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1630 return;
1631 }
1632 charsToTestSize = uset_size(charsToTest);
1633 if (charsToTestSize <= 0) {
1634 log_err("Set was zero. Missing data?\n");
1635 return;
1636 }
1637 t = malloc(charsToTestSize * sizeof(tester *));
1638 t[0] = (tester *)malloc(sizeof(tester));
1639 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1640
1641 for(u = 0; u < charsToTestSize; u++) {
1642 UChar32 ch = uset_charAt(charsToTest, u);
1643 len = 0;
1644 UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1645 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1646 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1647
1648 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1649 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1650 t[noCases]->u = ch;
1651 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1652 u_strncpy(t[noCases]->NFC, comp, len);
1653 t[noCases]->NFC[len] = 0;
1654 }
1655 noCases++;
1656 t[noCases] = (tester *)malloc(sizeof(tester));
1657 uprv_memset(t[noCases], 0, sizeof(tester));
1658 }
1659 }
1660 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1661 uset_close(charsToTest);
1662 charsToTest = NULL;
1663
1664 for(u=0; u<(UChar32)noCases; u++) {
1665 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1666 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1667 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1668 }
1669 }
1670 /*
1671 for(u = 0; u < charsToTestSize; u++) {
1672 if(!(u&0xFFFF)) {
1673 log_verbose("%08X ", u);
1674 }
1675 uprv_memset(t[noCases], 0, sizeof(tester));
1676 t[noCases]->u = u;
1677 len = 0;
1678 UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1679 comp[len] = 0;
1680 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1681 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1682 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1683 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1684 }
1685 */
1686
1687 ucol_close(coll);
1688
1689 log_verbose("Testing locales, number of cases = %i\n", noCases);
1690 for(i = 0; i<noOfLoc; i++) {
1691 status = U_ZERO_ERROR;
1692 locName = uloc_getAvailable(i);
1693 if(hasCollationElements(locName)) {
1694 char cName[256];
1695 UChar name[256];
1696 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1697
1698 for(j = 0; j<nameSize; j++) {
1699 cName[j] = (char)name[j];
1700 }
1701 cName[nameSize] = 0;
1702 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1703
1704 coll = ucol_open(locName, &status);
1705 ucol_setStrength(coll, UCOL_IDENTICAL);
1706 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1707
1708 for(u=0; u<(UChar32)noCases; u++) {
1709 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1710 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1711 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1712 log_verbose("Testing NFC\n");
1713 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1714 backAndForth(iter);
1715 log_verbose("Testing NFD\n");
1716 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1717 backAndForth(iter);
1718 }
1719 }
1720 ucol_closeElements(iter);
1721 ucol_close(coll);
1722 }
1723 }
1724 for(u = 0; u <= (UChar32)noCases; u++) {
1725 free(t[u]);
1726 }
1727 free(t);
1728 }
1729
TestEmptyRule(void)1730 static void TestEmptyRule(void) {
1731 UErrorCode status = U_ZERO_ERROR;
1732 UChar rulez[] = { 0 };
1733 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1734
1735 ucol_close(coll);
1736 }
1737
TestUCARules(void)1738 static void TestUCARules(void) {
1739 UErrorCode status = U_ZERO_ERROR;
1740 UChar b[256];
1741 UChar *rules = b;
1742 uint32_t ruleLen = 0;
1743 UCollator *UCAfromRules = NULL;
1744 UCollator *coll = ucol_open("", &status);
1745 if(status == U_FILE_ACCESS_ERROR) {
1746 log_data_err("Is your data around?\n");
1747 return;
1748 } else if(U_FAILURE(status)) {
1749 log_err("Error opening collator\n");
1750 return;
1751 }
1752 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1753
1754 log_verbose("TestUCARules\n");
1755 if(ruleLen > 256) {
1756 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1757 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1758 }
1759 log_verbose("Rules length is %d\n", ruleLen);
1760 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1761 if(U_SUCCESS(status)) {
1762 ucol_close(UCAfromRules);
1763 } else {
1764 log_verbose("Unable to create a collator from UCARules!\n");
1765 }
1766 /*
1767 u_unescape(blah, b, 256);
1768 ucol_getSortKey(coll, b, 1, res, 256);
1769 */
1770 ucol_close(coll);
1771 if(rules != b) {
1772 free(rules);
1773 }
1774 }
1775
1776
1777 /* Pinyin tonal order */
1778 /*
1779 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1780 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1781 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1782 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1783 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1784 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1785 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1786 .. (\u00fc)
1787
1788 However, in testing we got the following order:
1789 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1790 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1791 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1792 .. (\u0113)
1793 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1794 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1795 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1796 .. (\u01d8)
1797 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1798 */
1799
/*
 * Passes a set of [before 1] rules for the vowels a, e, i, o, u (plus one
 * ordinary rule after u) together with the test strings for them to
 * genericRulesStarter().
 */
static void TestBefore(void) {
    const static char *expected[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    /* one rule chain per vowel, concatenated into a single rule string */
    const char *beforeRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(beforeRules, expected, sizeof(expected)/sizeof(expected[0]));
}
1818
#if 0
/* superseded by TestBeforePinyin */
static void TestJ784(void) {
    const static char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1834
#if 0
/* superseded by the changes to the lv locale */
static void TestJ831(void) {
    const static char *data[] = {
        "I",
        "i",
        "Y",
        "y"
    };
    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1847
/*
 * Runs one list of test strings twice: through genericLocaleStarter() for
 * the "fr" locale, and through genericRulesStarter() with a
 * "[backwards 2]" rule that tailors U+00E6 and U+00C6 after A.
 */
static void TestJ815(void) {
    const static char *ordered[] = {
        "aa",
        "Aa",
        "ab",
        "Ab",
        "ad",
        "Ad",
        "ae",
        "Ae",
        "\\u00e6",
        "\\u00c6",
        "af",
        "Af",
        "b",
        "B"
    };
    const char *backwardsRule = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

    genericLocaleStarter("fr", ordered, sizeof(ordered)/sizeof(ordered[0]));
    genericRulesStarter(backwardsRule, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
1868
1869
1870 /*
1871 "& a < b < c < d& r < c", "& a < b < d& r < c",
1872 "& a < b < c < d& c < m", "& a < b < c < m < d",
1873 "& a < b < c < d& a < m", "& a < m < b < c < d",
1874 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1875 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1876 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1877 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1878 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1879 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1880 */
TestRedundantRules(void)1881 static void TestRedundantRules(void) {
1882 int32_t i;
1883
1884 static const struct {
1885 const char *rules;
1886 const char *expectedRules;
1887 const char *testdata[8];
1888 uint32_t testdatalen;
1889 } tests[] = {
1890 /* this test conflicts with positioning of CODAN placeholder */
1891 /*{
1892 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1893 "&\\u2089<<<x",
1894 {"\\u2089", "x"}, 2
1895 }, */
1896 /* this test conflicts with the [before x] syntax tightening */
1897 /*{
1898 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1899 "&\\u0252<<<x",
1900 {"\\u0252", "x"}, 2
1901 }, */
1902 /* this test conflicts with the [before x] syntax tightening */
1903 /*{
1904 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1905 "& a <<< x < b <<< c << d <<< e",
1906 {"a", "x", "b", "c", "d", "e"}, 6
1907 }, */
1908 {
1909 "& a < b < c < d& [before 1] c < m",
1910 "& a < b < m < c < d",
1911 {"a", "b", "m", "c", "d"}, 5
1912 },
1913 {
1914 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1915 "& a < b <<< c << d <<< x <<< e",
1916 {"a", "b", "c", "d", "x", "e"}, 6
1917 },
1918 /* this test conflicts with the [before x] syntax tightening */
1919 /* {
1920 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1921 "& a < b <<< c <<< x << d <<< e",
1922 {"a", "b", "c", "x", "d", "e"},, 6
1923 }, */
1924 {
1925 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1926 "& a < b <<< c << d <<< e <<< f < x < g",
1927 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1928 },
1929 {
1930 "& a <<< b << c < d& a < m",
1931 "& a <<< b << c < m < d",
1932 {"a", "b", "c", "m", "d"}, 5
1933 },
1934 {
1935 "&a<b<<b\\u0301 &z<b",
1936 "&a<b\\u0301 &z<b",
1937 {"a", "b\\u0301", "z", "b"}, 4
1938 },
1939 {
1940 "&z<m<<<q<<<m",
1941 "&z<q<<<m",
1942 {"z", "q", "m"},3
1943 },
1944 {
1945 "&z<<<m<q<<<m",
1946 "&z<q<<<m",
1947 {"z", "q", "m"}, 3
1948 },
1949 {
1950 "& a < b < c < d& r < c",
1951 "& a < b < d& r < c",
1952 {"a", "b", "d"}, 3
1953 },
1954 {
1955 "& a < b < c < d& r < c",
1956 "& a < b < d& r < c",
1957 {"r", "c"}, 2
1958 },
1959 {
1960 "& a < b < c < d& c < m",
1961 "& a < b < c < m < d",
1962 {"a", "b", "c", "m", "d"}, 5
1963 },
1964 {
1965 "& a < b < c < d& a < m",
1966 "& a < m < b < c < d",
1967 {"a", "m", "b", "c", "d"}, 5
1968 }
1969 };
1970
1971
1972 UCollator *credundant = NULL;
1973 UCollator *cresulting = NULL;
1974 UErrorCode status = U_ZERO_ERROR;
1975 UChar rlz[2048] = { 0 };
1976 uint32_t rlen = 0;
1977
1978 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
1979 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
1980 rlen = u_unescape(tests[i].rules, rlz, 2048);
1981
1982 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1983 if(status == U_FILE_ACCESS_ERROR) {
1984 log_data_err("Is your data around?\n");
1985 return;
1986 } else if(U_FAILURE(status)) {
1987 log_err("Error opening collator\n");
1988 return;
1989 }
1990
1991 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
1992 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
1993
1994 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
1995
1996 ucol_close(credundant);
1997 ucol_close(cresulting);
1998
1999 log_verbose("testing using data\n");
2000
2001 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2002 }
2003
2004 }
2005
TestExpansionSyntax(void)2006 static void TestExpansionSyntax(void) {
2007 int32_t i;
2008
2009 const static char *rules[] = {
2010 "&AE <<< a << b <<< c &d <<< f",
2011 "&AE <<< a <<< b << c << d < e < f <<< g",
2012 "&AE <<< B <<< C / D <<< F"
2013 };
2014
2015 const static char *expectedRules[] = {
2016 "&A <<< a / E << b / E <<< c /E &d <<< f",
2017 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2018 "&A <<< B / E <<< C / ED <<< F / E"
2019 };
2020
2021 const static char *testdata[][8] = {
2022 {"AE", "a", "b", "c"},
2023 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2024 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2025 };
2026
2027 const static uint32_t testdatalen[] = {
2028 4,
2029 8,
2030 3
2031 };
2032
2033
2034
2035 UCollator *credundant = NULL;
2036 UCollator *cresulting = NULL;
2037 UErrorCode status = U_ZERO_ERROR;
2038 UChar rlz[2048] = { 0 };
2039 uint32_t rlen = 0;
2040
2041 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2042 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2043 rlen = u_unescape(rules[i], rlz, 2048);
2044
2045 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2046 if(status == U_FILE_ACCESS_ERROR) {
2047 log_data_err("Is your data around?\n");
2048 return;
2049 } else if(U_FAILURE(status)) {
2050 log_err("Error opening collator\n");
2051 return;
2052 }
2053 rlen = u_unescape(expectedRules[i], rlz, 2048);
2054 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2055
2056 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2057 /* as a hard error test, but only in information mode */
2058 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2059
2060 ucol_close(credundant);
2061 ucol_close(cresulting);
2062
2063 log_verbose("testing using data\n");
2064
2065 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2066 }
2067 }
2068
TestCase(void)2069 static void TestCase(void)
2070 {
2071 const static UChar gRules[MAX_TOKEN_LEN] =
2072 /*" & 0 < 1,\u2461<a,A"*/
2073 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2074
2075 const static UChar testCase[][MAX_TOKEN_LEN] =
2076 {
2077 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2078 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2079 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2080 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2081 };
2082
2083 const static UCollationResult caseTestResults[][9] =
2084 {
2085 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2086 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2087 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2088 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2089 };
2090
2091 const static UColAttributeValue caseTestAttributes[][2] =
2092 {
2093 { UCOL_LOWER_FIRST, UCOL_OFF},
2094 { UCOL_UPPER_FIRST, UCOL_OFF},
2095 { UCOL_LOWER_FIRST, UCOL_ON},
2096 { UCOL_UPPER_FIRST, UCOL_ON}
2097 };
2098 int32_t i,j,k;
2099 UErrorCode status = U_ZERO_ERROR;
2100 UCollationElements *iter;
2101 UCollator *myCollation;
2102 myCollation = ucol_open("en_US", &status);
2103
2104 if(U_FAILURE(status)){
2105 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2106 return;
2107 }
2108 log_verbose("Testing different case settings\n");
2109 ucol_setStrength(myCollation, UCOL_TERTIARY);
2110
2111 for(k = 0; k<4; k++) {
2112 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2113 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2114 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2115 for (i = 0; i < 3 ; i++) {
2116 for(j = i+1; j<4; j++) {
2117 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2118 }
2119 }
2120 }
2121 ucol_close(myCollation);
2122
2123 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2124 if(U_FAILURE(status)){
2125 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2126 return;
2127 }
2128 log_verbose("Testing different case settings with custom rules\n");
2129 ucol_setStrength(myCollation, UCOL_TERTIARY);
2130
2131 for(k = 0; k<4; k++) {
2132 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2133 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2134 for (i = 0; i < 3 ; i++) {
2135 for(j = i+1; j<4; j++) {
2136 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2137 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2138 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2139 backAndForth(iter);
2140 ucol_closeElements(iter);
2141 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2142 backAndForth(iter);
2143 ucol_closeElements(iter);
2144 }
2145 }
2146 }
2147 ucol_close(myCollation);
2148 {
2149 const static char *lowerFirst[] = {
2150 "h",
2151 "H",
2152 "ch",
2153 "Ch",
2154 "CH",
2155 "cha",
2156 "chA",
2157 "Cha",
2158 "ChA",
2159 "CHa",
2160 "CHA",
2161 "i",
2162 "I"
2163 };
2164
2165 const static char *upperFirst[] = {
2166 "H",
2167 "h",
2168 "CH",
2169 "Ch",
2170 "ch",
2171 "CHA",
2172 "CHa",
2173 "ChA",
2174 "Cha",
2175 "chA",
2176 "cha",
2177 "I",
2178 "i"
2179 };
2180 log_verbose("mixed case test\n");
2181 log_verbose("lower first, case level off\n");
2182 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2183 log_verbose("upper first, case level off\n");
2184 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2185 log_verbose("lower first, case level on\n");
2186 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2187 log_verbose("upper first, case level on\n");
2188 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2189 }
2190
2191 }
2192
TestIncrementalNormalize(void)2193 static void TestIncrementalNormalize(void) {
2194
2195 /*UChar baseA =0x61;*/
2196 UChar baseA =0x41;
2197 /* UChar baseB = 0x42;*/
2198 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2199 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2200 /*
2201 0x316 is combining grave accent below, cc=220
2202 0x321 is combining palatalized hook below, cc=202
2203 0x300 is combining grave accent, cc=230
2204 */
2205
2206 #define MAXSLEN 2000
2207 /*int maxSLen = 64000;*/
2208 int sLen;
2209 int i;
2210
2211 UCollator *coll;
2212 UErrorCode status = U_ZERO_ERROR;
2213 UCollationResult result;
2214
2215 int32_t myQ = QUICK;
2216
2217 if(QUICK < 0) {
2218 QUICK = 1;
2219 }
2220
2221 {
2222 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2223 /* most buffers along the way.*/
2224 UChar strA[MAXSLEN+1];
2225 UChar strB[MAXSLEN+1];
2226
2227 coll = ucol_open("en_US", &status);
2228 if(status == U_FILE_ACCESS_ERROR) {
2229 log_data_err("Is your data around?\n");
2230 return;
2231 } else if(U_FAILURE(status)) {
2232 log_err("Error opening collator\n");
2233 return;
2234 }
2235 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2236
2237 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2238 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2239 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2240 for (sLen = 500; sLen<501; sLen++) {
2241 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2242 strA[0] = baseA;
2243 strB[0] = baseA;
2244 for (i=1; i<=sLen-1; i++) {
2245 strA[i] = ccMix[i % 3];
2246 strB[sLen-i] = ccMix[i % 3];
2247 }
2248 strA[sLen] = 0;
2249 strB[sLen] = 0;
2250
2251 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2252 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2253 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2254 doTest(coll, strA, strB, UCOL_EQUAL);
2255 }
2256 }
2257
2258 QUICK = myQ;
2259
2260
2261 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2262 /* of the string. Checks a couple of edge cases.*/
2263
2264 {
2265 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2266 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2267 ucol_setStrength(coll, UCOL_TERTIARY);
2268 doTest(coll, strA, strB, UCOL_EQUAL);
2269 }
2270
2271 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2272
2273 {
2274 /* New UCA 3.1.1.
2275 * test below used a code point from Desseret, which sorts differently
2276 * than d800 dc00
2277 */
2278 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2279 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2280 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2281 ucol_setStrength(coll, UCOL_TERTIARY);
2282 doTest(coll, strA, strB, UCOL_GREATER);
2283 }
2284
2285 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2286
2287 {
2288 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2289 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2290 char sortKeyA[50];
2291 char sortKeyAz[50];
2292 char sortKeyB[50];
2293 char sortKeyBz[50];
2294 int r;
2295
2296 /* there used to be -3 here. Hmmmm.... */
2297 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2298 result = ucol_strcoll(coll, strA, 3, strB, 3);
2299 if (result != UCOL_GREATER) {
2300 log_err("ERROR 1 in test 4\n");
2301 }
2302 result = ucol_strcoll(coll, strA, -1, strB, -1);
2303 if (result != UCOL_EQUAL) {
2304 log_err("ERROR 2 in test 4\n");
2305 }
2306
2307 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2308 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2309 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2310 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2311
2312 r = strcmp(sortKeyA, sortKeyAz);
2313 if (r <= 0) {
2314 log_err("Error 3 in test 4\n");
2315 }
2316 r = strcmp(sortKeyA, sortKeyB);
2317 if (r <= 0) {
2318 log_err("Error 4 in test 4\n");
2319 }
2320 r = strcmp(sortKeyAz, sortKeyBz);
2321 if (r != 0) {
2322 log_err("Error 5 in test 4\n");
2323 }
2324
2325 ucol_setStrength(coll, UCOL_IDENTICAL);
2326 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2327 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2328 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2329 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2330
2331 r = strcmp(sortKeyA, sortKeyAz);
2332 if (r <= 0) {
2333 log_err("Error 6 in test 4\n");
2334 }
2335 r = strcmp(sortKeyA, sortKeyB);
2336 if (r <= 0) {
2337 log_err("Error 7 in test 4\n");
2338 }
2339 r = strcmp(sortKeyAz, sortKeyBz);
2340 if (r != 0) {
2341 log_err("Error 8 in test 4\n");
2342 }
2343 ucol_setStrength(coll, UCOL_TERTIARY);
2344 }
2345
2346
2347 /* Test 5: Null characters in non-normal source strings.*/
2348
2349 {
2350 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2351 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2352 char sortKeyA[50];
2353 char sortKeyAz[50];
2354 char sortKeyB[50];
2355 char sortKeyBz[50];
2356 int r;
2357
2358 result = ucol_strcoll(coll, strA, 6, strB, 6);
2359 if (result != UCOL_GREATER) {
2360 log_err("ERROR 1 in test 5\n");
2361 }
2362 result = ucol_strcoll(coll, strA, -1, strB, -1);
2363 if (result != UCOL_EQUAL) {
2364 log_err("ERROR 2 in test 5\n");
2365 }
2366
2367 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2368 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2369 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2370 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2371
2372 r = strcmp(sortKeyA, sortKeyAz);
2373 if (r <= 0) {
2374 log_err("Error 3 in test 5\n");
2375 }
2376 r = strcmp(sortKeyA, sortKeyB);
2377 if (r <= 0) {
2378 log_err("Error 4 in test 5\n");
2379 }
2380 r = strcmp(sortKeyAz, sortKeyBz);
2381 if (r != 0) {
2382 log_err("Error 5 in test 5\n");
2383 }
2384
2385 ucol_setStrength(coll, UCOL_IDENTICAL);
2386 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2387 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2388 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2389 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2390
2391 r = strcmp(sortKeyA, sortKeyAz);
2392 if (r <= 0) {
2393 log_err("Error 6 in test 5\n");
2394 }
2395 r = strcmp(sortKeyA, sortKeyB);
2396 if (r <= 0) {
2397 log_err("Error 7 in test 5\n");
2398 }
2399 r = strcmp(sortKeyAz, sortKeyBz);
2400 if (r != 0) {
2401 log_err("Error 8 in test 5\n");
2402 }
2403 ucol_setStrength(coll, UCOL_TERTIARY);
2404 }
2405
2406
2407 /* Test 6: Null character as base of a non-normal combining sequence.*/
2408
2409 {
2410 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2411 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2412
2413 result = ucol_strcoll(coll, strA, 5, strB, 5);
2414 if (result != UCOL_LESS) {
2415 log_err("Error 1 in test 6\n");
2416 }
2417 result = ucol_strcoll(coll, strA, -1, strB, -1);
2418 if (result != UCOL_EQUAL) {
2419 log_err("Error 2 in test 6\n");
2420 }
2421 }
2422
2423 ucol_close(coll);
2424 }
2425
2426
2427
#if 0
/**
 * Disabled test: asks ucol_uprv_getCaseBits() for the case classification
 * of a handful of strings and compares it with the expected values.
 * caseBitData[] and results[] are parallel arrays.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* Bail out instead of querying a collator that failed to open. */
    if(U_FAILURE(status)) {
        log_err_status(status, "Unable to open the root collator -> %s\n", u_errorName(status));
        return;
    }

    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator was previously never released. */
    ucol_close(UCA);
}
#endif
2455
TestHangulTailoring(void)2456 static void TestHangulTailoring(void) {
2457 static const char *koreanData[] = {
2458 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2459 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2460 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2461 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2462 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2463 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2464 };
2465
2466 const char *rules =
2467 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2468 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2469 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2470 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2471 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2472 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2473
2474
2475 UErrorCode status = U_ZERO_ERROR;
2476 UChar rlz[2048] = { 0 };
2477 uint32_t rlen = u_unescape(rules, rlz, 2048);
2478
2479 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2480 if(status == U_FILE_ACCESS_ERROR) {
2481 log_data_err("Is your data around?\n");
2482 return;
2483 } else if(U_FAILURE(status)) {
2484 log_err("Error opening collator\n");
2485 return;
2486 }
2487
2488 log_verbose("Using start of korean rules\n");
2489
2490 if(U_SUCCESS(status)) {
2491 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2492 } else {
2493 log_err("Unable to open collator with rules %s\n", rules);
2494 }
2495
2496 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2497 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2498 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2499
2500 ucol_close(coll);
2501
2502 log_verbose("Using ko__LOTUS locale\n");
2503 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2504 }
2505
TestCompressOverlap(void)2506 static void TestCompressOverlap(void) {
2507 UChar secstr[150];
2508 UChar tertstr[150];
2509 UErrorCode status = U_ZERO_ERROR;
2510 UCollator *coll;
2511 char result[200];
2512 uint32_t resultlen;
2513 int count = 0;
2514 char *tempptr;
2515
2516 coll = ucol_open("", &status);
2517
2518 if (U_FAILURE(status)) {
2519 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2520 return;
2521 }
2522 while (count < 149) {
2523 secstr[count] = 0x0020; /* [06, 05, 05] */
2524 tertstr[count] = 0x0020;
2525 count ++;
2526 }
2527
2528 /* top down compression ----------------------------------- */
2529 secstr[count] = 0x0332; /* [, 87, 05] */
2530 tertstr[count] = 0x3000; /* [06, 05, 07] */
2531
2532 /* no compression secstr should have 150 secondary bytes, tertstr should
2533 have 150 tertiary bytes.
2534 with correct overlapping compression, secstr should have 4 secondary
2535 bytes, tertstr should have > 2 tertiary bytes */
2536 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2537 tempptr = uprv_strchr(result, 1) + 1;
2538 while (*(tempptr + 1) != 1) {
2539 /* the last secondary collation element is not checked since it is not
2540 part of the compression */
2541 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2542 log_err("Secondary compression overlapped\n");
2543 }
2544 tempptr ++;
2545 }
2546
2547 /* tertiary top/bottom/common for en_US is similar to the secondary
2548 top/bottom/common */
2549 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2550 tempptr = uprv_strrchr(result, 1) + 1;
2551 while (*(tempptr + 1) != 0) {
2552 /* the last secondary collation element is not checked since it is not
2553 part of the compression */
2554 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2555 log_err("Tertiary compression overlapped\n");
2556 }
2557 tempptr ++;
2558 }
2559
2560 /* bottom up compression ------------------------------------- */
2561 secstr[count] = 0;
2562 tertstr[count] = 0;
2563 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2564 tempptr = uprv_strchr(result, 1) + 1;
2565 while (*(tempptr + 1) != 1) {
2566 /* the last secondary collation element is not checked since it is not
2567 part of the compression */
2568 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2569 log_err("Secondary compression overlapped\n");
2570 }
2571 tempptr ++;
2572 }
2573
2574 /* tertiary top/bottom/common for en_US is similar to the secondary
2575 top/bottom/common */
2576 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2577 tempptr = uprv_strrchr(result, 1) + 1;
2578 while (*(tempptr + 1) != 0) {
2579 /* the last secondary collation element is not checked since it is not
2580 part of the compression */
2581 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2582 log_err("Tertiary compression overlapped\n");
2583 }
2584 tempptr ++;
2585 }
2586
2587 ucol_close(coll);
2588 }
2589
/**
 * Verifies that three Cyrillic sample strings keep their listed order under
 * the root collator and under several small tailorings of U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /* Tailorings to apply, in the order they are run. */
    static const char *tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };

    const uint32_t testCount = sizeof(test)/sizeof(test[0]);
    const uint32_t tailoringCount = sizeof(tailorings)/sizeof(tailorings[0]);
    uint32_t t;

    /* Russian overrides contractions, so the "ru" locale is not tested here
       any more; the root locale is used instead. */
    genericLocaleStarter("root", test, testCount);

    for(t = 0; t < tailoringCount; t++) {
        genericRulesStarter(tailorings[t], test, testCount);
    }
}
2608
/**
 * Runs two sets of sample strings against a collator whose rules contain
 * only a [suppressContractions] option for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {

    /* Both data sets are checked with the same rule string. */
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    const uint32_t firstCount = sizeof(testNoCont)/sizeof(testNoCont[0]);
    const uint32_t secondCount = sizeof(testNoCont2)/sizeof(testNoCont2[0]);

    genericRulesStarter(suppressRule, testNoCont, firstCount);
    genericRulesStarter(suppressRule, testNoCont2, secondCount);
}
2625
TestContraction(void)2626 static void TestContraction(void) {
2627 const static char *testrules[] = {
2628 "&A = AB / B",
2629 "&A = A\\u0306/\\u0306",
2630 "&c = ch / h"
2631 };
2632 const static UChar testdata[][2] = {
2633 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2634 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2635 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2636 };
2637 const static UChar testdata2[][2] = {
2638 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2639 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2640 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2641 };
2642 const static char *testrules3[] = {
2643 "&z < xyz &xyzw << B",
2644 "&z < xyz &xyz << B / w",
2645 "&z < ch &achm << B",
2646 "&z < ch &a << B / chm",
2647 "&\\ud800\\udc00w << B",
2648 "&\\ud800\\udc00 << B / w",
2649 "&a\\ud800\\udc00m << B",
2650 "&a << B / \\ud800\\udc00m",
2651 };
2652
2653 UErrorCode status = U_ZERO_ERROR;
2654 UCollator *coll;
2655 UChar rule[256] = {0};
2656 uint32_t rlen = 0;
2657 int i;
2658
2659 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2660 UCollationElements *iter1;
2661 int j = 0;
2662 log_verbose("Rule %s for testing\n", testrules[i]);
2663 rlen = u_unescape(testrules[i], rule, 32);
2664 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2665 if (U_FAILURE(status)) {
2666 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2667 return;
2668 }
2669 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2670 if (U_FAILURE(status)) {
2671 log_err("Collation iterator creation failed\n");
2672 return;
2673 }
2674 while (j < 2) {
2675 UCollationElements *iter2 = ucol_openElements(coll,
2676 &(testdata[i][j]),
2677 1, &status);
2678 uint32_t ce;
2679 if (U_FAILURE(status)) {
2680 log_err("Collation iterator creation failed\n");
2681 return;
2682 }
2683 ce = ucol_next(iter2, &status);
2684 while (ce != UCOL_NULLORDER) {
2685 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2686 log_err("Collation elements in contraction split does not match\n");
2687 return;
2688 }
2689 ce = ucol_next(iter2, &status);
2690 }
2691 j ++;
2692 ucol_closeElements(iter2);
2693 }
2694 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2695 log_err("Collation elements not exhausted\n");
2696 return;
2697 }
2698 ucol_closeElements(iter1);
2699 ucol_close(coll);
2700 }
2701
2702 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2703 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2704 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2705 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2706 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2707 testdata2[1][1]);
2708 return;
2709 }
2710 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2711 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2712 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2713 testdata2[2][1]);
2714 return;
2715 }
2716 ucol_close(coll);
2717
2718 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2719 UCollator *coll1,
2720 *coll2;
2721 UCollationElements *iter1,
2722 *iter2;
2723 UChar ch = 0x0042 /* 'B' */;
2724 uint32_t ce;
2725 rlen = u_unescape(testrules3[i], rule, 32);
2726 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2727 rlen = u_unescape(testrules3[i + 1], rule, 32);
2728 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2729 if (U_FAILURE(status)) {
2730 log_err("Collator creation failed %s\n", testrules[i]);
2731 return;
2732 }
2733 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2734 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2735 if (U_FAILURE(status)) {
2736 log_err("Collation iterator creation failed\n");
2737 return;
2738 }
2739 ce = ucol_next(iter1, &status);
2740 if (U_FAILURE(status)) {
2741 log_err("Retrieving ces failed\n");
2742 return;
2743 }
2744 while (ce != UCOL_NULLORDER) {
2745 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2746 log_err("CEs does not match\n");
2747 return;
2748 }
2749 ce = ucol_next(iter1, &status);
2750 if (U_FAILURE(status)) {
2751 log_err("Retrieving ces failed\n");
2752 return;
2753 }
2754 }
2755 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2756 log_err("CEs not exhausted\n");
2757 return;
2758 }
2759 ucol_closeElements(iter1);
2760 ucol_closeElements(iter2);
2761 ucol_close(coll1);
2762 ucol_close(coll2);
2763 }
2764 }
2765
TestExpansion(void)2766 static void TestExpansion(void) {
2767 const static char *testrules[] = {
2768 "&J << K / B & K << M",
2769 "&J << K / B << M"
2770 };
2771 const static UChar testdata[][3] = {
2772 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2773 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2774 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2775 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2776 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2777 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2778 };
2779
2780 UErrorCode status = U_ZERO_ERROR;
2781 UCollator *coll;
2782 UChar rule[256] = {0};
2783 uint32_t rlen = 0;
2784 int i;
2785
2786 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2787 int j = 0;
2788 log_verbose("Rule %s for testing\n", testrules[i]);
2789 rlen = u_unescape(testrules[i], rule, 32);
2790 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2791 if (U_FAILURE(status)) {
2792 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2793 return;
2794 }
2795
2796 for (j = 0; j < 5; j ++) {
2797 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2798 }
2799 ucol_close(coll);
2800 }
2801 }
2802
#if 0
/**
 * Disabled test documenting known limitations of the collation engine.
 * It always fails, which is why it is compiled out.  Each scope below runs
 * one rule string against one or more expected orderings.
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *tailoring = "&a=b/c&d=c/e";
        static const char *orderA[] = {"add","b","adf"};
        static const char *orderB[] = {"aa","b","af"};

        log_verbose("recursive expansions\n");
        genericRulesStarter(tailoring, orderA, sizeof(orderA)/sizeof(orderA[0]));
        genericRulesStarter(tailoring, orderB, sizeof(orderB)/sizeof(orderB[0]));
    }
    /* contractions spanning expansions */
    {
        static const char *tailoring = "&a<<<c/e&g<<<eh";
        static const char *orderA[] = {"ad","c","af","f","ch","h"};
        static const char *orderB[] = {"ad","c","ch","af","f","h"};

        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(tailoring, orderA, sizeof(orderA)/sizeof(orderA[0]));
        genericRulesStarter(tailoring, orderB, sizeof(orderB)/sizeof(orderB[0]));
    }
    /* normalization: nulls in contractions */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue decompOn[] = { UCOL_ON };
        static const UColAttributeValue decompOff[] = { UCOL_OFF };
        /* Decomposition on first, then off; both orderings for each. */
        const UColAttributeValue *modes[2];
        int m;

        modes[0] = decompOn;
        modes[1] = decompOff;

        log_verbose("NULL in contractions\n");
        for(m = 0; m < 2; m++) {
            genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, modes[m], 1);
            genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, modes[m], 1);
        }

    }
    /* normalization: contractions spanning normalization */
    {
        static const char *tailoring = "&a<<<\\u0000\\u0302";
        static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute attrs[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue decompOn[] = { UCOL_ON };
        static const UColAttributeValue decompOff[] = { UCOL_OFF };
        const UColAttributeValue *modes[2];
        int m;

        modes[0] = decompOn;
        modes[1] = decompOff;

        log_verbose("contractions spanning normalization\n");
        for(m = 0; m < 2; m++) {
            genericRulesStarterWithOptions(tailoring, orderA, 2, attrs, modes[m], 1);
            genericRulesStarterWithOptions(tailoring, orderB, 2, attrs, modes[m], 1);
        }

    }
    /* variable top:  */
    {
        /* Alternative rule spellings that were tried here:
             "&\\u2010<x=[variable top]<z"
             "&' '<x<[variable top]=z"                       */
        static const char *tailoring = "&\\u2010<x<[variable top]=z";
        static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute attrs[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue shiftedOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue shiftedOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);
        const uint32_t countA = sizeof(orderA)/sizeof(orderA[0]);
        const uint32_t countB = sizeof(orderB)/sizeof(orderB[0]);
        const uint32_t countC = sizeof(orderC)/sizeof(orderC[0]);

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(tailoring, orderC, countC, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, shiftedOn, attrCount);
        genericRulesStarterWithOptions(tailoring, orderA, countA, attrs, shiftedOff, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, countB, attrs, shiftedOff, attrCount);

    }
    /* case level */
    {
        static const char *tailoring = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *orderA[] = {"c","CH","Ch","cH","ch"};
        static const char *orderB[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute attrs[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};
        const uint32_t attrCount = sizeof(attrs)/sizeof(attrs[0]);

        /* Only the upper-first setting is exercised; the runs with the
           attribute switched off are not executed. */
        log_verbose("case level\n");
        genericRulesStarterWithOptions(tailoring, orderA, sizeof(orderA)/sizeof(orderA[0]), attrs, upperFirst, attrCount);
        genericRulesStarterWithOptions(tailoring, orderB, sizeof(orderB)/sizeof(orderB[0]), attrs, upperFirst, attrCount);
    }

}
#endif
2894
TestBocsuCoverage(void)2895 static void TestBocsuCoverage(void) {
2896 UErrorCode status = U_ZERO_ERROR;
2897 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2898 UChar test[256] = {0};
2899 uint32_t tlen = u_unescape(testString, test, 32);
2900 uint8_t key[256] = {0};
2901 uint32_t klen = 0;
2902
2903 UCollator *coll = ucol_open("", &status);
2904 if(U_SUCCESS(status)) {
2905 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2906
2907 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2908
2909 ucol_close(coll);
2910 } else {
2911 log_data_err("Couldn't open UCA\n");
2912 }
2913 }
2914
TestVariableTopSetting(void)2915 static void TestVariableTopSetting(void) {
2916 UErrorCode status = U_ZERO_ERROR;
2917 const UChar *current = NULL;
2918 uint32_t varTopOriginal = 0, varTop1, varTop2;
2919 UCollator *coll = ucol_open("", &status);
2920 if(U_SUCCESS(status)) {
2921
2922 uint32_t strength = 0;
2923 uint16_t specs = 0;
2924 uint32_t chOffset = 0;
2925 uint32_t chLen = 0;
2926 uint32_t exOffset = 0;
2927 uint32_t exLen = 0;
2928 uint32_t oldChOffset = 0;
2929 uint32_t oldChLen = 0;
2930 uint32_t oldExOffset = 0;
2931 uint32_t oldExLen = 0;
2932 uint32_t prefixOffset = 0;
2933 uint32_t prefixLen = 0;
2934
2935 UBool startOfRules = TRUE;
2936 UColTokenParser src;
2937 UColOptionSet opts;
2938
2939 UChar *rulesCopy = NULL;
2940 uint32_t rulesLen;
2941
2942 UCollationResult result;
2943
2944 UChar first[256] = { 0 };
2945 UChar second[256] = { 0 };
2946 UParseError parseError;
2947 int32_t myQ = QUICK;
2948
2949 src.opts = &opts;
2950
2951 if(QUICK <= 0) {
2952 QUICK = 1;
2953 }
2954
2955 /* this test will fail when normalization is turned on */
2956 /* therefore we always turn off exhaustive mode for it */
2957 { /* QUICK > 0*/
2958 log_verbose("Slide variable top over UCARules\n");
2959 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2960 rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
2961 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
2962
2963 if(U_SUCCESS(status) && rulesLen > 0) {
2964 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2965 src.current = src.source = rulesCopy;
2966 src.end = rulesCopy+rulesLen;
2967 src.extraCurrent = src.end;
2968 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
2969
2970 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
2971 strength = src.parsedToken.strength;
2972 chOffset = src.parsedToken.charsOffset;
2973 chLen = src.parsedToken.charsLen;
2974 exOffset = src.parsedToken.extensionOffset;
2975 exLen = src.parsedToken.extensionLen;
2976 prefixOffset = src.parsedToken.prefixOffset;
2977 prefixLen = src.parsedToken.prefixLen;
2978 specs = src.parsedToken.flags;
2979
2980 startOfRules = FALSE;
2981 {
2982 log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
2983 }
2984 if(strength == UCOL_PRIMARY) {
2985 status = U_ZERO_ERROR;
2986 varTopOriginal = ucol_getVariableTop(coll, &status);
2987 varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
2988 if(U_FAILURE(status)) {
2989 char buffer[256];
2990 char *buf = buffer;
2991 uint32_t i = 0, j;
2992 uint32_t CE = UCOL_NO_MORE_CES;
2993
2994 /* before we start screaming, let's see if there is a problem with the rules */
2995 collIterate s;
2996 uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
2997
2998 CE = ucol_getNextCE(coll, &s, &status);
2999
3000 for(i = 0; i < oldChLen; i++) {
3001 j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
3002 buf += j;
3003 }
3004 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3005 log_verbose("= Expected failure for %s =", buffer);
3006 } else {
3007 if(s.pos == s.endp) {
3008 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3009 oldChOffset, u_errorName(status), buffer);
3010 } else {
3011 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3012 buffer);
3013 }
3014 }
3015 }
3016 varTop2 = ucol_getVariableTop(coll, &status);
3017 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3018 log_err("cannot retrieve set varTop value!\n");
3019 continue;
3020 }
3021
3022 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3023
3024 u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
3025 u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
3026 u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
3027 first[2*oldChLen+chLen] = 0;
3028
3029 if(oldExLen == 0) {
3030 u_strncpy(second, rulesCopy+chOffset, chLen);
3031 second[chLen] = 0;
3032 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3033 u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
3034 u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
3035 u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
3036 second[2*oldExLen+chLen] = 0;
3037 }
3038 result = ucol_strcoll(coll, first, -1, second, -1);
3039 if(result == UCOL_EQUAL) {
3040 doTest(coll, first, second, UCOL_EQUAL);
3041 } else {
3042 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
3043 }
3044 }
3045 }
3046 if(strength != UCOL_TOK_RESET) {
3047 oldChOffset = chOffset;
3048 oldChLen = chLen;
3049 oldExOffset = exOffset;
3050 oldExLen = exLen;
3051 }
3052 }
3053 status = U_ZERO_ERROR;
3054 }
3055 else {
3056 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3057 return;
3058 }
3059 if (U_FAILURE(status)) {
3060 log_err("Error parsing rules %s\n", u_errorName(status));
3061 return;
3062 }
3063 status = U_ZERO_ERROR;
3064 }
3065
3066 QUICK = myQ;
3067
3068 log_verbose("Testing setting variable top to contractions\n");
3069 {
3070 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3071 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3072 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3073 while(*conts != 0) {
3074 if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3075 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3076 } else {
3077 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3078 }
3079 if(U_FAILURE(status)) {
3080 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
3081 *conts, *(conts+1), *(conts+2));
3082 status = U_ZERO_ERROR;
3083 }
3084 conts+=3;
3085 }
3086
3087 status = U_ZERO_ERROR;
3088
3089 first[0] = 0x0040;
3090 first[1] = 0x0050;
3091 first[2] = 0x0000;
3092
3093 ucol_setVariableTop(coll, first, -1, &status);
3094
3095 if(U_SUCCESS(status)) {
3096 log_err("Invalid contraction succeded in setting variable top!\n");
3097 }
3098
3099 }
3100
3101 log_verbose("Test restoring variable top\n");
3102
3103 status = U_ZERO_ERROR;
3104 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3105 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3106 log_err("Couldn't restore old variable top\n");
3107 }
3108
3109 log_verbose("Testing calling with error set\n");
3110
3111 status = U_INTERNAL_PROGRAM_ERROR;
3112 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3113 varTop2 = ucol_getVariableTop(coll, &status);
3114 ucol_restoreVariableTop(coll, varTop2, &status);
3115 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3116 varTop2 = ucol_getVariableTop(NULL, &status);
3117 ucol_restoreVariableTop(NULL, varTop2, &status);
3118 if(status != U_INTERNAL_PROGRAM_ERROR) {
3119 log_err("Bad reaction to passed error!\n");
3120 }
3121 free(rulesCopy);
3122 ucol_close(coll);
3123 } else {
3124 log_data_err("Couldn't open UCA collator\n");
3125 }
3126
3127 }
3128
TestNonChars(void)3129 static void TestNonChars(void) {
3130 static const char *test[] = {
3131 "\\u0000",
3132 "\\uFFFE", "\\uFFFF",
3133 "\\U0001FFFE", "\\U0001FFFF",
3134 "\\U0002FFFE", "\\U0002FFFF",
3135 "\\U0003FFFE", "\\U0003FFFF",
3136 "\\U0004FFFE", "\\U0004FFFF",
3137 "\\U0005FFFE", "\\U0005FFFF",
3138 "\\U0006FFFE", "\\U0006FFFF",
3139 "\\U0007FFFE", "\\U0007FFFF",
3140 "\\U0008FFFE", "\\U0008FFFF",
3141 "\\U0009FFFE", "\\U0009FFFF",
3142 "\\U000AFFFE", "\\U000AFFFF",
3143 "\\U000BFFFE", "\\U000BFFFF",
3144 "\\U000CFFFE", "\\U000CFFFF",
3145 "\\U000DFFFE", "\\U000DFFFF",
3146 "\\U000EFFFE", "\\U000EFFFF",
3147 "\\U000FFFFE", "\\U000FFFFF",
3148 "\\U0010FFFE", "\\U0010FFFF"
3149 };
3150 UErrorCode status = U_ZERO_ERROR;
3151 UCollator *coll = ucol_open("en_US", &status);
3152
3153 log_verbose("Test non characters\n");
3154
3155 if(U_SUCCESS(status)) {
3156 genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
3157 } else {
3158 log_err_status(status, "Unable to open collator\n");
3159 }
3160
3161 ucol_close(coll);
3162 }
3163
/*
 * For every length j from 20 to 499, builds four strings made of j-1 'a's
 * followed by 'a', 'b', 'c' and 'd' respectively, and hands them to
 * genericLocaleStarter for "en_US".
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;

    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
        if(test[i] == NULL) {
            /* Give back what was already obtained instead of writing through NULL below. */
            log_err("Unable to allocate test string %d\n", (int)i);
            while(--i >= 0) {
                free(test[i]);
                test[i] = NULL;
            }
            return;
        }
    }

    for(j = 20; j < 500; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
        genericLocaleStarter("en_US", (const char **)test, 4);
    }


    for(i = 0; i<4; i++) {
        free(test[i]);
        /* test[] is static; do not leave dangling pointers behind. */
        test[i] = NULL;
    }
}
3186
#if 0
/*
 * Disabled variant of TestExtremeCompression (compiled out by the #if 0).
 * It fills the four strings for lengths 10..2047 but calls
 * genericLocaleStarter only once, after the fill loop has finished.
 * NOTE(review): as written, ucol_open is passed `status` rather than
 * `&status`, and `coll` is neither used nor closed - fix before re-enabling.
 */
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
3216
/*
 * Runs genericRulesStarter on a tailoring whose relations involve
 * surrogate pairs, both standalone and followed by further characters.
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    /* Strings listed in the order that is handed to the helper. */
    static const char *test[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* All 14 entries take part. */
    genericRulesStarter(rule, test, sizeof(test)/sizeof(test[0]));
}
3237
3238 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
static void TestPrefix(void) {
    /* Each case: a rule string, the data strings, and how many of them to use. */
    static const struct {
        const char *rules;
        const char *data[50];
        const uint32_t len;
    } tests[] = {
        { "&z <<< z|a",
          {"zz", "za"}, 2 },

        { "&z <<< z|   a",
          {"zz", "za"}, 2 },

        { "[strength I]"
          "&a=\\ud900\\udc25"
          "&z<<<\\ud900\\udc25|a",
          {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
    };
    const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx = 0;

    while(idx < caseCount) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
3263
/* This test uses data supplied by Masashiko Maedera to test the implementation */
3265 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)3266 static void TestNewJapanese(void) {
3267
3268 static const char * const test1[] = {
3269 "\\u30b7\\u30e3\\u30fc\\u30ec",
3270 "\\u30b7\\u30e3\\u30a4",
3271 "\\u30b7\\u30e4\\u30a3",
3272 "\\u30b7\\u30e3\\u30ec",
3273 "\\u3061\\u3087\\u3053",
3274 "\\u3061\\u3088\\u3053",
3275 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3276 "\\u3066\\u30fc\\u305f",
3277 "\\u30c6\\u30fc\\u30bf",
3278 "\\u30c6\\u30a7\\u30bf",
3279 "\\u3066\\u3048\\u305f",
3280 "\\u3067\\u30fc\\u305f",
3281 "\\u30c7\\u30fc\\u30bf",
3282 "\\u30c7\\u30a7\\u30bf",
3283 "\\u3067\\u3048\\u305f",
3284 "\\u3066\\u30fc\\u305f\\u30fc",
3285 "\\u30c6\\u30fc\\u30bf\\u30a1",
3286 "\\u30c6\\u30a7\\u30bf\\u30fc",
3287 "\\u3066\\u3047\\u305f\\u3041",
3288 "\\u3066\\u3048\\u305f\\u30fc",
3289 "\\u3067\\u30fc\\u305f\\u30fc",
3290 "\\u30c7\\u30fc\\u30bf\\u30a1",
3291 "\\u3067\\u30a7\\u305f\\u30a1",
3292 "\\u30c7\\u3047\\u30bf\\u3041",
3293 "\\u30c7\\u30a8\\u30bf\\u30a2",
3294 "\\u3072\\u3086",
3295 "\\u3073\\u3085\\u3042",
3296 "\\u3074\\u3085\\u3042",
3297 "\\u3073\\u3085\\u3042\\u30fc",
3298 "\\u30d3\\u30e5\\u30a2\\u30fc",
3299 "\\u3074\\u3085\\u3042\\u30fc",
3300 "\\u30d4\\u30e5\\u30a2\\u30fc",
3301 "\\u30d2\\u30e5\\u30a6",
3302 "\\u30d2\\u30e6\\u30a6",
3303 "\\u30d4\\u30e5\\u30a6\\u30a2",
3304 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3305 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3306 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3307 "\\u3072\\u3085\\u3093",
3308 "\\u3074\\u3085\\u3093",
3309 "\\u3075\\u30fc\\u308a",
3310 "\\u30d5\\u30fc\\u30ea",
3311 "\\u3075\\u3045\\u308a",
3312 "\\u3075\\u30a5\\u308a",
3313 "\\u3075\\u30a5\\u30ea",
3314 "\\u30d5\\u30a6\\u30ea",
3315 "\\u3076\\u30fc\\u308a",
3316 "\\u30d6\\u30fc\\u30ea",
3317 "\\u3076\\u3045\\u308a",
3318 "\\u30d6\\u30a5\\u308a",
3319 "\\u3077\\u3046\\u308a",
3320 "\\u30d7\\u30a6\\u30ea",
3321 "\\u3075\\u30fc\\u308a\\u30fc",
3322 "\\u30d5\\u30a5\\u30ea\\u30fc",
3323 "\\u3075\\u30a5\\u308a\\u30a3",
3324 "\\u30d5\\u3045\\u308a\\u3043",
3325 "\\u30d5\\u30a6\\u30ea\\u30fc",
3326 "\\u3075\\u3046\\u308a\\u3043",
3327 "\\u30d6\\u30a6\\u30ea\\u30a4",
3328 "\\u3077\\u30fc\\u308a\\u30fc",
3329 "\\u3077\\u30a5\\u308a\\u30a4",
3330 "\\u3077\\u3046\\u308a\\u30fc",
3331 "\\u30d7\\u30a6\\u30ea\\u30a4",
3332 "\\u30d5\\u30fd",
3333 "\\u3075\\u309e",
3334 "\\u3076\\u309d",
3335 "\\u3076\\u3075",
3336 "\\u3076\\u30d5",
3337 "\\u30d6\\u3075",
3338 "\\u30d6\\u30d5",
3339 "\\u3076\\u309e",
3340 "\\u3076\\u3077",
3341 "\\u30d6\\u3077",
3342 "\\u3077\\u309d",
3343 "\\u30d7\\u30fd",
3344 "\\u3077\\u3075",
3345 };
3346
3347 static const char *test2[] = {
3348 "\\u306f\\u309d", /* H\\u309d */
3349 "\\u30cf\\u30fd", /* K\\u30fd */
3350 "\\u306f\\u306f", /* HH */
3351 "\\u306f\\u30cf", /* HK */
3352 "\\u30cf\\u30cf", /* KK */
3353 "\\u306f\\u309e", /* H\\u309e */
3354 "\\u30cf\\u30fe", /* K\\u30fe */
3355 "\\u306f\\u3070", /* HH\\u309b */
3356 "\\u30cf\\u30d0", /* KK\\u309b */
3357 "\\u306f\\u3071", /* HH\\u309c */
3358 "\\u30cf\\u3071", /* KH\\u309c */
3359 "\\u30cf\\u30d1", /* KK\\u309c */
3360 "\\u3070\\u309d", /* H\\u309b\\u309d */
3361 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3362 "\\u3070\\u306f", /* H\\u309bH */
3363 "\\u30d0\\u30cf", /* K\\u309bK */
3364 "\\u3070\\u309e", /* H\\u309b\\u309e */
3365 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3366 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3367 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3368 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3369 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3370 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3371 "\\u3071\\u309d", /* H\\u309c\\u309d */
3372 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3373 "\\u3071\\u306f", /* H\\u309cH */
3374 "\\u30d1\\u30cf", /* K\\u309cK */
3375 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3376 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3377 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3378 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3379 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3380 };
3381 /*
3382 static const char *test3[] = {
3383 "\\u221er\\u221e",
3384 "\\u221eR#",
3385 "\\u221et\\u221e",
3386 "#r\\u221e",
3387 "#R#",
3388 "#t%",
3389 "#T%",
3390 "8t\\u221e",
3391 "8T\\u221e",
3392 "8t#",
3393 "8T#",
3394 "8t%",
3395 "8T%",
3396 "8t8",
3397 "8T8",
3398 "\\u03c9r\\u221e",
3399 "\\u03a9R%",
3400 "rr\\u221e",
3401 "rR\\u221e",
3402 "Rr\\u221e",
3403 "RR\\u221e",
3404 "RT%",
3405 "rt8",
3406 "tr\\u221e",
3407 "tr8",
3408 "TR8",
3409 "tt8",
3410 "\\u30b7\\u30e3\\u30fc\\u30ec",
3411 };
3412 */
3413 static const UColAttribute att[] = { UCOL_STRENGTH };
3414 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3415
3416 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3417 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3418
3419 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3420 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3421 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3422 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3423 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3424 }
3425
TestStrCollIdenticalPrefix(void)3426 static void TestStrCollIdenticalPrefix(void) {
3427 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3428 const char* test[] = {
3429 "ab\\ud9b0\\udc70",
3430 "ab\\ud9b0\\udc71"
3431 };
3432 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3433 }
3434 /* Contractions should have all their canonically equivalent */
3435 /* strings included */
TestContractionClosure(void)3436 static void TestContractionClosure(void) {
3437 static const struct {
3438 const char *rules;
3439 const char *data[10];
3440 const uint32_t len;
3441 } tests[] = {
3442 { "&b=\\u00e4\\u00e4",
3443 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3444 { "&b=\\u00C5",
3445 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3446 };
3447 uint32_t i;
3448
3449
3450 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3451 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3452 }
3453 }
3454
/* This test also fails */
static void TestBeforePrefixFailure(void) {
    /* Rules that combine [before 3] with other relations; each case has two strings. */
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "&g <<< a"
          "&[before 3]\\uff41 <<< x",
          {"x", "\\uff41"}, 2 },
        { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74"
          "&[before 3]\\u30a7<<<\\u30a9",
          {"\\u30a9", "\\u30a7"}, 2 },
        { "&[before 3]\\u30a7<<<\\u30a9"
          "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74",
          {"\\u30a9", "\\u30a7"}, 2 },
    };
    const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t i = 0;

    while(i < caseCount) {
        genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
        ++i;
    }

#if 0
    /* Compiled out: prefix variants of the rules above plus a collation element dump. */
    const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
    const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
    const char* test[] = {
        "\\u30c6\\u30fc\\u30bf",
        "\\u30c6\\u30a7\\u30bf",
    };
    genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
    genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
    /* this piece of code should be in some sort of verbose mode    */
    /* it gets the collation elements for elements and prints them  */
    /* This is useful when trying to see whether the problem is     */
    {
        UErrorCode status = U_ZERO_ERROR;
        uint32_t i = 0;
        UCollationElements *it = NULL;
        uint32_t CE;
        UChar string[256];
        uint32_t uStringLen;
        UCollator *coll = NULL;

        uStringLen = u_unescape(rule1, string, 256);

        coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

        /*coll = ucol_open("ja_JP_JIS", &status);*/
        it = ucol_openElements(coll, string, 0, &status);

        for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
            log_verbose("%s\n", test[i]);
            uStringLen = u_unescape(test[i], string, 256);
            ucol_setText(it, string, uStringLen, &status);

            while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
                log_verbose("%08X\n", CE);
            }
            log_verbose("\n");

        }

        ucol_closeElements(it);
        ucol_close(coll);
    }
#endif
}
3532
TestPrefixCompose(void)3533 static void TestPrefixCompose(void) {
3534 const char* rule1 =
3535 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3536 /*
3537 const char* test[] = {
3538 "\\u30c6\\u30fc\\u30bf",
3539 "\\u30c6\\u30a7\\u30bf",
3540 };
3541 */
3542 {
3543 UErrorCode status = U_ZERO_ERROR;
3544 /*uint32_t i = 0;*/
3545 /*UCollationElements *it = NULL;*/
3546 /* uint32_t CE;*/
3547 UChar string[256];
3548 uint32_t uStringLen;
3549 UCollator *coll = NULL;
3550
3551 uStringLen = u_unescape(rule1, string, 256);
3552
3553 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3554 ucol_close(coll);
3555 }
3556
3557
3558 }
3559
3560 /*
3561 [last variable] last variable value
3562 [last primary ignorable] largest CE for primary ignorable
3563 [last secondary ignorable] largest CE for secondary ignorable
3564 [last tertiary ignorable] largest CE for tertiary ignorable
3565 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3566 */
3567
static void TestRuleOptions(void) {
    /*
     * The code points in the data columns are hard-coded to match the UCA
     * this file was written against; a UCA update may require new values.
     * Each case: a rule, the data strings, and how many of them are used.
     */
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        /* [before 3] applied to the ignorable boundaries */
        { "&[before 3][first tertiary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* nothing can precede the first tertiary ignorable */

        { "&[before 3][last tertiary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* nothing can precede the last tertiary ignorable */

        { "&[before 3][first secondary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* nothing can precede the first secondary ignorable */

        { "&[before 3][last secondary ignorable]<<<a",
          { "\\u0000", "a"}, 2
        }, /* nothing can precede the last secondary ignorable */

        /* regular [before] cases */

        { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
          { "c", "b", "\\u0332", "a" }, 4
        },

        /* there is no code point corresponding to
         * the last primary ignorable
         */
        { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
          { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
        },

        { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
          { "c", "b", "\\u0009", "a", "\\u000a" }, 5
        },

        { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
          { "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
        },

        { "&[first regular]<a"
          "&[before 1][first regular]<b",
          { "b", "\\u02d0", "a", "\\u02d1"}, 4
        },

        { "&[before 1][last regular]<b"
          "&[last regular]<a",
          { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
        },

        { "&[before 1][first implicit]<b"
          "&[first implicit]<a",
          { "b", "\\u4e00", "a", "\\u4e01"}, 4
        },

        { "&[before 1][last implicit]<b"
          "&[last implicit]<a",
          { "b", "\\U0010FFFD", "a" }, 3
        },

        { "&[last variable]<z"
          "&[last primary ignorable]<x"
          "&[last secondary ignorable]<<y"
          "&[last tertiary ignorable]<<<w"
          "&[top]<u",
          {"\\ufffb",  "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
        }

    };
    const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx = 0;

    while(idx < caseCount) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
3652
3653
static void TestOptimize(void) {
    /* Not a real verification: it only tries out a rule string carrying
     * an [optimize ...] option, to see whether copying of UCA contents
     * fails. The ordering data is a plain "a" before "b".
     */
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "[optimize [\\uAC00-\\uD7FF]]",
          { "a", "b"}, 2}
    };
    const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx = 0;

    while(idx < caseCount) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
        ++idx;
    }
}
3675
3676 /*
3677 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3678 weiv ucol_strcollIter?
3679 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3680 weiv these are the input strings?
3681 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3682 weiv will check - could be a problem with utf-8 iterator
3683 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3684 weiv hmmm
3685 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3686 weiv that doesn't sound right
3687 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3688 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3689 cycheng@ca.ibm.c... yes
3690 weiv and then do the comparison
3691 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3692 weiv utf-16 strings look like a little endian ones in the example you sent me
3693 weiv It could be a bug - let me try to test it out
3694 cycheng@ca.ibm.c... ok
3695 cycheng@ca.ibm.c... we can wait till the conf. call
cycheng@ca.ibm.c... next week
3697 weiv that would be great
3698 weiv hmmm
3699 weiv I might be wrong
3700 weiv let me play with it some more
3701 cycheng@ca.ibm.c... ok
3702 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3703 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3704 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3705 weiv ok
3706 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3707 weiv thanks
3708 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3709 */
#if 0
/*
 * Disabled reproduction of the report quoted in the transcript above:
 * compares the same two strings with ucol_strcollIter, once through
 * UTF-16BE iterators and once through UTF-8 iterators, with normalization
 * switched on, and logs an error when the two results differ.
 */
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* d800 0021 and fffc 0062 as big-endian byte pairs */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* the byte sequences quoted in the transcript as the UTF-8 input */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
3750
3751 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3752 static void Alexis2(void) {
3753 UErrorCode status = U_ZERO_ERROR;
3754 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3755 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3756 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3757 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3758
3759 UConverter *conv = NULL;
3760
3761 UCharIterator U16BEItS, U16BEItT;
3762 UCharIterator U8ItS, U8ItT;
3763
3764 UCollationResult resU16, resU16BE, resU8;
3765
3766 static const char* const pairs[][2] = {
3767 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3768 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3769 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3770 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3771 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3772 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3773 { "\\u0020", "\\u0020\\u0000"}
3774 /*
3775 5F20 (my result here)
3776 5F204E008E3F
3777 5F20 (your result here)
3778 */
3779 };
3780
3781 int32_t i = 0;
3782
3783 UCollator *coll = ucol_open("", &status);
3784 if(status == U_FILE_ACCESS_ERROR) {
3785 log_data_err("Is your data around?\n");
3786 return;
3787 } else if(U_FAILURE(status)) {
3788 log_err("Error opening collator\n");
3789 return;
3790 }
3791 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3792 conv = ucnv_open("UTF16BE", &status);
3793 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3794 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3795 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3796
3797 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3798
3799 log_verbose("Result of strcoll is %i\n", resU16);
3800
3801 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3802 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3803
3804 /* use the original sizes, as the result from converter is in bytes */
3805 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3806 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3807
3808 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3809
3810 log_verbose("Result of U16BE is %i\n", resU16BE);
3811
3812 if(resU16 != resU16BE) {
3813 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3814 }
3815
3816 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3817 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3818
3819 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3820 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3821
3822 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3823
3824 if(resU16 != resU8) {
3825 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3826 }
3827
3828 }
3829
3830 ucol_close(coll);
3831 ucnv_close(conv);
3832 }
3833
TestHebrewUCA(void)3834 static void TestHebrewUCA(void) {
3835 UErrorCode status = U_ZERO_ERROR;
3836 static const char *first[] = {
3837 "d790d6b8d79cd795d6bcd7a9",
3838 "d790d79cd79ed7a7d799d799d7a1",
3839 "d790d6b4d79ed795d6bcd7a9",
3840 };
3841
3842 char utf8String[3][256];
3843 UChar utf16String[3][256];
3844
3845 int32_t i = 0, j = 0;
3846 int32_t sizeUTF8[3];
3847 int32_t sizeUTF16[3];
3848
3849 UCollator *coll = ucol_open("", &status);
3850 if (U_FAILURE(status)) {
3851 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3852 return;
3853 }
3854 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3855
3856 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3857 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3858 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3859 log_verbose("%i: ");
3860 for(j = 0; j < sizeUTF16[i]; j++) {
3861 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3862 log_verbose("%04X", utf16String[i][j]);
3863 }
3864 log_verbose("\n");
3865 }
3866 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3867 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3868 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3869 }
3870 }
3871
3872 ucol_close(coll);
3873
3874 }
3875
TestPartialSortKeyTermination(void)3876 static void TestPartialSortKeyTermination(void) {
3877 static const char* cases[] = {
3878 "\\u1234\\u1234\\udc00",
3879 "\\udc00\\ud800\\ud800"
3880 };
3881
3882 int32_t i = sizeof(UCollator);
3883
3884 UErrorCode status = U_ZERO_ERROR;
3885
3886 UCollator *coll = ucol_open("", &status);
3887
3888 UCharIterator iter;
3889
3890 UChar currCase[256];
3891 int32_t length = 0;
3892 int32_t pKeyLen = 0;
3893
3894 uint8_t key[256];
3895
3896 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3897 uint32_t state[2] = {0, 0};
3898 length = u_unescape(cases[i], currCase, 256);
3899 uiter_setString(&iter, currCase, length);
3900 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3901
3902 log_verbose("Done\n");
3903
3904 }
3905 ucol_close(coll);
3906 }
3907
TestSettings(void)3908 static void TestSettings(void) {
3909 static const char* cases[] = {
3910 "apple",
3911 "Apple"
3912 };
3913
3914 static const char* locales[] = {
3915 "",
3916 "en"
3917 };
3918
3919 UErrorCode status = U_ZERO_ERROR;
3920
3921 int32_t i = 0, j = 0;
3922
3923 UChar source[256], target[256];
3924 int32_t sLen = 0, tLen = 0;
3925
3926 UCollator *collateObject = NULL;
3927 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
3928 collateObject = ucol_open(locales[i], &status);
3929 ucol_setStrength(collateObject, UCOL_PRIMARY);
3930 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
3931 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
3932 sLen = u_unescape(cases[j-1], source, 256);
3933 source[sLen] = 0;
3934 tLen = u_unescape(cases[j], target, 256);
3935 source[tLen] = 0;
3936 doTest(collateObject, source, target, UCOL_EQUAL);
3937 }
3938 ucol_close(collateObject);
3939 }
3940 }
3941
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)3942 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
3943 UErrorCode status = U_ZERO_ERROR;
3944 int32_t errorNo = 0;
3945 /*const UChar *sourceRules = NULL;*/
3946 /*int32_t sourceRulesLen = 0;*/
3947 UColAttributeValue french = UCOL_OFF;
3948 int32_t cloneSize = 0;
3949
3950 if(!ucol_equals(source, target)) {
3951 log_err("Same collators, different address not equal\n");
3952 errorNo++;
3953 }
3954 ucol_close(target);
3955 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
3956 /* currently, safeClone is implemented through getRules/openRules
3957 * so it is the same as the test below - I will comment that test out.
3958 */
3959 /* real thing */
3960 target = ucol_safeClone(source, NULL, &cloneSize, &status);
3961 if(U_FAILURE(status)) {
3962 log_err("Error creating clone\n");
3963 errorNo++;
3964 return errorNo;
3965 }
3966 if(!ucol_equals(source, target)) {
3967 log_err("Collator different from it's clone\n");
3968 errorNo++;
3969 }
3970 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
3971 if(french == UCOL_ON) {
3972 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
3973 } else {
3974 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
3975 }
3976 if(U_FAILURE(status)) {
3977 log_err("Error setting attributes\n");
3978 errorNo++;
3979 return errorNo;
3980 }
3981 if(ucol_equals(source, target)) {
3982 log_err("Collators same even when options changed\n");
3983 errorNo++;
3984 }
3985 ucol_close(target);
3986 /* commented out since safeClone uses exactly the same technique */
3987 /*
3988 sourceRules = ucol_getRules(source, &sourceRulesLen);
3989 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
3990 if(U_FAILURE(status)) {
3991 log_err("Error instantiating target from rules\n");
3992 errorNo++;
3993 return errorNo;
3994 }
3995 if(!ucol_equals(source, target)) {
3996 log_err("Collator different from collator that was created from the same rules\n");
3997 errorNo++;
3998 }
3999 ucol_close(target);
4000 */
4001 }
4002 return errorNo;
4003 }
4004
4005
TestEquals(void)4006 static void TestEquals(void) {
4007 /* ucol_equals is not currently a public API. There is a chance that it will become
4008 * something like this, but currently it is only used by RuleBasedCollator::operator==
4009 */
4010 /* test whether the two collators instantiated from the same locale are equal */
4011 UErrorCode status = U_ZERO_ERROR;
4012 UParseError parseError;
4013 int32_t noOfLoc = uloc_countAvailable();
4014 const char *locName = NULL;
4015 UCollator *source = NULL, *target = NULL;
4016 int32_t i = 0;
4017
4018 const char* rules[] = {
4019 "&l < lj <<< Lj <<< LJ",
4020 "&n < nj <<< Nj <<< NJ",
4021 "&ae <<< \\u00e4",
4022 "&AE <<< \\u00c4"
4023 };
4024 /*
4025 const char* badRules[] = {
4026 "&l <<< Lj",
4027 "&n < nj <<< nJ <<< NJ",
4028 "&a <<< \\u00e4",
4029 "&AE <<< \\u00c4 <<< x"
4030 };
4031 */
4032
4033 UChar sourceRules[1024], targetRules[1024];
4034 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4035 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4036
4037 for(i = 0; i < rulesSize; i++) {
4038 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4039 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4040 }
4041
4042 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4043 if(status == U_FILE_ACCESS_ERROR) {
4044 log_data_err("Is your data around?\n");
4045 return;
4046 } else if(U_FAILURE(status)) {
4047 log_err("Error opening collator\n");
4048 return;
4049 }
4050 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4051 if(!ucol_equals(source, target)) {
4052 log_err("Equivalent collators not equal!\n");
4053 }
4054 ucol_close(source);
4055 ucol_close(target);
4056
4057 source = ucol_open("root", &status);
4058 target = ucol_open("root", &status);
4059 log_verbose("Testing root\n");
4060 if(!ucol_equals(source, source)) {
4061 log_err("Same collator not equal\n");
4062 }
4063 if(TestEqualsForCollator(locName, source, target)) {
4064 log_err("Errors for root\n", locName);
4065 }
4066 ucol_close(source);
4067
4068 for(i = 0; i<noOfLoc; i++) {
4069 status = U_ZERO_ERROR;
4070 locName = uloc_getAvailable(i);
4071 /*if(hasCollationElements(locName)) {*/
4072 log_verbose("Testing equality for locale %s\n", locName);
4073 source = ucol_open(locName, &status);
4074 target = ucol_open(locName, &status);
4075 if (U_FAILURE(status)) {
4076 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
4077 continue;
4078 }
4079 if(TestEqualsForCollator(locName, source, target)) {
4080 log_err("Errors for locale %s\n", locName);
4081 }
4082 ucol_close(source);
4083 /*}*/
4084 }
4085 }
4086
TestJ2726(void)4087 static void TestJ2726(void) {
4088 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4089 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4090 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4091 UErrorCode status = U_ZERO_ERROR;
4092 UCollator *coll = ucol_open("en", &status);
4093 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4094 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4095 doTest(coll, a, aSpace, UCOL_EQUAL);
4096 doTest(coll, aSpace, a, UCOL_EQUAL);
4097 doTest(coll, a, spaceA, UCOL_EQUAL);
4098 doTest(coll, spaceA, a, UCOL_EQUAL);
4099 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4100 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4101 ucol_close(coll);
4102 }
4103
NullRule(void)4104 static void NullRule(void) {
4105 UChar r[3] = {0};
4106 UErrorCode status = U_ZERO_ERROR;
4107 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4108 if(U_SUCCESS(status)) {
4109 log_err("This should have been an error!\n");
4110 ucol_close(coll);
4111 } else {
4112 status = U_ZERO_ERROR;
4113 }
4114 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4115 if(U_FAILURE(status)) {
4116 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4117 } else {
4118 ucol_close(coll);
4119 }
4120 }
4121
4122 /**
4123 * Test for CollationElementIterator previous and next for the whole set of
4124 * unicode characters with normalization on.
4125 */
TestNumericCollation(void)4126 static void TestNumericCollation(void)
4127 {
4128 UErrorCode status = U_ZERO_ERROR;
4129
4130 const static char *basicTestStrings[]={
4131 "hello1",
4132 "hello2",
4133 "hello2002",
4134 "hello2003",
4135 "hello123456",
4136 "hello1234567",
4137 "hello10000000",
4138 "hello100000000",
4139 "hello1000000000",
4140 "hello10000000000",
4141 };
4142
4143 const static char *preZeroTestStrings[]={
4144 "avery10000",
4145 "avery010000",
4146 "avery0010000",
4147 "avery00010000",
4148 "avery000010000",
4149 "avery0000010000",
4150 "avery00000010000",
4151 "avery000000010000",
4152 };
4153
4154 const static char *thirtyTwoBitNumericStrings[]={
4155 "avery42949672960",
4156 "avery42949672961",
4157 "avery42949672962",
4158 "avery429496729610"
4159 };
4160
4161 const static char *longNumericStrings[]={
4162 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4163 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4164 are treated as multiple collation elements. */
4165 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4166 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4167 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4168 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4169 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4170 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4171 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4172 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4173 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4174 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4175 };
4176
4177 const static char *supplementaryDigits[] = {
4178 "\\uD835\\uDFCE", /* 0 */
4179 "\\uD835\\uDFCF", /* 1 */
4180 "\\uD835\\uDFD0", /* 2 */
4181 "\\uD835\\uDFD1", /* 3 */
4182 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4183 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4184 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4185 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4186 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4187 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4188 };
4189
4190 const static char *foreignDigits[] = {
4191 "\\u0661",
4192 "\\u0662",
4193 "\\u0663",
4194 "\\u0661\\u0660",
4195 "\\u0661\\u0662",
4196 "\\u0661\\u0663",
4197 "\\u0662\\u0660",
4198 "\\u0662\\u0662",
4199 "\\u0662\\u0663",
4200 "\\u0663\\u0660",
4201 "\\u0663\\u0662",
4202 "\\u0663\\u0663"
4203 };
4204
4205 const static char *evenZeroes[] = {
4206 "2000",
4207 "2001",
4208 "2002",
4209 "2003"
4210 };
4211
4212 UColAttribute att = UCOL_NUMERIC_COLLATION;
4213 UColAttributeValue val = UCOL_ON;
4214
4215 /* Open our collator. */
4216 UCollator* coll = ucol_open("root", &status);
4217 if (U_FAILURE(status)){
4218 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4219 myErrorName(status));
4220 return;
4221 }
4222 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4223 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4224 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4225 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4226 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4227 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4228
4229 /* Setting up our collator to do digits. */
4230 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4231 if (U_FAILURE(status)){
4232 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4233 myErrorName(status));
4234 return;
4235 }
4236
4237 /*
4238 Testing that prepended zeroes still yield the correct collation behavior.
4239 We expect that every element in our strings array will be equal.
4240 */
4241 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4242
4243 ucol_close(coll);
4244 }
4245
TestTibetanConformance(void)4246 static void TestTibetanConformance(void)
4247 {
4248 const char* test[] = {
4249 "\\u0FB2\\u0591\\u0F71\\u0061",
4250 "\\u0FB2\\u0F71\\u0061"
4251 };
4252
4253 UErrorCode status = U_ZERO_ERROR;
4254 UCollator *coll = ucol_open("", &status);
4255 UChar source[100];
4256 UChar target[100];
4257 int result;
4258 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4259 if (U_SUCCESS(status)) {
4260 u_unescape(test[0], source, 100);
4261 u_unescape(test[1], target, 100);
4262 doTest(coll, source, target, UCOL_EQUAL);
4263 result = ucol_strcoll(coll, source, -1, target, -1);
4264 log_verbose("result %d\n", result);
4265 if (UCOL_EQUAL != result) {
4266 log_err("Tibetan comparison error\n");
4267 }
4268 }
4269 ucol_close(coll);
4270
4271 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4272 }
4273
/**
 * Passes two Han strings to the generic ordering helper for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
}
4278
/* Largest raw value (inclusive) exercised by the implicit-weight tests below. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to 32-bit implicit weights. The replacement lists carry
 * no trailing semicolon, so the masks can be used inside larger expressions.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4283
4284
/**
 * Logs (verbose) the raw value `i` next to the implicit weight computed for
 * it. Values outside 0..TST_UCOL_MAX_INPUT are ignored.
 */
static void showImplicit(UChar32 i) {
    if (i < 0 || i > TST_UCOL_MAX_INPUT) {
        return;
    }
    log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
}
4290
TestImplicitGeneration(void)4291 static void TestImplicitGeneration(void) {
4292 UErrorCode status = U_ZERO_ERROR;
4293 UChar32 last = 0;
4294 UChar32 current;
4295 UChar32 i = 0, j = 0;
4296 UChar32 roundtrip = 0;
4297 UChar32 lastBottom = 0;
4298 UChar32 currentBottom = 0;
4299 UChar32 lastTop = 0;
4300 UChar32 currentTop = 0;
4301
4302 UCollator *coll = ucol_open("root", &status);
4303 if(U_FAILURE(status)) {
4304 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4305 return;
4306 }
4307
4308 uprv_uca_getRawFromImplicit(0xE20303E7);
4309
4310 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4311 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4312
4313 /* check that it round-trips AND that all intervening ones are illegal*/
4314 roundtrip = uprv_uca_getRawFromImplicit(current);
4315 if (roundtrip != i) {
4316 log_err("No roundtrip %08X\n", i);
4317 }
4318 if (last != 0) {
4319 for (j = last + 1; j < current; ++j) {
4320 roundtrip = uprv_uca_getRawFromImplicit(j);
4321 /* raise an error if it *doesn't* find an error*/
4322 if (roundtrip != -1) {
4323 log_err("Fails to recognize illegal %08X\n", j);
4324 }
4325 }
4326 }
4327 /* now do other consistency checks*/
4328 lastBottom = last & bottomByte;
4329 currentBottom = current & bottomByte;
4330 lastTop = last & topByte;
4331 currentTop = current & topByte;
4332
4333 /* print out some values for spot-checking*/
4334 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4335 showImplicit(i-3);
4336 showImplicit(i-2);
4337 showImplicit(i-1);
4338 showImplicit(i);
4339 showImplicit(i+1);
4340 showImplicit(i+2);
4341 }
4342 last = current;
4343
4344 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4345 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4346 }
4347 }
4348 showImplicit(TST_UCOL_MAX_INPUT-2);
4349 showImplicit(TST_UCOL_MAX_INPUT-1);
4350 showImplicit(TST_UCOL_MAX_INPUT);
4351 ucol_close(coll);
4352 }
4353
4354 /**
4355 * Iterate through the given iterator, checking to see that all the strings
4356 * in the expected array are present.
4357 * @param expected array of strings we expect to see, or NULL
4358 * @param expectedCount number of elements of expected, or 0
4359 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4360 static int32_t checkUEnumeration(const char* msg,
4361 UEnumeration* iter,
4362 const char** expected,
4363 int32_t expectedCount) {
4364 UErrorCode ec = U_ZERO_ERROR;
4365 int32_t i = 0, n, j, bit;
4366 int32_t seenMask = 0;
4367
4368 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4369 n = uenum_count(iter, &ec);
4370 if (!assertSuccess("count", &ec)) return -1;
4371 log_verbose("%s = [", msg);
4372 for (;; ++i) {
4373 const char* s = uenum_next(iter, NULL, &ec);
4374 if (!assertSuccess("snext", &ec) || s == NULL) break;
4375 if (i != 0) log_verbose(",");
4376 log_verbose("%s", s);
4377 /* check expected list */
4378 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4379 if ((seenMask&bit) == 0 &&
4380 uprv_strcmp(s, expected[j]) == 0) {
4381 seenMask |= bit;
4382 break;
4383 }
4384 }
4385 }
4386 log_verbose("] (%d)\n", i);
4387 assertTrue("count verified", i==n);
4388 /* did we see all expected strings? */
4389 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4390 if ((seenMask&bit)!=0) {
4391 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4392 } else {
4393 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4394 }
4395 }
4396 return n;
4397 }
4398
4399 /**
4400 * Test new API added for separate collation tree.
4401 */
TestSeparateTrees(void)4402 static void TestSeparateTrees(void) {
4403 UErrorCode ec = U_ZERO_ERROR;
4404 UEnumeration *e = NULL;
4405 int32_t n = -1;
4406 UBool isAvailable;
4407 char loc[256];
4408
4409 static const char* AVAIL[] = { "en", "de" };
4410
4411 static const char* KW[] = { "collation" };
4412
4413 static const char* KWVAL[] = { "phonebook", "stroke" };
4414
4415 #if !UCONFIG_NO_SERVICE
4416 e = ucol_openAvailableLocales(&ec);
4417 if (e != NULL) {
4418 assertSuccess("ucol_openAvailableLocales", &ec);
4419 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4420 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4421 /* Don't need to check n because we check list */
4422 uenum_close(e);
4423 } else {
4424 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4425 }
4426 #endif
4427
4428 e = ucol_getKeywords(&ec);
4429 if (e != NULL) {
4430 assertSuccess("ucol_getKeywords", &ec);
4431 assertTrue("ucol_getKeywords!=0", e!=0);
4432 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4433 /* Don't need to check n because we check list */
4434 uenum_close(e);
4435 } else {
4436 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4437 }
4438
4439 e = ucol_getKeywordValues(KW[0], &ec);
4440 if (e != NULL) {
4441 assertSuccess("ucol_getKeywordValues", &ec);
4442 assertTrue("ucol_getKeywordValues!=0", e!=0);
4443 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4444 /* Don't need to check n because we check list */
4445 uenum_close(e);
4446 } else {
4447 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4448 }
4449
4450 /* Try setting a warning before calling ucol_getKeywordValues */
4451 ec = U_USING_FALLBACK_WARNING;
4452 e = ucol_getKeywordValues(KW[0], &ec);
4453 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4454 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4455 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4456 /* Don't need to check n because we check list */
4457 uenum_close(e);
4458 }
4459
4460 /*
4461 U_DRAFT int32_t U_EXPORT2
4462 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4463 const char* locale, UBool* isAvailable,
4464 UErrorCode* status);
4465 }
4466 */
4467 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
4468 &isAvailable, &ec);
4469 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4470 assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
4471 assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
4472 isAvailable == TRUE);
4473 }
4474
4475 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
4476 &isAvailable, &ec);
4477 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4478 assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
4479 assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
4480 isAvailable == TRUE);
4481 }
4482 }
4483
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through the generic ordering helpers, once
 * against the [before 2] tailoring below and once against the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a macron vowel. */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Accent and case variants of "a" after x/X, alone and followed by x. */
    static const char *test2[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
4544
TestBeforeTightening(void)4545 static void TestBeforeTightening(void) {
4546 static const struct {
4547 const char *rules;
4548 UErrorCode expectedStatus;
4549 } tests[] = {
4550 { "&[before 1]a<x", U_ZERO_ERROR },
4551 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4552 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4553 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4554 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4555 { "&[before 2]a<<x",U_ZERO_ERROR },
4556 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4557 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4558 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4559 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4560 { "&[before 3]a<<<x",U_ZERO_ERROR },
4561 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4562 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4563 };
4564
4565 int32_t i = 0;
4566
4567 UErrorCode status = U_ZERO_ERROR;
4568 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4569 uint32_t rlen = 0;
4570
4571 UCollator *coll = NULL;
4572
4573
4574 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4575 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4576 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4577 if(status != tests[i].expectedStatus) {
4578 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4579 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4580 }
4581 ucol_close(coll);
4582 status = U_ZERO_ERROR;
4583 }
4584
4585 }
4586
4587 #if 0
4588 &m < a
4589 &[before 1] a < x <<< X << q <<< Q < z
4590 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4591
4592 &m < a
4593 &[before 2] a << x <<< X << q <<< Q < z
4594 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4595
4596 &m < a
4597 &[before 3] a <<< x <<< X << q <<< Q < z
4598 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4599
4600
4601 &m << a
4602 &[before 1] a < x <<< X << q <<< Q < z
4603 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4604
4605 &m << a
4606 &[before 2] a << x <<< X << q <<< Q < z
4607 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4608
4609 &m << a
4610 &[before 3] a <<< x <<< X << q <<< Q < z
4611 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4612
4613
4614 &m <<< a
4615 &[before 1] a < x <<< X << q <<< Q < z
4616 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4617
4618 &m <<< a
4619 &[before 2] a << x <<< X << q <<< Q < z
4620 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4621
4622 &m <<< a
4623 &[before 3] a <<< x <<< X << q <<< Q < z
4624 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4625
4626
4627 &[before 1] s < x <<< X << q <<< Q < z
4628 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4629
4630 &[before 2] s << x <<< X << q <<< Q < z
4631 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4632
4633 &[before 3] s <<< x <<< X << q <<< Q < z
4634 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4635
4636
4637 &[before 1] \u24DC < x <<< X << q <<< Q < z
4638 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4639
4640 &[before 2] \u24DC << x <<< X << q <<< Q < z
4641 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4642
4643 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4644 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4645 #endif
4646
4647
4648 #if 0
/* requires features not yet supported */
/*
 * Disabled test (inside #if 0). Each table entry pairs a rule string that
 * uses [before N] with the list of strings, in expected order, that is passed
 * to genericRulesStarter(); `size` is the number of used entries in `order`.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/order pair through the generic rules-based ordering test. */
    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
4694 #endif
4695
TestTailorNULL(void)4696 static void TestTailorNULL( void ) {
4697 const static char* rule = "&a <<< '\\u0000'";
4698 UErrorCode status = U_ZERO_ERROR;
4699 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4700 uint32_t rlen = 0;
4701 UChar a = 1, null = 0;
4702 UCollationResult res = UCOL_EQUAL;
4703
4704 UCollator *coll = NULL;
4705
4706
4707 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4708 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4709
4710 if(U_FAILURE(status)) {
4711 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4712 } else {
4713 res = ucol_strcoll(coll, &a, 1, &null, 1);
4714
4715 if(res != UCOL_LESS) {
4716 log_err("NULL was not tailored properly!\n");
4717 }
4718 }
4719
4720 ucol_close(coll);
4721 }
4722
4723 static void
TestThaiSortKey(void)4724 TestThaiSortKey(void)
4725 {
4726 UChar yamakan = 0x0E4E;
4727 UErrorCode status = U_ZERO_ERROR;
4728 uint8_t key[256];
4729 int32_t keyLen = 0;
4730 /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
4731 /* since it stays in the same relative position. This should be addressed in CLDR */
4732 /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
4733 /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
4734 /* UCA 5.0 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 }; */
4735 /* UCA 5.1 moves Yammakan */
4736 uint8_t expectedKey[256] = { 0x01, 0xe0, 0x4e, 0x01, 0x05, 0x00 };
4737 UCollator *coll = ucol_open("th", &status);
4738 if(U_FAILURE(status)) {
4739 log_err_status(status, "Could not open a collator, exiting (%s)\n", u_errorName(status));
4740 return;
4741 }
4742
4743 keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
4744 if(strcmp((char *)key, (char *)expectedKey)) {
4745 log_err("Yammakan key is different from ICU 4.0!\n");
4746 }
4747
4748 ucol_close(coll);
4749 }
4750
4751 static void
TestUpperFirstQuaternary(void)4752 TestUpperFirstQuaternary(void)
4753 {
4754 const char* tests[] = { "B", "b", "Bb", "bB" };
4755 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4756 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4757 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4758 }
4759
4760 static void
TestJ4960(void)4761 TestJ4960(void)
4762 {
4763 const char* tests[] = { "\\u00e2T", "aT" };
4764 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4765 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4766 const char* tests2[] = { "a", "A" };
4767 const char* rule = "&[first tertiary ignorable]=A=a";
4768 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4769 UColAttributeValue attVals2[] = { UCOL_ON };
4770 /* Test whether we correctly ignore primary ignorables on case level when */
4771 /* we have only primary & case level */
4772 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4773 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4774 /* and case level */
4775 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4776 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4777 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4778 }
4779
4780 static void
TestJ5223(void)4781 TestJ5223(void)
4782 {
4783 static const char *test = "this is a test string";
4784 UChar ustr[256];
4785 int32_t ustr_length = u_unescape(test, ustr, 256);
4786 unsigned char sortkey[256];
4787 int32_t sortkey_length;
4788 UErrorCode status = U_ZERO_ERROR;
4789 static UCollator *coll = NULL;
4790 coll = ucol_open("root", &status);
4791 if(U_FAILURE(status)) {
4792 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4793 return;
4794 }
4795 ucol_setStrength(coll, UCOL_PRIMARY);
4796 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4797 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4798 if (U_FAILURE(status)) {
4799 log_err("Failed setting atributes\n");
4800 return;
4801 }
4802 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4803 if (sortkey_length > 256) return;
4804
4805 /* we mark the position where the null byte should be written in advance */
4806 sortkey[sortkey_length-1] = 0xAA;
4807
4808 /* we set the buffer size one byte higher than needed */
4809 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4810 sortkey_length+1);
4811
4812 /* no error occurs (for me) */
4813 if (sortkey[sortkey_length-1] == 0xAA) {
4814 log_err("Hit bug at first try\n");
4815 }
4816
4817 /* we mark the position where the null byte should be written again */
4818 sortkey[sortkey_length-1] = 0xAA;
4819
4820 /* this time we set the buffer size to the exact amount needed */
4821 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4822 sortkey_length);
4823
4824 /* now the trailing null byte is not written */
4825 if (sortkey[sortkey_length-1] == 0xAA) {
4826 log_err("Hit bug at second try\n");
4827 }
4828
4829 ucol_close(coll);
4830 }
4831
/*
 * J5232: regression test for the Thai partial sort key problem.
 * Two Thai strings that differ only in their last combining mark
 * (U+0E47 versus U+0E48) are handed to the generic "th" locale test.
 */
static void
TestJ5232(void)
{
    static const char *thaiWords[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiWords, sizeof(thaiWords)/sizeof(*thaiWords));
}
4843
/*
 * J5367: runs the generic rules test on "a" and "y" with a tailoring that
 * combines a contraction reset (&Ny << Y) with a reset on
 * [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    static const char *samples[] = { "a", "y" };

    genericRulesStarter(tailoring, samples, sizeof(samples)/sizeof(*samples));
}
4851
4852 static void
TestVI5913(void)4853 TestVI5913(void)
4854 {
4855 UErrorCode status = U_ZERO_ERROR;
4856 int32_t i, j;
4857 UCollator *coll =NULL;
4858 uint8_t resColl[100], expColl[100];
4859 int32_t rLen, tLen, ruleLen, sLen, kLen;
4860 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4861 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4862 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4863 static const UChar tData[][20]={
4864 {0x1EAC, 0},
4865 {0x0041, 0x0323, 0x0302, 0},
4866 {0x1EA0, 0x0302, 0},
4867 {0x00C2, 0x0323, 0},
4868 {0x1ED8, 0}, /* O with dot and circumflex */
4869 {0x1ECC, 0x0302, 0},
4870 {0x1EB7, 0},
4871 {0x1EA1, 0x0306, 0},
4872 };
4873 static const UChar tailorData[][20]={
4874 {0x1FA2, 0}, /* Omega with 3 combining marks */
4875 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4876 {0x1FF3, 0x0313, 0x0300, 0},
4877 {0x1F60, 0x0300, 0x0345, 0},
4878 {0x1F62, 0x0345, 0},
4879 {0x1FA0, 0x0300, 0},
4880 };
4881 static const UChar tailorData2[][20]={
4882 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4883 {0x0073, 0x0323, 0x030C, 0},
4884 {0x0073, 0x030C, 0x0323, 0},
4885 };
4886 static const UChar tailorData3[][20]={
4887 {0x007a, 0}, /* z */
4888 {0x0061, 0x0065, 0}, /* a + e */
4889 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4890 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4891 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4892 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4893 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4894 {0x00EA, 0}, /* e with circumflex */
4895 };
4896
4897 /* Test Vietnamese sort. */
4898 coll = ucol_open("vi", &status);
4899 if(U_FAILURE(status)) {
4900 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4901 return;
4902 }
4903 log_verbose("\n\nVI collation:");
4904 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4905 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4906 }
4907 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4908 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4909 }
4910 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4911 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4912 }
4913 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4914 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4915 }
4916
4917 for (j=0; j<8; j++) {
4918 tLen = u_strlen(tData[j]);
4919 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4920 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4921 for(i = 0; i<rLen; i++) {
4922 log_verbose(" %02X", resColl[i]);
4923 }
4924 }
4925
4926 ucol_close(coll);
4927
4928 /* Test Romanian sort. */
4929 coll = ucol_open("ro", &status);
4930 log_verbose("\n\nRO collation:");
4931 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4932 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4933 }
4934 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4935 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4936 }
4937 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4938 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4939 }
4940
4941 for (j=4; j<8; j++) {
4942 tLen = u_strlen(tData[j]);
4943 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
4944 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4945 for(i = 0; i<rLen; i++) {
4946 log_verbose(" %02X", resColl[i]);
4947 }
4948 }
4949 ucol_close(coll);
4950
4951 /* Test the precomposed Greek character with 3 combining marks. */
4952 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
4953 ruleLen = u_strlen(rule);
4954 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4955 if (U_FAILURE(status)) {
4956 log_err("ucol_openRules failed with %s\n", u_errorName(status));
4957 return;
4958 }
4959 sLen = u_strlen(tailorData[0]);
4960 for (j=1; j<6; j++) {
4961 tLen = u_strlen(tailorData[j]);
4962 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
4963 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
4964 }
4965 }
4966 /* Test getSortKey. */
4967 tLen = u_strlen(tailorData[0]);
4968 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
4969 for (j=0; j<6; j++) {
4970 tLen = u_strlen(tailorData[j]);
4971 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
4972 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4973 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4974 for(i = 0; i<rLen; i++) {
4975 log_err(" %02X", resColl[i]);
4976 }
4977 }
4978 }
4979 ucol_close(coll);
4980
4981 log_verbose("\n\nTailoring test for s with caron:");
4982 ruleLen = u_strlen(rule2);
4983 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
4984 tLen = u_strlen(tailorData2[0]);
4985 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
4986 for (j=1; j<3; j++) {
4987 tLen = u_strlen(tailorData2[j]);
4988 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
4989 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
4990 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
4991 for(i = 0; i<rLen; i++) {
4992 log_err(" %02X", resColl[i]);
4993 }
4994 }
4995 }
4996 ucol_close(coll);
4997
4998 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
4999 ruleLen = u_strlen(rule3);
5000 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5001 tLen = u_strlen(tailorData3[3]);
5002 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5003 for (j=4; j<6; j++) {
5004 tLen = u_strlen(tailorData3[j]);
5005 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5006
5007 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5008 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5009 for(i = 0; i<rLen; i++) {
5010 log_err(" %02X", resColl[i]);
5011 }
5012 }
5013
5014 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5015 for(i = 0; i<rLen; i++) {
5016 log_verbose(" %02X", resColl[i]);
5017 }
5018 }
5019 ucol_close(coll);
5020 }
5021
5022 static void
TestTailor6179(void)5023 TestTailor6179(void)
5024 {
5025 UErrorCode status = U_ZERO_ERROR;
5026 int32_t i;
5027 UCollator *coll =NULL;
5028 uint8_t resColl[100];
5029 int32_t rLen, tLen, ruleLen;
5030 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5031 UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5032 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5033 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5034 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5035 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5036 UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5037 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5038 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5039 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5040 0x3C,0x3C,0x20,0x62,0};
5041
5042 UChar tData1[][20]={
5043 {0x61, 0},
5044 {0x62, 0},
5045 { 0xFDD0,0x009E, 0}
5046 };
5047 UChar tData2[][20]={
5048 {0x61, 0},
5049 {0x62, 0},
5050 { 0xFDD0,0x009E, 0}
5051 };
5052
5053 /* UCA5.1, the value may increase in later version. */
5054 uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5055 uint8_t lastPrimaryIgnCE[6]={1, 0xE7, 0xB9, 1, 5, 0};
5056 uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5057 uint8_t lastSecondaryIgnCE[6]={1, 1, 0x05, 0};
5058
5059 /* Test [Last Primary ignorable] */
5060
5061 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b ");
5062 ruleLen = u_strlen(rule1);
5063 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5064 if (U_FAILURE(status)) {
5065 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5066 return;
5067 }
5068 tLen = u_strlen(tData1[0]);
5069 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5070 if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5071 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
5072 for(i = 0; i<rLen; i++) {
5073 log_err(" %02X", resColl[i]);
5074 }
5075 }
5076 tLen = u_strlen(tData1[1]);
5077 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5078 if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5079 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
5080 for(i = 0; i<rLen; i++) {
5081 log_err(" %02X", resColl[i]);
5082 }
5083 }
5084 ucol_close(coll);
5085
5086
5087 /* Test [Last Secondary ignorable] */
5088 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b ");
5089 ruleLen = u_strlen(rule1);
5090 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5091 if (U_FAILURE(status)) {
5092 log_err("Tailoring test: &[last primary ignorable] failed!");
5093 return;
5094 }
5095 tLen = u_strlen(tData2[0]);
5096 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5097 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5098 for(i = 0; i<rLen; i++) {
5099 log_verbose(" %02X", resColl[i]);
5100 }
5101 if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5102 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5103 for(i = 0; i<rLen; i++) {
5104 log_err(" %02X", resColl[i]);
5105 }
5106 }
5107 tLen = u_strlen(tData2[1]);
5108 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5109 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5110 for(i = 0; i<rLen; i++) {
5111 log_verbose(" %02X", resColl[i]);
5112 }
5113 if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5114 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5115 for(i = 0; i<rLen; i++) {
5116 log_err(" %02X", resColl[i]);
5117 }
5118 }
5119 ucol_close(coll);
5120 }
5121
5122 static void
TestUCAPrecontext(void)5123 TestUCAPrecontext(void)
5124 {
5125 UErrorCode status = U_ZERO_ERROR;
5126 int32_t i, j;
5127 UCollator *coll =NULL;
5128 uint8_t resColl[100], prevColl[100];
5129 int32_t rLen, tLen, ruleLen;
5130 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5131 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5132 /* & l middle-dot << a a is an expansion. */
5133
5134 UChar tData1[][20]={
5135 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5136 { 0x387, 0}, /* standalone middle dot(0x387) */
5137 { 0x61, 0}, /* a */
5138 { 0x6C, 0}, /* l */
5139 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5140 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5141 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5142 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5143 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5144 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5145 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5146 };
5147
5148 log_verbose("\n\nEN collation:");
5149 coll = ucol_open("en", &status);
5150 if (U_FAILURE(status)) {
5151 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5152 return;
5153 }
5154 for (j=0; j<11; j++) {
5155 tLen = u_strlen(tData1[j]);
5156 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5157 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5158 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5159 j, tData1[j]);
5160 }
5161 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5162 for(i = 0; i<rLen; i++) {
5163 log_verbose(" %02X", resColl[i]);
5164 }
5165 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5166 }
5167 ucol_close(coll);
5168
5169
5170 log_verbose("\n\nJA collation:");
5171 coll = ucol_open("ja", &status);
5172 if (U_FAILURE(status)) {
5173 log_err("Tailoring test: &z <<a|- failed!");
5174 return;
5175 }
5176 for (j=0; j<11; j++) {
5177 tLen = u_strlen(tData1[j]);
5178 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5179 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5180 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5181 j, tData1[j]);
5182 }
5183 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5184 for(i = 0; i<rLen; i++) {
5185 log_verbose(" %02X", resColl[i]);
5186 }
5187 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5188 }
5189 ucol_close(coll);
5190
5191
5192 log_verbose("\n\nTailoring test: & middle dot < a ");
5193 ruleLen = u_strlen(rule1);
5194 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5195 if (U_FAILURE(status)) {
5196 log_err("Tailoring test: & middle dot < a failed!");
5197 return;
5198 }
5199 for (j=0; j<11; j++) {
5200 tLen = u_strlen(tData1[j]);
5201 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5202 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5203 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5204 j, tData1[j]);
5205 }
5206 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5207 for(i = 0; i<rLen; i++) {
5208 log_verbose(" %02X", resColl[i]);
5209 }
5210 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5211 }
5212 ucol_close(coll);
5213
5214
5215 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5216 ruleLen = u_strlen(rule2);
5217 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5218 if (U_FAILURE(status)) {
5219 log_err("Tailoring test: & l middle-dot << a failed!");
5220 return;
5221 }
5222 for (j=0; j<11; j++) {
5223 tLen = u_strlen(tData1[j]);
5224 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5225 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5226 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5227 j, tData1[j]);
5228 }
5229 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5230 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5231 j, tData1[j]);
5232 }
5233 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5234 for(i = 0; i<rLen; i++) {
5235 log_verbose(" %02X", resColl[i]);
5236 }
5237 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5238 }
5239 ucol_close(coll);
5240 }
5241
5242 static void
TestOutOfBuffer5468(void)5243 TestOutOfBuffer5468(void)
5244 {
5245 static const char *test = "\\u4e00";
5246 UChar ustr[256];
5247 int32_t ustr_length = u_unescape(test, ustr, 256);
5248 unsigned char shortKeyBuf[1];
5249 int32_t sortkey_length;
5250 UErrorCode status = U_ZERO_ERROR;
5251 static UCollator *coll = NULL;
5252
5253 coll = ucol_open("root", &status);
5254 if(U_FAILURE(status)) {
5255 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5256 return;
5257 }
5258 ucol_setStrength(coll, UCOL_PRIMARY);
5259 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5260 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5261 if (U_FAILURE(status)) {
5262 log_err("Failed setting atributes\n");
5263 return;
5264 }
5265
5266 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5267 if (sortkey_length != 4) {
5268 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
5269 }
5270 log_verbose("length of sortKey is %d", sortkey_length);
5271 ucol_close(coll);
5272 }
5273
5274 #define TSKC_DATA_SIZE 5
5275 #define TSKC_BUF_SIZE 50
5276 static void
TestSortKeyConsistency(void)5277 TestSortKeyConsistency(void)
5278 {
5279 UErrorCode icuRC = U_ZERO_ERROR;
5280 UCollator* ucol;
5281 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5282
5283 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5284 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5285 int32_t i, j, i2;
5286
5287 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5288 if (U_FAILURE(icuRC))
5289 {
5290 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5291 return;
5292 }
5293
5294 for (i = 0; i < TSKC_DATA_SIZE; i++)
5295 {
5296 UCharIterator uiter;
5297 uint32_t state[2] = { 0, 0 };
5298 int32_t dataLen = i+1;
5299 for (j=0; j<TSKC_BUF_SIZE; j++)
5300 bufFull[i][j] = bufPart[i][j] = 0;
5301
5302 /* Full sort key */
5303 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5304
5305 /* Partial sort key */
5306 uiter_setString(&uiter, data, dataLen);
5307 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5308 if (U_FAILURE(icuRC))
5309 {
5310 log_err("ucol_nextSortKeyPart failed\n");
5311 ucol_close(ucol);
5312 return;
5313 }
5314
5315 for (i2=0; i2<i; i2++)
5316 {
5317 UBool fullMatch = TRUE;
5318 UBool partMatch = TRUE;
5319 for (j=0; j<TSKC_BUF_SIZE; j++)
5320 {
5321 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5322 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5323 }
5324 if (fullMatch != partMatch) {
5325 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5326 : "partial key was consistent, but full key changed\n");
5327 ucol_close(ucol);
5328 return;
5329 }
5330 }
5331 }
5332
5333 /*=============================================*/
5334 ucol_close(ucol);
5335 }
5336
5337 /* ticket: 6101 */
TestCroatianSortKey(void)5338 static void TestCroatianSortKey(void) {
5339 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5340 UErrorCode status = U_ZERO_ERROR;
5341 UCollator *ucol;
5342 UCharIterator iter;
5343
5344 static const UChar text[] = { 0x0044, 0xD81A };
5345
5346 size_t length = sizeof(text)/sizeof(*text);
5347
5348 uint8_t textSortKey[32];
5349 size_t lenSortKey = 32;
5350 size_t actualSortKeyLen;
5351 uint32_t uStateInfo[2] = { 0, 0 };
5352
5353 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5354 if (U_FAILURE(status)) {
5355 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5356 return;
5357 }
5358
5359 uiter_setString(&iter, text, length);
5360
5361 actualSortKeyLen = ucol_nextSortKeyPart(
5362 ucol, &iter, (uint32_t*)uStateInfo,
5363 textSortKey, lenSortKey, &status
5364 );
5365
5366 if (actualSortKeyLen == lenSortKey) {
5367 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5368 }
5369
5370 ucol_close(ucol);
5371 }
5372
5373 /* ticket: 6140 */
5374 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5375 * they are both Hiragana and Katakana
5376 */
5377 #define SORTKEYLEN 50
TestHiragana(void)5378 static void TestHiragana(void) {
5379 UErrorCode status = U_ZERO_ERROR;
5380 UCollator* ucol;
5381 UCollationResult strcollresult;
5382 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5383 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5384 int32_t data1Len = sizeof(data1)/sizeof(*data1);
5385 int32_t data2Len = sizeof(data2)/sizeof(*data2);
5386 int32_t i, j;
5387 uint8_t sortKey1[SORTKEYLEN];
5388 uint8_t sortKey2[SORTKEYLEN];
5389
5390 UCharIterator uiter1;
5391 UCharIterator uiter2;
5392 uint32_t state1[2] = { 0, 0 };
5393 uint32_t state2[2] = { 0, 0 };
5394 int32_t keySize1;
5395 int32_t keySize2;
5396
5397 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5398 &status);
5399 if (U_FAILURE(status)) {
5400 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5401 return;
5402 }
5403
5404 /* Start of full sort keys */
5405 /* Full sort key1 */
5406 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5407 /* Full sort key2 */
5408 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5409 if (keySize1 == keySize2) {
5410 for (i = 0; i < keySize1; i++) {
5411 if (sortKey1[i] != sortKey2[i]) {
5412 log_err("Full sort keys are different. Should be equal.");
5413 }
5414 }
5415 } else {
5416 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5417 }
5418 /* End of full sort keys */
5419
5420 /* Start of partial sort keys */
5421 /* Partial sort key1 */
5422 uiter_setString(&uiter1, data1, data1Len);
5423 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5424 /* Partial sort key2 */
5425 uiter_setString(&uiter2, data2, data2Len);
5426 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5427 if (U_SUCCESS(status) && keySize1 == keySize2) {
5428 for (j = 0; j < keySize1; j++) {
5429 if (sortKey1[j] != sortKey2[j]) {
5430 log_err("Partial sort keys are different. Should be equal");
5431 }
5432 }
5433 } else {
5434 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5435 }
5436 /* End of partial sort keys */
5437
5438 /* Start of strcoll */
5439 /* Use ucol_strcoll() to determine ordering */
5440 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5441 if (strcollresult != UCOL_EQUAL) {
5442 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5443 }
5444
5445 ucol_close(ucol);
5446 }
5447
/*
 * Registers test function x with addTest under the path
 * "tscoll/cmsccoll/<function name>"; the name is produced by stringifying x.
 * The macro expects a variable named root to be in scope.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the miscellaneous collation tests of this file to the test tree.
 *
 * root: the test tree node passed on to addTest for every registration.
 *
 * Entries that are commented out are deliberately not registered; where a
 * reason is known it is given next to the entry.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestThaiSortKey);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
}
5525
5526 #endif /* #if !UCONFIG_NO_COLLATION */
5527