1
2 /********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
8 *
9 * File cmsccoll.C
10 *
11 *******************************************************************************/
12 /**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17 #include <stdio.h>
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_COLLATION
22
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
26 #include "cintltst.h"
27 #include "ccolltst.h"
28 #include "callcoll.h"
29 #include "unicode/ustring.h"
30 #include "string.h"
31 #include "ucol_imp.h"
32 #include "ucol_tok.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
/*
 * Number of elements in a true array (do not pass a pointer).
 * The argument is parenthesized before indexing so that any array-valued
 * expression expands correctly.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row width, in UChars, of the fixed-width test string tables in this file. */
#define MAX_TOKEN_LEN 16
48
49 typedef UCollationResult tst_strcoll(void *collator, const int object,
50 const UChar *source, const int sLen,
51 const UChar *target, const int tLen);
52
53
54
/*
 * Strings for the first half of IncompleteCntTest, listed in the order they
 * are expected to sort under the rule " & Z < ABC < Q < B": every entry must
 * compare less than every entry that follows it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};

/*
 * Strings for the second half of IncompleteCntTest, listed in the order they
 * are expected to sort under the rule " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR",
    "Z", "DAVIS", "MARK",
    "DAV", "DAVI"
};
81
IncompleteCntTest(void)82 static void IncompleteCntTest(void)
83 {
84 UErrorCode status = U_ZERO_ERROR;
85 UChar temp[90];
86 UChar t1[90];
87 UChar t2[90];
88
89 UCollator *coll = NULL;
90 uint32_t i = 0, j = 0;
91 uint32_t size = 0;
92
93 u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97 if(U_SUCCESS(status)) {
98 size = sizeof(cnt1)/sizeof(cnt1[0]);
99 for(i = 0; i < size-1; i++) {
100 for(j = i+1; j < size; j++) {
101 UCollationElements *iter;
102 u_uastrcpy(t1, cnt1[i]);
103 u_uastrcpy(t2, cnt1[j]);
104 doTest(coll, t1, t2, UCOL_LESS);
105 /* synwee : added collation element iterator test */
106 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107 if (U_FAILURE(status)) {
108 log_err("Creation of iterator failed\n");
109 break;
110 }
111 backAndForth(iter);
112 ucol_closeElements(iter);
113 }
114 }
115 }
116
117 ucol_close(coll);
118
119
120 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123 if(U_SUCCESS(status)) {
124 size = sizeof(cnt2)/sizeof(cnt2[0]);
125 for(i = 0; i < size-1; i++) {
126 for(j = i+1; j < size; j++) {
127 UCollationElements *iter;
128 u_uastrcpy(t1, cnt2[i]);
129 u_uastrcpy(t2, cnt2[j]);
130 doTest(coll, t1, t2, UCOL_LESS);
131
132 /* synwee : added collation element iterator test */
133 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134 if (U_FAILURE(status)) {
135 log_err("Creation of iterator failed\n");
136 break;
137 }
138 backAndForth(iter);
139 ucol_closeElements(iter);
140 }
141 }
142 }
143
144 ucol_close(coll);
145
146
147 }
148
/*
 * "black bird" variants in the order BlackBirdTest expects them when
 * alternate handling is UCOL_SHIFTED and strength is UCOL_QUATERNARY:
 * each entry must compare less than every later entry.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
160
161 const static UCollationResult shiftedTert[] = {
162 UCOL_EQUAL,
163 UCOL_EQUAL,
164 UCOL_EQUAL,
165 UCOL_LESS,
166 UCOL_EQUAL,
167 UCOL_EQUAL,
168 UCOL_LESS,
169 UCOL_EQUAL,
170 UCOL_EQUAL
171 };
172
/*
 * "black bird" variants in the order BlackBirdTest expects them when
 * alternate handling is UCOL_NON_IGNORABLE: each entry must compare less
 * than every later entry.
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
184
BlackBirdTest(void)185 static void BlackBirdTest(void) {
186 UErrorCode status = U_ZERO_ERROR;
187 UChar t1[90];
188 UChar t2[90];
189
190 uint32_t i = 0, j = 0;
191 uint32_t size = 0;
192 UCollator *coll = ucol_open("en_US", &status);
193
194 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197 if(U_SUCCESS(status)) {
198 size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199 for(i = 0; i < size-1; i++) {
200 for(j = i+1; j < size; j++) {
201 u_uastrcpy(t1, nonignorable[i]);
202 u_uastrcpy(t2, nonignorable[j]);
203 doTest(coll, t1, t2, UCOL_LESS);
204 }
205 }
206 }
207
208 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211 if(U_SUCCESS(status)) {
212 size = sizeof(shifted)/sizeof(shifted[0]);
213 for(i = 0; i < size-1; i++) {
214 for(j = i+1; j < size; j++) {
215 u_uastrcpy(t1, shifted[i]);
216 u_uastrcpy(t2, shifted[j]);
217 doTest(coll, t1, t2, UCOL_LESS);
218 }
219 }
220 }
221
222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223 if(U_SUCCESS(status)) {
224 size = sizeof(shifted)/sizeof(shifted[0]);
225 for(i = 1; i < size; i++) {
226 u_uastrcpy(t1, shifted[i-1]);
227 u_uastrcpy(t2, shifted[i]);
228 doTest(coll, t1, t2, shiftedTert[i]);
229 }
230 }
231
232 ucol_close(coll);
233 }
234
235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238 {0x0041/*'A'*/, 0x0300, 0x0000},
239 {0x00C0, 0x0301, 0x0000},
240 /* this would work with forced normalization */
241 {0x00C0, 0x0316, 0x0000}
242 };
243
244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247 {0x00C0, 0},
248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249 /* this would work with forced normalization */
250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251 };
252
253 const static UCollationResult results[] = {
254 UCOL_GREATER,
255 UCOL_EQUAL,
256 UCOL_EQUAL,
257 UCOL_GREATER,
258 UCOL_EQUAL
259 };
260
FunkyATest(void)261 static void FunkyATest(void)
262 {
263
264 int32_t i;
265 UErrorCode status = U_ZERO_ERROR;
266 UCollator *myCollation;
267 myCollation = ucol_open("en_US", &status);
268 if(U_FAILURE(status)){
269 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270 return;
271 }
272 log_verbose("Testing some A letters, for some reason\n");
273 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274 ucol_setStrength(myCollation, UCOL_TERTIARY);
275 for (i = 0; i < 4 ; i++)
276 {
277 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278 }
279 ucol_close(myCollation);
280 }
281
282 UColAttributeValue caseFirst[] = {
283 UCOL_OFF,
284 UCOL_LOWER_FIRST,
285 UCOL_UPPER_FIRST
286 };
287
288
289 UColAttributeValue alternateHandling[] = {
290 UCOL_NON_IGNORABLE,
291 UCOL_SHIFTED
292 };
293
294 UColAttributeValue caseLevel[] = {
295 UCOL_OFF,
296 UCOL_ON
297 };
298
299 UColAttributeValue strengths[] = {
300 UCOL_PRIMARY,
301 UCOL_SECONDARY,
302 UCOL_TERTIARY,
303 UCOL_QUATERNARY,
304 UCOL_IDENTICAL
305 };
306
#if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00E4rk Davis" for every combination of
 * case-first, alternate-handling, case-level and strength settings.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    /* Replace the 'a' of "Mark" with U+00E4. */
    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* Index with h: i still holds sizem from the loop above. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }
}
#endif
384
BillFairmanTest(void)385 static void BillFairmanTest(void) {
386 /*
387 ** check for actual locale via ICU resource bundles
388 **
389 ** lp points to the original locale ("fr_FR_....")
390 */
391
392 UResourceBundle *lr,*cr;
393 UErrorCode lec = U_ZERO_ERROR;
394 const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396 log_verbose("BillFairmanTest\n");
397
398 lr = ures_open(NULL,lp,&lec);
399 if (lr) {
400 cr = ures_getByKey(lr,"collations",0,&lec);
401 if (cr) {
402 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403 if (lp) {
404 if (U_SUCCESS(lec)) {
405 if(strcmp(lp, "fr") != 0) {
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407 }
408 }
409 }
410 ures_close(cr);
411 }
412 ures_close(lr);
413 }
414 }
415
/*
 * Verifies a primary-level relation p < q.
 *
 * Two comparisons are made through doTest:
 *   1. p against q directly;
 *   2. p and q again, each with one extra leading character
 *      (U+31A3 before p, U+310D before q); the result must still be
 *      UCOL_LESS.
 *
 * p and q must be NUL-terminated and short enough to fit, with the extra
 * character and terminator, in 256 UChars.
 */
static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    const UChar leadP = 0x31a3;
    const UChar leadQ = 0x310d;
    UChar prefixedP[256] = { 0 };
    UChar prefixedQ[256] = { 0 };

    doTest(col, p, q, UCOL_LESS);

    prefixedP[0] = leadP;
    prefixedQ[0] = leadQ;
    u_strcpy(prefixedP+1, p);
    u_strcpy(prefixedQ+1, q);

    doTest(col, prefixedP, prefixedQ, UCOL_LESS);
}
446
/*
 * Verifies a secondary-level relation p < q.
 *
 * Three comparisons are made through doTest:
 *   1. p against q directly (expects UCOL_LESS);
 *   2. "S"+p against "s"+q (expects UCOL_LESS);
 *   3. p+"b" against q+"a" (expects UCOL_GREATER).
 *
 * p and q must be NUL-terminated and leave room for one extra character and
 * the terminator in 256 UChars.
 */
static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* Upper-case S (U+0053) vs lower-case s (U+0073) in front. */
    left[0] = 0x0053;
    right[0] = 0x0073;
    u_strcpy(left+1, p);
    u_strcpy(right+1, q);
    doTest(col, left, right, UCOL_LESS);

    /* 'b' (U+0062) appended to p, 'a' (U+0061) appended to q. */
    pLen = u_strlen(p);
    u_strcpy(left, p);
    left[pLen] = 0x62;
    left[pLen+1] = 0;

    qLen = u_strlen(q);
    u_strcpy(right, q);
    right[qLen] = 0x61;
    right[qLen+1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
483
/*
 * Verifies a tertiary-level relation p < q.
 *
 * Three comparisons are made through doTest:
 *   1. p against q directly (expects UCOL_LESS);
 *   2. " "+p against "-"+q, i.e. U+0020 and U+002D prefixed
 *      (expects UCOL_LESS);
 *   3. p+U+00E0 against q+U+0061 (expects UCOL_GREATER).
 *
 * p and q must be NUL-terminated and leave room for one extra character and
 * the terminator in 256 UChars.
 */
static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar left[256] = { 0 };
    UChar right[256] = { 0 };
    int32_t pLen = 0;
    int32_t qLen = 0;

    doTest(col, p, q, UCOL_LESS);

    /* Space vs hyphen in front. */
    left[0] = 0x0020;
    right[0] = 0x002D;
    u_strcpy(left+1, p);
    u_strcpy(right+1, q);
    doTest(col, left, right, UCOL_LESS);

    /* U+00E0 appended to p, U+0061 appended to q. */
    pLen = u_strlen(p);
    u_strcpy(left, p);
    left[pLen] = 0xE0;
    left[pLen+1] = 0;

    qLen = u_strlen(q);
    u_strcpy(right, q);
    right[qLen] = 0x61;
    right[qLen+1] = 0;

    doTest(col, left, right, UCOL_GREATER);
}
518
/*
 * Verifies an identity relation: p and q must compare as UCOL_EQUAL under
 * col. Unlike the other level checks, no decorated variants are compared.
 */
static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    doTest(col, p, q, UCOL_EQUAL);
}
530
/*
 * Walks the tailoring rules of `coll` token by token and checks that the
 * collator really orders each pair of consecutive rule strings with the
 * relation the rule states (identical, primary, secondary or tertiary).
 *
 * coll   - collator whose rules (ucol_getRules) are parsed and verified
 * status - in/out error code; nothing is done if it already indicates a
 *          failure or if the collator has no rules. Set to
 *          U_MEMORY_ALLOCATION_ERROR when the working copy of the rules
 *          cannot be allocated.
 */
static void testCollator(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;
    uint32_t strength = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t exOffset = 0; uint32_t exLen = 0;
    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    uint32_t firstEx = 0;
    uint32_t firstLen = 0;
    UBool top_ = TRUE;
    uint16_t specs = 0;
    UBool startOfRules = TRUE;
    UBool lastReset = FALSE;
    UBool before = FALSE;
    uint32_t beforeStrength = 0;
    UColTokenParser src;
    UColOptionSet opts;

    UChar first[256];
    UChar second[256];
    UChar tempB[256];
    uint32_t tempLen;
    UChar *rulesCopy = NULL;
    UParseError parseError;

    uprv_memset(&src, 0, sizeof(UColTokenParser));

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);
    if(U_FAILURE(*status) || ruleLen <= 0) {
        return;
    }

    /* The parser works on a private copy with extra room at the end. */
    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    if(rulesCopy == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    src.current = src.source = rulesCopy;
    src.end = rulesCopy+ruleLen;
    src.extraCurrent = src.end;
    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    *first = *second = 0;

    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
        strength = src.parsedToken.strength;
        chOffset = src.parsedToken.charsOffset;
        chLen = src.parsedToken.charsLen;
        exOffset = src.parsedToken.extensionOffset;
        exLen = src.parsedToken.extensionLen;
        prefixOffset = src.parsedToken.prefixOffset;
        prefixLen = src.parsedToken.prefixLen;
        specs = src.parsedToken.flags;

        startOfRules = FALSE;
        top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
        if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
            second[0] = 0;
        } else {
            u_strncpy(second,src.source+chOffset, chLen);
            second[chLen] = 0;

            /* Append this token's extension to the previous string, unless
               the previous token already carried an extension. */
            if(exLen > 0 && firstEx == 0) {
                u_strncat(first, src.source+exOffset, exLen);
                first[firstLen+exLen] = 0;
            }

            /* Directly after a reset, put the prefix in front of the previous string. */
            if(lastReset == TRUE && prefixLen != 0) {
                /* NOTE(review): source and destination overlap whenever
                   firstLen > prefixLen - confirm u_strncpy is safe here. */
                u_strncpy(first+prefixLen, first, firstLen);
                u_strncpy(first, src.source+prefixOffset, prefixLen);
                first[firstLen+prefixLen] = 0;
                firstLen = firstLen+prefixLen;
            }

            if(before == TRUE) { /* swap first and second */
                u_strcpy(tempB, first);
                u_strcpy(first, second);
                u_strcpy(second, tempB);

                tempLen = firstLen;
                firstLen = chLen;
                chLen = tempLen;

                tempLen = firstEx;
                firstEx = exLen;
                exLen = tempLen;
                if(beforeStrength < strength) {
                    strength = beforeStrength;
                }
            }
        }
        lastReset = FALSE;

        switch(strength){
        case UCOL_IDENTICAL:
            testEquality(coll,first,second);
            break;
        case UCOL_PRIMARY:
            testPrimary(coll,first,second);
            break;
        case UCOL_SECONDARY:
            testSecondary(coll,first,second);
            break;
        case UCOL_TERTIARY:
            testTertiary(coll,first,second);
            break;
        case UCOL_TOK_RESET:
            lastReset = TRUE;
            before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
            if(before) {
                beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
            }
            break;
        default:
            break;
        }

        if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
            before = FALSE;
        } else {
            /* The current token becomes the left-hand side of the next relation. */
            firstLen = chLen;
            firstEx = exLen;
            u_strcpy(first, second);
        }
    }

    /* Free through src.source: the parser may have reallocated the copy. */
    uprv_free(src.source);
    uprv_free(src.reorderCodes);
}
660
ucaTest(void * collator,const int object,const UChar * source,const int sLen,const UChar * target,const int tLen)661 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
662 UCollator *UCA = (UCollator *)collator;
663 return ucol_strcoll(UCA, source, sLen, target, tLen);
664 }
665
666 /*
667 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
668 #if U_PLATFORM_HAS_WIN32_API
669 LCID lcid = (LCID)collator;
670 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
671 #else
672 return 0;
673 #endif
674 }
675 */
676
swampEarlier(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)677 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
678 UChar s1, UChar s2,
679 const UChar *s, const uint32_t sLen,
680 const UChar *t, const uint32_t tLen) {
681 UChar source[256] = {0};
682 UChar target[256] = {0};
683
684 source[0] = s1;
685 u_strcpy(source+1, s);
686 target[0] = s2;
687 u_strcpy(target+1, t);
688
689 return func(collator, opts, source, sLen+1, target, tLen+1);
690 }
691
swampLater(tst_strcoll * func,void * collator,int opts,UChar s1,UChar s2,const UChar * s,const uint32_t sLen,const UChar * t,const uint32_t tLen)692 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
693 UChar s1, UChar s2,
694 const UChar *s, const uint32_t sLen,
695 const UChar *t, const uint32_t tLen) {
696 UChar source[256] = {0};
697 UChar target[256] = {0};
698
699 u_strcpy(source, s);
700 source[sLen] = s1;
701 u_strcpy(target, t);
702 target[tLen] = s2;
703
704 return func(collator, opts, source, sLen+1, target, tLen+1);
705 }
706
/*
 * Estimates at which collation level `s` and `t` differ, given that `func`
 * already reported `result` for them.
 *
 * The strings are compared again with extra probe characters put in front
 * (swampEarlier) or at the end (swampLater); which of those comparisons keep
 * or flip `result` selects the level returned.
 *
 * Returns UCOL_IDENTICAL when result is UCOL_EQUAL or when no probe pattern
 * matches, otherwise UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY or
 * UCOL_QUATERNARY.
 */
static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
                              const UChar *s, const uint32_t sLen,
                              const UChar *t, const uint32_t tLen,
                              UCollationResult result) {
    /* Probe characters: one pair for the first check, one for the rest. */
    const UChar fSecondary = 0x310d;
    const UChar sSecondary = 0x31a3;
    const UChar fTertiary = 0x310f;
    const UChar sTertiary = 0x31b7;
    UCollationResult oposite;

    if(result == UCOL_EQUAL) {
        return UCOL_IDENTICAL;
    }
    oposite = (result == UCOL_GREATER) ? UCOL_LESS : UCOL_GREATER;

    /* Result survives a differing pair of leading characters. */
    if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
        return UCOL_PRIMARY;
    }

    /* Result survives the second leading pair in both orders. */
    if((swampEarlier(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
       (swampEarlier(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
        return UCOL_SECONDARY;
    }

    /* Result survives the same pair appended, in both orders. */
    if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
       (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
        return UCOL_TERTIARY;
    }

    /* Result is reversed by the appended pair in both orders. */
    if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == oposite) &&
       (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
        return UCOL_QUATERNARY;
    }

    return UCOL_IDENTICAL;
}
742
/*
 * Writes a textual relation symbol into `buffer` and returns `buffer`.
 *
 *   "=="            when res is UCOL_EQUAL or strength is 0xdeadbeef
 *   '>' repeated    strength+1 times when res is UCOL_GREATER
 *   '<' repeated    strength+1 times otherwise
 *
 * `buffer` must have room for strength+2 bytes (at least 3).
 */
static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    const uint32_t count = strength+1;
    char mark;

    if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
        strcpy(buffer, "==");
        return buffer;
    }

    mark = (char)((res == UCOL_GREATER) ? '>' : '<');
    memset(buffer, mark, count);
    buffer[count] = '\0';

    return buffer;
}
764
765
766
logFailure(const char * platform,const char * test,const UChar * source,const uint32_t sLen,const UChar * target,const uint32_t tLen,UCollationResult realRes,uint32_t realStrength,UCollationResult expRes,uint32_t expStrength,UBool error)767 static void logFailure (const char *platform, const char *test,
768 const UChar *source, const uint32_t sLen,
769 const UChar *target, const uint32_t tLen,
770 UCollationResult realRes, uint32_t realStrength,
771 UCollationResult expRes, uint32_t expStrength, UBool error) {
772
773 uint32_t i = 0;
774
775 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
776 static int32_t maxOutputLength = 0;
777 int32_t outputLength;
778
779 *sEsc = *tEsc = *s = *t = 0;
780 if(error == TRUE) {
781 log_err("Difference between expected and generated order. Run test with -v for more info\n");
782 } else if(getTestOption(VERBOSITY_OPTION) == 0) {
783 return;
784 }
785 for(i = 0; i<sLen; i++) {
786 sprintf(b, "%04X", source[i]);
787 strcat(sEsc, "\\u");
788 strcat(sEsc, b);
789 strcat(s, b);
790 strcat(s, " ");
791 if(source[i] < 0x80) {
792 sprintf(b, "(%c)", source[i]);
793 strcat(sEsc, b);
794 }
795 }
796 for(i = 0; i<tLen; i++) {
797 sprintf(b, "%04X", target[i]);
798 strcat(tEsc, "\\u");
799 strcat(tEsc, b);
800 strcat(t, b);
801 strcat(t, " ");
802 if(target[i] < 0x80) {
803 sprintf(b, "(%c)", target[i]);
804 strcat(tEsc, b);
805 }
806 }
807 /*
808 strcpy(output, "[[ ");
809 strcat(output, sEsc);
810 strcat(output, getRelationSymbol(expRes, expStrength, relation));
811 strcat(output, tEsc);
812
813 strcat(output, " : ");
814
815 strcat(output, sEsc);
816 strcat(output, getRelationSymbol(realRes, realStrength, relation));
817 strcat(output, tEsc);
818 strcat(output, " ]] ");
819
820 log_verbose("%s", output);
821 */
822
823
824 strcpy(output, "DIFF: ");
825
826 strcat(output, s);
827 strcat(output, " : ");
828 strcat(output, t);
829
830 strcat(output, test);
831 strcat(output, ": ");
832
833 strcat(output, sEsc);
834 strcat(output, getRelationSymbol(expRes, expStrength, relation));
835 strcat(output, tEsc);
836
837 strcat(output, " ");
838
839 strcat(output, platform);
840 strcat(output, ": ");
841
842 strcat(output, sEsc);
843 strcat(output, getRelationSymbol(realRes, realStrength, relation));
844 strcat(output, tEsc);
845
846 outputLength = (int32_t)strlen(output);
847 if(outputLength > maxOutputLength) {
848 maxOutputLength = outputLength;
849 U_ASSERT(outputLength < sizeof(output));
850 }
851
852 log_verbose("%s\n", output);
853
854 }
855
856 /*
857 static void printOutRules(const UChar *rules) {
858 uint32_t len = u_strlen(rules);
859 uint32_t i = 0;
860 char toPrint;
861 uint32_t line = 0;
862
863 fprintf(stdout, "Rules:");
864
865 for(i = 0; i<len; i++) {
866 if(rules[i]<0x7f && rules[i]>=0x20) {
867 toPrint = (char)rules[i];
868 if(toPrint == '&') {
869 line = 1;
870 fprintf(stdout, "\n&");
871 } else if(toPrint == ';') {
872 fprintf(stdout, "<<");
873 line+=2;
874 } else if(toPrint == ',') {
875 fprintf(stdout, "<<<");
876 line+=3;
877 } else {
878 fprintf(stdout, "%c", toPrint);
879 line++;
880 }
881 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
882 fprintf(stdout, "\\u%04X", rules[i]);
883 line+=6;
884 }
885 if(line>72) {
886 fprintf(stdout, "\n");
887 line = 0;
888 }
889 }
890
891 log_verbose("\n");
892
893 }
894 */
895
testSwitch(tst_strcoll * func,void * collator,int opts,uint32_t strength,const UChar * first,const UChar * second,const char * msg,UBool error)896 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
897 uint32_t diffs = 0;
898 UCollationResult realResult;
899 uint32_t realStrength;
900
901 uint32_t sLen = u_strlen(first);
902 uint32_t tLen = u_strlen(second);
903
904 realResult = func(collator, opts, first, sLen, second, tLen);
905 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
906
907 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
909 diffs++;
910 } else if(realResult != UCOL_LESS || realStrength != strength) {
911 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
912 diffs++;
913 }
914 return diffs;
915 }
916
917
testAgainstUCA(UCollator * coll,UCollator * UCA,const char * refName,UBool error,UErrorCode * status)918 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
919 const UChar *rules = NULL, *current = NULL;
920 int32_t ruleLen = 0;
921 uint32_t strength = 0;
922 uint32_t chOffset = 0; uint32_t chLen = 0;
923 uint32_t exOffset = 0; uint32_t exLen = 0;
924 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
925 /* uint32_t rExpsLen = 0; */
926 uint32_t firstLen = 0, secondLen = 0;
927 UBool varT = FALSE; UBool top_ = TRUE;
928 uint16_t specs = 0;
929 UBool startOfRules = TRUE;
930 UColTokenParser src;
931 UColOptionSet opts;
932
933 UChar first[256];
934 UChar second[256];
935 UChar *rulesCopy = NULL;
936
937 uint32_t UCAdiff = 0;
938 uint32_t Windiff = 1;
939 UParseError parseError;
940
941 (void)top_; /* Suppress set but not used warnings. */
942 (void)varT;
943 (void)secondLen;
944 (void)prefixLen;
945 (void)prefixOffset;
946
947 uprv_memset(&src, 0, sizeof(UColTokenParser));
948 src.opts = &opts;
949
950 rules = ucol_getRules(coll, &ruleLen);
951
952 /*printOutRules(rules);*/
953
954 if(U_SUCCESS(*status) && ruleLen > 0) {
955 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
956 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
957 src.current = src.source = rulesCopy;
958 src.end = rulesCopy+ruleLen;
959 src.extraCurrent = src.end;
960 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
961 *first = *second = 0;
962
963 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
964 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
965 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
966 strength = src.parsedToken.strength;
967 chOffset = src.parsedToken.charsOffset;
968 chLen = src.parsedToken.charsLen;
969 exOffset = src.parsedToken.extensionOffset;
970 exLen = src.parsedToken.extensionLen;
971 prefixOffset = src.parsedToken.prefixOffset;
972 prefixLen = src.parsedToken.prefixLen;
973 specs = src.parsedToken.flags;
974
975 startOfRules = FALSE;
976 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
977 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
978
979 u_strncpy(second,src.source+chOffset, chLen);
980 second[chLen] = 0;
981 secondLen = chLen;
982
983 if(exLen > 0) {
984 u_strncat(first, src.source+exOffset, exLen);
985 first[firstLen+exLen] = 0;
986 firstLen += exLen;
987 }
988
989 if(strength != UCOL_TOK_RESET) {
990 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
991 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
992 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
993 }
994 }
995
996
997 firstLen = chLen;
998 u_strcpy(first, second);
999
1000 }
1001 if(UCAdiff != 0 && Windiff != 0) {
1002 log_verbose("\n");
1003 }
1004 if(UCAdiff == 0) {
1005 log_verbose("No immediate difference with %s!\n", refName);
1006 }
1007 if(Windiff == 0) {
1008 log_verbose("No immediate difference with Win32!\n");
1009 }
1010 uprv_free(src.source);
1011 uprv_free(src.reorderCodes);
1012 }
1013 }
1014
1015 /*
1016 * Takes two CEs (lead and continuation) and
1017 * compares them as CEs should be compared:
1018 * primary vs. primary, secondary vs. secondary
1019 * tertiary vs. tertiary
1020 */
/*
 * Compares two collation elements, each given as a lead CE plus a
 * continuation CE (s1/s2 against t1/t2), level by level.
 *
 * For each level the lead bits form the high part of the key and the
 * continuation bits the low part:
 *   primary   - bits 31..16 of each CE
 *   secondary - bits 15..8
 *   tertiary  - bits 7..0
 *
 * Returns 0 when both CEs of the pair are bit-for-bit equal, -1 when the
 * s pair sorts first, 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                          uint32_t t1, uint32_t t2) {
    uint32_t sKey = 0, tKey = 0;

    if(s1 == t1 && s2 == t2) {
        return 0;
    }

    /* primary weights */
    sKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
    tKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* secondary weights */
    sKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
    tKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
    if(sKey != tKey) {
        return (sKey < tKey) ? -1 : 1;
    }

    /* tertiary weights; the pairs are known to differ, so no 0 here */
    sKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
    tKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
    return (sKey < tKey) ? -1 : 1;
}
1051
/*
 * CE boundaries recorded for one indirect position: a start CE and a limit
 * CE, each stored as a lead value plus a continuation value.
 */
typedef struct {
    uint32_t startCE;      /* lead CE of the start boundary */
    uint32_t startContCE;  /* continuation CE of the start boundary */
    uint32_t limitCE;      /* lead CE of the limit boundary, 0 if none */
    uint32_t limitContCE;  /* continuation CE of the limit boundary, 0 if none */
} indirectBoundaries;
1058
/*
 * These values are used for finding CE values for indirect positioning.
 * Indirect positioning is a mechanism for allowing resets on symbolic
 * values. It only works for resets, and you cannot tailor indirect names.
 * An indirect name can define either an anchor point or a range. An
 * anchor point behaves in exactly the same way as a code point in a reset
 * would, except that it cannot be tailored. A range (we currently only
 * know of the [top] range) will explicitly set the upper bound for
 * generated CEs, thus allowing for better control over how many CEs can
 * be squeezed in between in the range without a performance penalty.
 * In that respect, we use [top] for tailoring of locales that use CJK
 * characters. Other indirect values are currently a pure convenience;
 * they can be used to ensure that the CEs will always be positioned in
 * the same place relative to a point with known properties (e.g. first
 * primary ignorable).
 */
1073 static indirectBoundaries ucolIndirectBoundaries[15];
1074 static UBool indirectBoundariesSet = FALSE;
setIndirectBoundaries(uint32_t indexR,uint32_t * start,uint32_t * end)1075 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1076 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1077 /* to initalize here. */
1078 ucolIndirectBoundaries[indexR].startCE = start[0];
1079 ucolIndirectBoundaries[indexR].startContCE = start[1];
1080 if(end) {
1081 ucolIndirectBoundaries[indexR].limitCE = end[0];
1082 ucolIndirectBoundaries[indexR].limitContCE = end[1];
1083 } else {
1084 ucolIndirectBoundaries[indexR].limitCE = 0;
1085 ucolIndirectBoundaries[indexR].limitContCE = 0;
1086 }
1087 }
1088
/**
 * Re-parses the tailoring rules of a collator and checks the collation
 * elements (CEs) the collator actually produces against them.
 *
 * The rules returned by ucol_getRules() are copied and tokenized with
 * ucol_tok_parseNextToken(). For every token the first CE (and its
 * continuation, if any) of the token's characters is fetched from `coll`.
 * A reset token establishes the base CE; for every following relation the
 * function verifies that
 *   - an identical-strength relation yields the same CE as the base/previous one,
 *   - the CE does not exceed the upper bound obtained from the inverse UCA
 *     (or the [top] boundary), and
 *   - the sequence of CEs is ordered consistently with the previous token
 *     (reversed once directly after a [before] reset).
 * Violations are reported through log_err().
 *
 * All resources acquired here (root collator, collIterate, rule copy and
 * reorder codes) are released on every exit path.
 *
 * @param coll   collator whose rules and CEs are checked.
 * @param status in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *               rule copy cannot be allocated.
 */
static void testCEs(UCollator *coll, UErrorCode *status) {
    const UChar *rules = NULL, *current = NULL;
    int32_t ruleLen = 0;

    uint32_t strength = 0;
    uint32_t maxStrength = UCOL_IDENTICAL;
    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
    uint32_t lastCE;
    uint32_t lastContCE;

    int32_t result = 0;
    uint32_t chOffset = 0; uint32_t chLen = 0;
    uint32_t exOffset = 0; uint32_t exLen = 0;
    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    uint32_t oldOffset = 0;

    uint16_t specs = 0;
    UBool varT = FALSE; UBool top_ = TRUE;
    UBool startOfRules = TRUE;
    UBool before = FALSE;
    UColTokenParser src;
    UColOptionSet opts;
    UParseError parseError;
    UChar *rulesCopy = NULL;
    collIterate *c = uprv_new_collIterate(status);
    UCAConstants *consts = NULL;
    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
             UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
    const char *colLoc;
    UCollator *UCA = ucol_open("root", status);

    (void)varT;     /* Suppress set but not used warnings. */
    (void)prefixLen;
    (void)prefixOffset;
    (void)exLen;
    (void)exOffset;

    if (U_FAILURE(*status)) {
        log_err("Could not open root collator %s\n", u_errorName(*status));
        uprv_delete_collIterate(c);
        return;
    }

    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
    if (U_FAILURE(*status)) {
        log_err("Could not get collator name: %s\n", u_errorName(*status));
        ucol_close(UCA);
        uprv_delete_collIterate(c);
        return;
    }

    uprv_memset(&src, 0, sizeof(UColTokenParser));

    /* The UCA constants live inside the root collator's data image. */
    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;

    src.opts = &opts;

    rules = ucol_getRules(coll, &ruleLen);

    src.invUCA = ucol_initInverseUCA(status);

    /* Populate the indirect-position table once per process. */
    if(indirectBoundariesSet == FALSE) {
        /* UCOL_RESET_TOP_VALUE */
        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
        /* UCOL_LAST_PRIMARY_IGNORABLE */
        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
        /* UCOL_FIRST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
        /* UCOL_LAST_SECONDARY_IGNORABLE */
        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
        /* UCOL_FIRST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
        /* UCOL_LAST_TERTIARY_IGNORABLE */
        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
        /* UCOL_FIRST_VARIABLE */
        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
        /* UCOL_LAST_VARIABLE */
        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
        /* UCOL_FIRST_NON_VARIABLE */
        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
        /* UCOL_LAST_NON_VARIABLE */
        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
        /* UCOL_FIRST_IMPLICIT */
        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
        /* UCOL_LAST_IMPLICIT */
        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
        /* UCOL_FIRST_TRAILING */
        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
        /* UCOL_LAST_TRAILING */
        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
        indirectBoundariesSet = TRUE;
    }


    if(U_SUCCESS(*status) && ruleLen > 0) {
        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
        if(rulesCopy == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            log_err("%s: out of memory while copying rules\n", colLoc);
            ucol_close(UCA);
            uprv_delete_collIterate(c);
            return;
        }
        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
        src.current = src.source = rulesCopy;
        src.end = rulesCopy+ruleLen;
        src.extraCurrent = src.end;
        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy.
           Always go through src.source from here on. */
        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
            strength = src.parsedToken.strength;
            chOffset = src.parsedToken.charsOffset;
            chLen = src.parsedToken.charsLen;
            exOffset = src.parsedToken.extensionOffset;
            exLen = src.parsedToken.extensionLen;
            prefixOffset = src.parsedToken.prefixOffset;
            prefixLen = src.parsedToken.prefixLen;
            specs = src.parsedToken.flags;

            startOfRules = FALSE;
            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);

            currCE = ucol_getNextCE(coll, c, status);
            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
                log_verbose("Thai prevowel detected. Will pick next CE\n");
                currCE = ucol_getNextCE(coll, c, status);
            }

            /* Only keep the second CE if it is a continuation of the first. */
            currContCE = ucol_getNextCE(coll, c, status);
            if(!isContinuation(currContCE)) {
                currContCE = 0;
            }

            /* we need to repack CEs here */

            if(strength == UCOL_TOK_RESET) {
                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
                if(top_ == TRUE) {
                    /* Indirect reset: take the CEs from the boundary table. */
                    int32_t tokenIndex = src.parsedToken.indirectIndex;

                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
                } else {
                    nextCE = baseCE = currCE;
                    nextContCE = baseContCE = currContCE;
                }
                maxStrength = UCOL_IDENTICAL;
            } else {
                if(strength < maxStrength) {
                    /* A stronger relation than seen so far: recompute the upper bound. */
                    maxStrength = strength;
                    if(baseCE == UCOL_RESET_TOP_VALUE) {
                        log_verbose("Resetting to [top]\n");
                        nextCE = UCOL_NEXT_TOP_VALUE;
                        nextContCE = UCOL_NEXT_TOP_CONT;
                    } else {
                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
                    }
                    if(result < 0) {
                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
                        } else {
                            log_err("%s: couldn't find the CE\n", colLoc);
                        }
                        /* Abandon this collator, but leave the loop instead of
                           returning so the cleanup below still runs. */
                        break;
                    }
                }

                currCE &= 0xFFFFFF3F;
                currContCE &= 0xFFFFFFBF;

                if(maxStrength == UCOL_IDENTICAL) {
                    if(baseCE != currCE || baseContCE != currContCE) {
                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                    }
                } else {
                    if(strength == UCOL_IDENTICAL) {
                        if(lastCE != currCE || lastContCE != currContCE) {
                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
                        }
                    } else {
                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
                            log_err("%s: current CE is not less than base CE\n", colLoc);
                        }
                        if(!before) {
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        } else {
                            /* Only the first relation after a [before] reset is reversed. */
                            before = FALSE;
                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
                            }
                        }
                    }
                }

            }

            oldOffset = chOffset;
            lastCE = currCE & 0xFFFFFF3F;
            lastContCE = currContCE & 0xFFFFFFBF;
        }
        uprv_free(src.source);
        uprv_free(src.reorderCodes);
    }
    ucol_close(UCA);
    uprv_delete_collIterate(c);
}
1310
1311 #if 0
1312 /* these locales are now picked from index RB */
1313 static const char* localesToTest[] = {
1314 "ar", "bg", "ca", "cs", "da",
1315 "el", "en_BE", "en_US_POSIX",
1316 "es", "et", "fi", "fr", "hi",
1317 "hr", "hu", "is", "iw", "ja",
1318 "ko", "lt", "lv", "mk", "mt",
1319 "nb", "nn", "nn_NO", "pl", "ro",
1320 "ru", "sh", "sk", "sl", "sq",
1321 "sr", "sv", "th", "tr", "uk",
1322 "vi", "zh", "zh_TW"
1323 };
1324 #endif
1325
/*
 * Hand-written tailoring rules that RamsRulesTest() unescapes, opens with
 * ucol_openRules() and passes to testCollator() and testCEs().
 * The commented-out strings next to the entries are kept from the original
 * source; presumably they are earlier formulations of the same rules.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
    /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1339
1340
TestCollations(void)1341 static void TestCollations(void) {
1342 int32_t noOfLoc = uloc_countAvailable();
1343 int32_t i = 0, j = 0;
1344
1345 UErrorCode status = U_ZERO_ERROR;
1346 char cName[256];
1347 UChar name[256];
1348 int32_t nameSize;
1349
1350
1351 const char *locName = NULL;
1352 UCollator *coll = NULL;
1353 UCollator *UCA = ucol_open("", &status);
1354 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1355 if (U_FAILURE(status)) {
1356 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1357 return;
1358 }
1359 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1360
1361 for(i = 0; i<noOfLoc; i++) {
1362 status = U_ZERO_ERROR;
1363 locName = uloc_getAvailable(i);
1364 if(uprv_strcmp("ja", locName) == 0) {
1365 log_verbose("Don't know how to test prefixes\n");
1366 continue;
1367 }
1368 if(hasCollationElements(locName)) {
1369 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1370 for(j = 0; j<nameSize; j++) {
1371 cName[j] = (char)name[j];
1372 }
1373 cName[nameSize] = 0;
1374 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1375 coll = ucol_open(locName, &status);
1376 if(U_SUCCESS(status)) {
1377 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1378 ucol_close(coll);
1379 } else {
1380 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1381 status = U_ZERO_ERROR;
1382 }
1383 }
1384 }
1385 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1386 ucol_close(UCA);
1387 }
1388
RamsRulesTest(void)1389 static void RamsRulesTest(void) {
1390 UErrorCode status = U_ZERO_ERROR;
1391 int32_t i = 0;
1392 UCollator *coll = NULL;
1393 UChar rule[2048];
1394 uint32_t ruleLen;
1395 int32_t noOfLoc = uloc_countAvailable();
1396 const char *locName = NULL;
1397
1398 log_verbose("RamsRulesTest\n");
1399
1400 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1401 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1402 return;
1403 }
1404
1405 for(i = 0; i<noOfLoc; i++) {
1406 locName = uloc_getAvailable(i);
1407 if(hasCollationElements(locName)) {
1408 if (uprv_strcmp("ja", locName)==0) {
1409 log_verbose("Don't know how to test Japanese because of prefixes\n");
1410 continue;
1411 }
1412 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1413 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1414 continue;
1415 }
1416 if (uprv_strcmp("bn", locName)==0 ||
1417 uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */
1418 uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */
1419 uprv_strcmp("en_US_POSIX", locName)==0 ||
1420 uprv_strcmp("fa", locName)==0 || /* Add in #10222 with CLDR 24 integration */
1421 uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */
1422 uprv_strcmp("gl", locName)==0 || /* Add due to import per cldrbug 5647 */
1423 uprv_strcmp("gl_ES", locName)==0 || /* Add due to import per cldrbug 5647 */
1424 uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1425 uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1426 uprv_strcmp("km", locName)==0 ||
1427 uprv_strcmp("km_KH", locName)==0 ||
1428 uprv_strcmp("my", locName)==0 ||
1429 uprv_strcmp("ps", locName)==0 || /* Add in #10222 with CLDR 24 integration */
1430 uprv_strcmp("si", locName)==0 ||
1431 uprv_strcmp("si_LK", locName)==0 ||
1432 uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */
1433 uprv_strcmp("th", locName)==0 ||
1434 uprv_strcmp("th_TH", locName)==0 ||
1435 uprv_strcmp("zh", locName)==0 ||
1436 uprv_strcmp("zh_Hant", locName)==0
1437 ) {
1438 if(log_knownIssue("6040", NULL)) {
1439 log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1440 continue;
1441 }
1442 }
1443 log_verbose("Testing locale %s\n", locName);
1444 status = U_ZERO_ERROR;
1445 coll = ucol_open(locName, &status);
1446 if(U_SUCCESS(status)) {
1447 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1448 if(coll->image->jamoSpecial == TRUE) {
1449 log_err("%s has special JAMOs\n", locName);
1450 }
1451 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1452 testCollator(coll, &status);
1453 testCEs(coll, &status);
1454 } else {
1455 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1456 }
1457 ucol_close(coll);
1458 } else {
1459 log_err("Could not open %s: %s\n", locName, u_errorName(status));
1460 }
1461 }
1462 }
1463
1464 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1465 log_verbose("Testing rule: %s\n", rulesToTest[i]);
1466 ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1467 status = U_ZERO_ERROR;
1468 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1469 if(U_SUCCESS(status)) {
1470 testCollator(coll, &status);
1471 testCEs(coll, &status);
1472 ucol_close(coll);
1473 } else {
1474 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1475 }
1476 }
1477
1478 }
1479
IsTailoredTest(void)1480 static void IsTailoredTest(void) {
1481 UErrorCode status = U_ZERO_ERROR;
1482 uint32_t i = 0;
1483 UCollator *coll = NULL;
1484 UChar rule[2048];
1485 UChar tailored[2048];
1486 UChar notTailored[2048];
1487 uint32_t ruleLen, tailoredLen, notTailoredLen;
1488
1489 log_verbose("IsTailoredTest\n");
1490
1491 u_uastrcpy(rule, "&Z < A, B, C;c < d");
1492 ruleLen = u_strlen(rule);
1493
1494 u_uastrcpy(tailored, "ABCcd");
1495 tailoredLen = u_strlen(tailored);
1496
1497 u_uastrcpy(notTailored, "ZabD");
1498 notTailoredLen = u_strlen(notTailored);
1499
1500 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1501 if(U_SUCCESS(status)) {
1502 for(i = 0; i<tailoredLen; i++) {
1503 if(!ucol_isTailored(coll, tailored[i], &status)) {
1504 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1505 }
1506 }
1507 for(i = 0; i<notTailoredLen; i++) {
1508 if(ucol_isTailored(coll, notTailored[i], &status)) {
1509 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1510 }
1511 }
1512 ucol_close(coll);
1513 }
1514 else {
1515 log_err_status(status, "Can't tailor rules\n");
1516 }
1517 /* Code coverage */
1518 status = U_ZERO_ERROR;
1519 coll = ucol_open("ja", &status);
1520 if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1521 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1522 }
1523 ucol_close(coll);
1524 }
1525
1526
/*
 * Escaped test strings for TestChMove(), listed in the order the "cs"
 * collator is expected to sort them: TestChMove() requires every entry to
 * compare UCOL_LESS than every entry that follows it.
 */
const static char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
1546
TestChMove(void)1547 static void TestChMove(void) {
1548 UChar t1[256] = {0};
1549 UChar t2[256] = {0};
1550
1551 uint32_t i = 0, j = 0;
1552 uint32_t size = 0;
1553 UErrorCode status = U_ZERO_ERROR;
1554
1555 UCollator *coll = ucol_open("cs", &status);
1556
1557 if(U_SUCCESS(status)) {
1558 size = sizeof(chTest)/sizeof(chTest[0]);
1559 for(i = 0; i < size-1; i++) {
1560 for(j = i+1; j < size; j++) {
1561 u_unescape(chTest[i], t1, 256);
1562 u_unescape(chTest[j], t2, 256);
1563 doTest(coll, t1, t2, UCOL_LESS);
1564 }
1565 }
1566 }
1567 else {
1568 log_data_err("Can't open collator");
1569 }
1570 ucol_close(coll);
1571 }
1572
1573
1574
1575
/*
 * Escaped strings in the order expected for the rule
 * "&\u4e00 < a <<< A < b <<< B"; the same data appears as the second case
 * of the table in TestImplicitTailoring().
 * NOTE(review): no live code in the visible part of this file reads this
 * array - confirm whether it is still needed.
 */
const static char impTest[][20] = {
    "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
1584
1585
/**
 * Table-driven test of tailorings that reset on or before CJK ideographs
 * U+4E00/U+4E01. Each case supplies a rule string and the strings in their
 * expected order; both are handed to genericRulesStarter().
 *
 * An earlier hand-rolled variant of the second case, which compared all
 * pairs of impTest with doTest(), has been superseded by this table.
 */
static void TestImplicitTailoring(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
        { "&\\u4e00 < a <<< A < b <<< B",  { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
        { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
        { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
    };
    const int32_t caseCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
    int32_t caseNo;

    for(caseNo = 0; caseNo < caseCount; caseNo++) {
        genericRulesStarter(tests[caseNo].rules, tests[caseNo].data, tests[caseNo].len);
    }
}
1635
TestFCDProblem(void)1636 static void TestFCDProblem(void) {
1637 UChar t1[256] = {0};
1638 UChar t2[256] = {0};
1639
1640 const char *s1 = "\\u0430\\u0306\\u0325";
1641 const char *s2 = "\\u04D1\\u0325";
1642
1643 UErrorCode status = U_ZERO_ERROR;
1644 UCollator *coll = ucol_open("", &status);
1645 u_unescape(s1, t1, 256);
1646 u_unescape(s2, t2, 256);
1647
1648 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1649 doTest(coll, t1, t2, UCOL_EQUAL);
1650
1651 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1652 doTest(coll, t1, t2, UCOL_EQUAL);
1653
1654 ucol_close(coll);
1655 }
1656
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
/* Capacity, in UChars, of the normalization buffers used by TestComposeDecompose(). */
#define NORM_BUFFER_TEST_LEN 18
/*
 * One test case of TestComposeDecompose(): a code point together with the
 * two strings that are compared for it.
 */
typedef struct {
    UChar32 u;                          /* code point under test */
    UChar NFC[NORM_BUFFER_TEST_LEN];    /* NFC form; TestComposeDecompose() overwrites it with the raw character when that differs from its NFD form */
    UChar NFD[NORM_BUFFER_TEST_LEN];    /* NFD form */
} tester;
1667
TestComposeDecompose(void)1668 static void TestComposeDecompose(void) {
1669 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1670 static const UChar UNICODESET_STR[] = {
1671 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1672 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1673 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1674 };
1675 int32_t noOfLoc;
1676 int32_t i = 0, j = 0;
1677
1678 UErrorCode status = U_ZERO_ERROR;
1679 const char *locName = NULL;
1680 uint32_t nfcSize;
1681 uint32_t nfdSize;
1682 tester **t;
1683 uint32_t noCases = 0;
1684 UCollator *coll = NULL;
1685 UChar32 u = 0;
1686 UChar comp[NORM_BUFFER_TEST_LEN];
1687 uint32_t len = 0;
1688 UCollationElements *iter;
1689 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1690 int32_t charsToTestSize;
1691
1692 noOfLoc = uloc_countAvailable();
1693
1694 coll = ucol_open("", &status);
1695 if (U_FAILURE(status)) {
1696 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1697 return;
1698 }
1699 charsToTestSize = uset_size(charsToTest);
1700 if (charsToTestSize <= 0) {
1701 log_err("Set was zero. Missing data?\n");
1702 return;
1703 }
1704 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1705 t[0] = (tester *)malloc(sizeof(tester));
1706 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1707
1708 for(u = 0; u < charsToTestSize; u++) {
1709 UChar32 ch = uset_charAt(charsToTest, u);
1710 len = 0;
1711 U16_APPEND_UNSAFE(comp, len, ch);
1712 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1713 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1714
1715 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1716 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1717 t[noCases]->u = ch;
1718 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1719 u_strncpy(t[noCases]->NFC, comp, len);
1720 t[noCases]->NFC[len] = 0;
1721 }
1722 noCases++;
1723 t[noCases] = (tester *)malloc(sizeof(tester));
1724 uprv_memset(t[noCases], 0, sizeof(tester));
1725 }
1726 }
1727 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1728 uset_close(charsToTest);
1729 charsToTest = NULL;
1730
1731 for(u=0; u<(UChar32)noCases; u++) {
1732 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1733 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1734 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1735 }
1736 }
1737 /*
1738 for(u = 0; u < charsToTestSize; u++) {
1739 if(!(u&0xFFFF)) {
1740 log_verbose("%08X ", u);
1741 }
1742 uprv_memset(t[noCases], 0, sizeof(tester));
1743 t[noCases]->u = u;
1744 len = 0;
1745 U16_APPEND_UNSAFE(comp, len, u);
1746 comp[len] = 0;
1747 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1748 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1749 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1750 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1751 }
1752 */
1753
1754 ucol_close(coll);
1755
1756 log_verbose("Testing locales, number of cases = %i\n", noCases);
1757 for(i = 0; i<noOfLoc; i++) {
1758 status = U_ZERO_ERROR;
1759 locName = uloc_getAvailable(i);
1760 if(hasCollationElements(locName)) {
1761 char cName[256];
1762 UChar name[256];
1763 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1764
1765 for(j = 0; j<nameSize; j++) {
1766 cName[j] = (char)name[j];
1767 }
1768 cName[nameSize] = 0;
1769 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1770
1771 coll = ucol_open(locName, &status);
1772 ucol_setStrength(coll, UCOL_IDENTICAL);
1773 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1774
1775 for(u=0; u<(UChar32)noCases; u++) {
1776 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1777 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1778 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1779 log_verbose("Testing NFC\n");
1780 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1781 backAndForth(iter);
1782 log_verbose("Testing NFD\n");
1783 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1784 backAndForth(iter);
1785 }
1786 }
1787 ucol_closeElements(iter);
1788 ucol_close(coll);
1789 }
1790 }
1791 for(u = 0; u <= (UChar32)noCases; u++) {
1792 free(t[u]);
1793 }
1794 free(t);
1795 }
1796
TestEmptyRule(void)1797 static void TestEmptyRule(void) {
1798 UErrorCode status = U_ZERO_ERROR;
1799 UChar rulez[] = { 0 };
1800 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1801
1802 ucol_close(coll);
1803 }
1804
TestUCARules(void)1805 static void TestUCARules(void) {
1806 UErrorCode status = U_ZERO_ERROR;
1807 UChar b[256];
1808 UChar *rules = b;
1809 uint32_t ruleLen = 0;
1810 UCollator *UCAfromRules = NULL;
1811 UCollator *coll = ucol_open("", &status);
1812 if(status == U_FILE_ACCESS_ERROR) {
1813 log_data_err("Is your data around?\n");
1814 return;
1815 } else if(U_FAILURE(status)) {
1816 log_err("Error opening collator\n");
1817 return;
1818 }
1819 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1820
1821 log_verbose("TestUCARules\n");
1822 if(ruleLen > 256) {
1823 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1824 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1825 }
1826 log_verbose("Rules length is %d\n", ruleLen);
1827 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1828 if(U_SUCCESS(status)) {
1829 ucol_close(UCAfromRules);
1830 } else {
1831 log_verbose("Unable to create a collator from UCARules!\n");
1832 }
1833 /*
1834 u_unescape(blah, b, 256);
1835 ucol_getSortKey(coll, b, 1, res, 256);
1836 */
1837 ucol_close(coll);
1838 if(rules != b) {
1839 free(rules);
1840 }
1841 }
1842
1843
1844 /* Pinyin tonal order */
1845 /*
1846 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1847 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1848 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1849 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1850 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1851 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1852 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1853 .. (\u00fc)
1854
1855 However, in testing we got the following order:
1856 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1857 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1858 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1859 .. (\u0113)
1860 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1861 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1862 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1863 .. (\u01d8)
1864 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1865 */
1866
/**
 * Tests [before 1] resets: tailors the Pinyin tone-marked vowels in front of
 * a, e, i, o and u (and the u-diaeresis variants after u) and passes the
 * rule plus the expected order to genericRulesStarter().
 */
static void TestBefore(void) {
    const static char *expectedOrder[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    const char *tonalRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tonalRules, expectedOrder, sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
1885
1886 #if 0
1887 /* superceded by TestBeforePinyin */
1888 static void TestJ784(void) {
1889 const static char *data[] = {
1890 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1891 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1892 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1893 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1894 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1895 "\\u00fc",
1896 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1897 };
1898 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
1899 }
1900 #endif
1901
1902 #if 0
1903 /* superceded by the changes to the lv locale */
1904 static void TestJ831(void) {
1905 const static char *data[] = {
1906 "I",
1907 "i",
1908 "Y",
1909 "y"
1910 };
1911 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
1912 }
1913 #endif
1914
/**
 * Checks one expected ordering of strings around "ae" / U+00E6 twice: with
 * the "fr" locale collator and with a rule-based collator that uses
 * [backwards 2] and tailors U+00E6 / U+00C6 as expansions after A.
 */
static void TestJ815(void) {
    const static char *expectedOrder[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };
    const uint32_t count = sizeof(expectedOrder)/sizeof(expectedOrder[0]);

    genericLocaleStarter("fr", expectedOrder, count);
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", expectedOrder, count);
}
1935
1936
1937 /*
1938 "& a < b < c < d& r < c", "& a < b < d& r < c",
1939 "& a < b < c < d& c < m", "& a < b < c < m < d",
1940 "& a < b < c < d& a < m", "& a < m < b < c < d",
1941 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1942 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1943 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1944 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1945 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1946 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1947 */
TestRedundantRules(void)1948 static void TestRedundantRules(void) {
1949 int32_t i;
1950
1951 static const struct {
1952 const char *rules;
1953 const char *expectedRules;
1954 const char *testdata[8];
1955 uint32_t testdatalen;
1956 } tests[] = {
1957 /* this test conflicts with positioning of CODAN placeholder */
1958 /*{
1959 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1960 "&\\u2089<<<x",
1961 {"\\u2089", "x"}, 2
1962 }, */
1963 /* this test conflicts with the [before x] syntax tightening */
1964 /*{
1965 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1966 "&\\u0252<<<x",
1967 {"\\u0252", "x"}, 2
1968 }, */
1969 /* this test conflicts with the [before x] syntax tightening */
1970 /*{
1971 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1972 "& a <<< x < b <<< c << d <<< e",
1973 {"a", "x", "b", "c", "d", "e"}, 6
1974 }, */
1975 {
1976 "& a < b < c < d& [before 1] c < m",
1977 "& a < b < m < c < d",
1978 {"a", "b", "m", "c", "d"}, 5
1979 },
1980 {
1981 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1982 "& a < b <<< c << d <<< x <<< e",
1983 {"a", "b", "c", "d", "x", "e"}, 6
1984 },
1985 /* this test conflicts with the [before x] syntax tightening */
1986 /* {
1987 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1988 "& a < b <<< c <<< x << d <<< e",
1989 {"a", "b", "c", "x", "d", "e"},, 6
1990 }, */
1991 {
1992 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1993 "& a < b <<< c << d <<< e <<< f < x < g",
1994 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1995 },
1996 {
1997 "& a <<< b << c < d& a < m",
1998 "& a <<< b << c < m < d",
1999 {"a", "b", "c", "m", "d"}, 5
2000 },
2001 {
2002 "&a<b<<b\\u0301 &z<b",
2003 "&a<b\\u0301 &z<b",
2004 {"a", "b\\u0301", "z", "b"}, 4
2005 },
2006 {
2007 "&z<m<<<q<<<m",
2008 "&z<q<<<m",
2009 {"z", "q", "m"},3
2010 },
2011 {
2012 "&z<<<m<q<<<m",
2013 "&z<q<<<m",
2014 {"z", "q", "m"}, 3
2015 },
2016 {
2017 "& a < b < c < d& r < c",
2018 "& a < b < d& r < c",
2019 {"a", "b", "d"}, 3
2020 },
2021 {
2022 "& a < b < c < d& r < c",
2023 "& a < b < d& r < c",
2024 {"r", "c"}, 2
2025 },
2026 {
2027 "& a < b < c < d& c < m",
2028 "& a < b < c < m < d",
2029 {"a", "b", "c", "m", "d"}, 5
2030 },
2031 {
2032 "& a < b < c < d& a < m",
2033 "& a < m < b < c < d",
2034 {"a", "m", "b", "c", "d"}, 5
2035 }
2036 };
2037
2038
2039 UCollator *credundant = NULL;
2040 UCollator *cresulting = NULL;
2041 UErrorCode status = U_ZERO_ERROR;
2042 UChar rlz[2048] = { 0 };
2043 uint32_t rlen = 0;
2044
2045 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2046 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2047 rlen = u_unescape(tests[i].rules, rlz, 2048);
2048
2049 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2050 if(status == U_FILE_ACCESS_ERROR) {
2051 log_data_err("Is your data around?\n");
2052 return;
2053 } else if(U_FAILURE(status)) {
2054 log_err("Error opening collator\n");
2055 return;
2056 }
2057
2058 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2059 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2060
2061 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2062
2063 ucol_close(credundant);
2064 ucol_close(cresulting);
2065
2066 log_verbose("testing using data\n");
2067
2068 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2069 }
2070
2071 }
2072
TestExpansionSyntax(void)2073 static void TestExpansionSyntax(void) {
2074 int32_t i;
2075
2076 const static char *rules[] = {
2077 "&AE <<< a << b <<< c &d <<< f",
2078 "&AE <<< a <<< b << c << d < e < f <<< g",
2079 "&AE <<< B <<< C / D <<< F"
2080 };
2081
2082 const static char *expectedRules[] = {
2083 "&A <<< a / E << b / E <<< c /E &d <<< f",
2084 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2085 "&A <<< B / E <<< C / ED <<< F / E"
2086 };
2087
2088 const static char *testdata[][8] = {
2089 {"AE", "a", "b", "c"},
2090 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2091 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2092 };
2093
2094 const static uint32_t testdatalen[] = {
2095 4,
2096 8,
2097 3
2098 };
2099
2100
2101
2102 UCollator *credundant = NULL;
2103 UCollator *cresulting = NULL;
2104 UErrorCode status = U_ZERO_ERROR;
2105 UChar rlz[2048] = { 0 };
2106 uint32_t rlen = 0;
2107
2108 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2109 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2110 rlen = u_unescape(rules[i], rlz, 2048);
2111
2112 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2113 if(status == U_FILE_ACCESS_ERROR) {
2114 log_data_err("Is your data around?\n");
2115 return;
2116 } else if(U_FAILURE(status)) {
2117 log_err("Error opening collator\n");
2118 return;
2119 }
2120 rlen = u_unescape(expectedRules[i], rlz, 2048);
2121 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2122
2123 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2124 /* as a hard error test, but only in information mode */
2125 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2126
2127 ucol_close(credundant);
2128 ucol_close(cresulting);
2129
2130 log_verbose("testing using data\n");
2131
2132 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2133 }
2134 }
2135
TestCase(void)2136 static void TestCase(void)
2137 {
2138 const static UChar gRules[MAX_TOKEN_LEN] =
2139 /*" & 0 < 1,\u2461<a,A"*/
2140 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2141
2142 const static UChar testCase[][MAX_TOKEN_LEN] =
2143 {
2144 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2145 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2146 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2147 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2148 };
2149
2150 const static UCollationResult caseTestResults[][9] =
2151 {
2152 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2153 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2154 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2155 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2156 };
2157
2158 const static UColAttributeValue caseTestAttributes[][2] =
2159 {
2160 { UCOL_LOWER_FIRST, UCOL_OFF},
2161 { UCOL_UPPER_FIRST, UCOL_OFF},
2162 { UCOL_LOWER_FIRST, UCOL_ON},
2163 { UCOL_UPPER_FIRST, UCOL_ON}
2164 };
2165 int32_t i,j,k;
2166 UErrorCode status = U_ZERO_ERROR;
2167 UCollationElements *iter;
2168 UCollator *myCollation;
2169 myCollation = ucol_open("en_US", &status);
2170
2171 if(U_FAILURE(status)){
2172 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2173 return;
2174 }
2175 log_verbose("Testing different case settings\n");
2176 ucol_setStrength(myCollation, UCOL_TERTIARY);
2177
2178 for(k = 0; k<4; k++) {
2179 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2180 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2181 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2182 for (i = 0; i < 3 ; i++) {
2183 for(j = i+1; j<4; j++) {
2184 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2185 }
2186 }
2187 }
2188 ucol_close(myCollation);
2189
2190 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2191 if(U_FAILURE(status)){
2192 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2193 return;
2194 }
2195 log_verbose("Testing different case settings with custom rules\n");
2196 ucol_setStrength(myCollation, UCOL_TERTIARY);
2197
2198 for(k = 0; k<4; k++) {
2199 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2200 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2201 for (i = 0; i < 3 ; i++) {
2202 for(j = i+1; j<4; j++) {
2203 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2204 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2205 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2206 backAndForth(iter);
2207 ucol_closeElements(iter);
2208 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2209 backAndForth(iter);
2210 ucol_closeElements(iter);
2211 }
2212 }
2213 }
2214 ucol_close(myCollation);
2215 {
2216 const static char *lowerFirst[] = {
2217 "h",
2218 "H",
2219 "ch",
2220 "Ch",
2221 "CH",
2222 "cha",
2223 "chA",
2224 "Cha",
2225 "ChA",
2226 "CHa",
2227 "CHA",
2228 "i",
2229 "I"
2230 };
2231
2232 const static char *upperFirst[] = {
2233 "H",
2234 "h",
2235 "CH",
2236 "Ch",
2237 "ch",
2238 "CHA",
2239 "CHa",
2240 "ChA",
2241 "Cha",
2242 "chA",
2243 "cha",
2244 "I",
2245 "i"
2246 };
2247 log_verbose("mixed case test\n");
2248 log_verbose("lower first, case level off\n");
2249 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2250 log_verbose("upper first, case level off\n");
2251 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2252 log_verbose("lower first, case level on\n");
2253 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2254 log_verbose("upper first, case level on\n");
2255 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2256 }
2257
2258 }
2259
TestIncrementalNormalize(void)2260 static void TestIncrementalNormalize(void) {
2261
2262 /*UChar baseA =0x61;*/
2263 UChar baseA =0x41;
2264 /* UChar baseB = 0x42;*/
2265 static const UChar ccMix[] = {0x316, 0x321, 0x300};
2266 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2267 /*
2268 0x316 is combining grave accent below, cc=220
2269 0x321 is combining palatalized hook below, cc=202
2270 0x300 is combining grave accent, cc=230
2271 */
2272
2273 #define MAXSLEN 2000
2274 /*int maxSLen = 64000;*/
2275 int sLen;
2276 int i;
2277
2278 UCollator *coll;
2279 UErrorCode status = U_ZERO_ERROR;
2280 UCollationResult result;
2281
2282 int32_t myQ = getTestOption(QUICK_OPTION);
2283
2284 if(getTestOption(QUICK_OPTION) < 0) {
2285 setTestOption(QUICK_OPTION, 1);
2286 }
2287
2288 {
2289 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2290 /* most buffers along the way.*/
2291 UChar strA[MAXSLEN+1];
2292 UChar strB[MAXSLEN+1];
2293
2294 coll = ucol_open("en_US", &status);
2295 if(status == U_FILE_ACCESS_ERROR) {
2296 log_data_err("Is your data around?\n");
2297 return;
2298 } else if(U_FAILURE(status)) {
2299 log_err("Error opening collator\n");
2300 return;
2301 }
2302 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2303
2304 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2305 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2306 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2307 for (sLen = 500; sLen<501; sLen++) {
2308 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2309 strA[0] = baseA;
2310 strB[0] = baseA;
2311 for (i=1; i<=sLen-1; i++) {
2312 strA[i] = ccMix[i % 3];
2313 strB[sLen-i] = ccMix[i % 3];
2314 }
2315 strA[sLen] = 0;
2316 strB[sLen] = 0;
2317
2318 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
2319 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
2320 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
2321 doTest(coll, strA, strB, UCOL_EQUAL);
2322 }
2323 }
2324
2325 setTestOption(QUICK_OPTION, myQ);
2326
2327
2328 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2329 /* of the string. Checks a couple of edge cases.*/
2330
2331 {
2332 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2333 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2334 ucol_setStrength(coll, UCOL_TERTIARY);
2335 doTest(coll, strA, strB, UCOL_EQUAL);
2336 }
2337
2338 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2339
2340 {
2341 /* New UCA 3.1.1.
2342 * test below used a code point from Desseret, which sorts differently
2343 * than d800 dc00
2344 */
2345 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2346 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2347 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2348 ucol_setStrength(coll, UCOL_TERTIARY);
2349 doTest(coll, strA, strB, UCOL_GREATER);
2350 }
2351
2352 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2353
2354 {
2355 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2356 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2357 char sortKeyA[50];
2358 char sortKeyAz[50];
2359 char sortKeyB[50];
2360 char sortKeyBz[50];
2361 int r;
2362
2363 /* there used to be -3 here. Hmmmm.... */
2364 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2365 result = ucol_strcoll(coll, strA, 3, strB, 3);
2366 if (result != UCOL_GREATER) {
2367 log_err("ERROR 1 in test 4\n");
2368 }
2369 result = ucol_strcoll(coll, strA, -1, strB, -1);
2370 if (result != UCOL_EQUAL) {
2371 log_err("ERROR 2 in test 4\n");
2372 }
2373
2374 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2375 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2376 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2377 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2378
2379 r = strcmp(sortKeyA, sortKeyAz);
2380 if (r <= 0) {
2381 log_err("Error 3 in test 4\n");
2382 }
2383 r = strcmp(sortKeyA, sortKeyB);
2384 if (r <= 0) {
2385 log_err("Error 4 in test 4\n");
2386 }
2387 r = strcmp(sortKeyAz, sortKeyBz);
2388 if (r != 0) {
2389 log_err("Error 5 in test 4\n");
2390 }
2391
2392 ucol_setStrength(coll, UCOL_IDENTICAL);
2393 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2394 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2395 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2396 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2397
2398 r = strcmp(sortKeyA, sortKeyAz);
2399 if (r <= 0) {
2400 log_err("Error 6 in test 4\n");
2401 }
2402 r = strcmp(sortKeyA, sortKeyB);
2403 if (r <= 0) {
2404 log_err("Error 7 in test 4\n");
2405 }
2406 r = strcmp(sortKeyAz, sortKeyBz);
2407 if (r != 0) {
2408 log_err("Error 8 in test 4\n");
2409 }
2410 ucol_setStrength(coll, UCOL_TERTIARY);
2411 }
2412
2413
2414 /* Test 5: Null characters in non-normal source strings.*/
2415
2416 {
2417 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2418 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2419 char sortKeyA[50];
2420 char sortKeyAz[50];
2421 char sortKeyB[50];
2422 char sortKeyBz[50];
2423 int r;
2424
2425 result = ucol_strcoll(coll, strA, 6, strB, 6);
2426 if (result != UCOL_GREATER) {
2427 log_err("ERROR 1 in test 5\n");
2428 }
2429 result = ucol_strcoll(coll, strA, -1, strB, -1);
2430 if (result != UCOL_EQUAL) {
2431 log_err("ERROR 2 in test 5\n");
2432 }
2433
2434 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2435 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2436 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2437 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2438
2439 r = strcmp(sortKeyA, sortKeyAz);
2440 if (r <= 0) {
2441 log_err("Error 3 in test 5\n");
2442 }
2443 r = strcmp(sortKeyA, sortKeyB);
2444 if (r <= 0) {
2445 log_err("Error 4 in test 5\n");
2446 }
2447 r = strcmp(sortKeyAz, sortKeyBz);
2448 if (r != 0) {
2449 log_err("Error 5 in test 5\n");
2450 }
2451
2452 ucol_setStrength(coll, UCOL_IDENTICAL);
2453 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2454 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2455 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2456 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2457
2458 r = strcmp(sortKeyA, sortKeyAz);
2459 if (r <= 0) {
2460 log_err("Error 6 in test 5\n");
2461 }
2462 r = strcmp(sortKeyA, sortKeyB);
2463 if (r <= 0) {
2464 log_err("Error 7 in test 5\n");
2465 }
2466 r = strcmp(sortKeyAz, sortKeyBz);
2467 if (r != 0) {
2468 log_err("Error 8 in test 5\n");
2469 }
2470 ucol_setStrength(coll, UCOL_TERTIARY);
2471 }
2472
2473
2474 /* Test 6: Null character as base of a non-normal combining sequence.*/
2475
2476 {
2477 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2478 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2479
2480 result = ucol_strcoll(coll, strA, 5, strB, 5);
2481 if (result != UCOL_LESS) {
2482 log_err("Error 1 in test 6\n");
2483 }
2484 result = ucol_strcoll(coll, strA, -1, strB, -1);
2485 if (result != UCOL_EQUAL) {
2486 log_err("Error 2 in test 6\n");
2487 }
2488 }
2489
2490 ucol_close(coll);
2491 }
2492
2493
2494
#if 0
/*
 * Disabled test.  For each escaped string in caseBitData, calls
 * ucol_uprv_getCaseBits() on the root ("") collator and compares the
 * returned value with the matching entry of results.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err_status(status, "Unable to open root collator -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* the collator was previously never released */
  ucol_close(UCA);
}
#endif
2522
TestHangulTailoring(void)2523 static void TestHangulTailoring(void) {
2524 static const char *koreanData[] = {
2525 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2526 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2527 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2528 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2529 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2530 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2531 };
2532
2533 const char *rules =
2534 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2535 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2536 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2537 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2538 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2539 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2540
2541
2542 UErrorCode status = U_ZERO_ERROR;
2543 UChar rlz[2048] = { 0 };
2544 uint32_t rlen = u_unescape(rules, rlz, 2048);
2545
2546 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2547 if(status == U_FILE_ACCESS_ERROR) {
2548 log_data_err("Is your data around?\n");
2549 return;
2550 } else if(U_FAILURE(status)) {
2551 log_err("Error opening collator\n");
2552 return;
2553 }
2554
2555 log_verbose("Using start of korean rules\n");
2556
2557 if(U_SUCCESS(status)) {
2558 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2559 } else {
2560 log_err("Unable to open collator with rules %s\n", rules);
2561 }
2562
2563 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2564 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */
2565 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2566
2567 ucol_close(coll);
2568
2569 log_verbose("Using ko__LOTUS locale\n");
2570 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2571 }
2572
TestCompressOverlap(void)2573 static void TestCompressOverlap(void) {
2574 UChar secstr[150];
2575 UChar tertstr[150];
2576 UErrorCode status = U_ZERO_ERROR;
2577 UCollator *coll;
2578 char result[200];
2579 uint32_t resultlen;
2580 int count = 0;
2581 char *tempptr;
2582
2583 coll = ucol_open("", &status);
2584
2585 if (U_FAILURE(status)) {
2586 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2587 return;
2588 }
2589 while (count < 149) {
2590 secstr[count] = 0x0020; /* [06, 05, 05] */
2591 tertstr[count] = 0x0020;
2592 count ++;
2593 }
2594
2595 /* top down compression ----------------------------------- */
2596 secstr[count] = 0x0332; /* [, 87, 05] */
2597 tertstr[count] = 0x3000; /* [06, 05, 07] */
2598
2599 /* no compression secstr should have 150 secondary bytes, tertstr should
2600 have 150 tertiary bytes.
2601 with correct overlapping compression, secstr should have 4 secondary
2602 bytes, tertstr should have > 2 tertiary bytes */
2603 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2604 (void)resultlen; /* Suppress set but not used warning. */
2605 tempptr = uprv_strchr(result, 1) + 1;
2606 while (*(tempptr + 1) != 1) {
2607 /* the last secondary collation element is not checked since it is not
2608 part of the compression */
2609 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2610 log_err("Secondary compression overlapped\n");
2611 }
2612 tempptr ++;
2613 }
2614
2615 /* tertiary top/bottom/common for en_US is similar to the secondary
2616 top/bottom/common */
2617 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2618 tempptr = uprv_strrchr(result, 1) + 1;
2619 while (*(tempptr + 1) != 0) {
2620 /* the last secondary collation element is not checked since it is not
2621 part of the compression */
2622 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2623 log_err("Tertiary compression overlapped\n");
2624 }
2625 tempptr ++;
2626 }
2627
2628 /* bottom up compression ------------------------------------- */
2629 secstr[count] = 0;
2630 tertstr[count] = 0;
2631 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2632 tempptr = uprv_strchr(result, 1) + 1;
2633 while (*(tempptr + 1) != 1) {
2634 /* the last secondary collation element is not checked since it is not
2635 part of the compression */
2636 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2637 log_err("Secondary compression overlapped\n");
2638 }
2639 tempptr ++;
2640 }
2641
2642 /* tertiary top/bottom/common for en_US is similar to the secondary
2643 top/bottom/common */
2644 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2645 tempptr = uprv_strrchr(result, 1) + 1;
2646 while (*(tempptr + 1) != 0) {
2647 /* the last secondary collation element is not checked since it is not
2648 part of the compression */
2649 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2650 log_err("Tertiary compression overlapped\n");
2651 }
2652 tempptr ++;
2653 }
2654
2655 ucol_close(coll);
2656 }
2657
TestCyrillicTailoring(void)2658 static void TestCyrillicTailoring(void) {
2659 static const char *test[] = {
2660 "\\u0410b",
2661 "\\u0410\\u0306a",
2662 "\\u04d0A"
2663 };
2664
2665 /* Russian overrides contractions, so this test is not valid anymore */
2666 /*genericLocaleStarter("ru", test, 3);*/
2667
2668 genericLocaleStarter("root", test, 3);
2669 genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2670 genericRulesStarter("&Z < \\u0410", test, 3);
2671 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2672 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2673 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2674 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2675 }
2676
/**
 * Runs two three-string data sets through genericRulesStarter() with a rule
 * that consists only of a [suppressContractions] option for the range
 * U+0400..U+047F.
 */
static void TestSuppressContractions(void) {

    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* Cyrillic A after a Latin/fullwidth A, with and without a combining mark */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* Cyrillic A first, with and without a combining mark, then a Latin letter */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
2693
TestContraction(void)2694 static void TestContraction(void) {
2695 const static char *testrules[] = {
2696 "&A = AB / B",
2697 "&A = A\\u0306/\\u0306",
2698 "&c = ch / h"
2699 };
2700 const static UChar testdata[][2] = {
2701 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2702 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2703 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2704 };
2705 const static UChar testdata2[][2] = {
2706 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2707 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2708 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2709 };
2710 const static char *testrules3[] = {
2711 "&z < xyz &xyzw << B",
2712 "&z < xyz &xyz << B / w",
2713 "&z < ch &achm << B",
2714 "&z < ch &a << B / chm",
2715 "&\\ud800\\udc00w << B",
2716 "&\\ud800\\udc00 << B / w",
2717 "&a\\ud800\\udc00m << B",
2718 "&a << B / \\ud800\\udc00m",
2719 };
2720
2721 UErrorCode status = U_ZERO_ERROR;
2722 UCollator *coll;
2723 UChar rule[256] = {0};
2724 uint32_t rlen = 0;
2725 int i;
2726
2727 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2728 UCollationElements *iter1;
2729 int j = 0;
2730 log_verbose("Rule %s for testing\n", testrules[i]);
2731 rlen = u_unescape(testrules[i], rule, 32);
2732 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2733 if (U_FAILURE(status)) {
2734 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2735 return;
2736 }
2737 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2738 if (U_FAILURE(status)) {
2739 log_err("Collation iterator creation failed\n");
2740 return;
2741 }
2742 while (j < 2) {
2743 UCollationElements *iter2 = ucol_openElements(coll,
2744 &(testdata[i][j]),
2745 1, &status);
2746 uint32_t ce;
2747 if (U_FAILURE(status)) {
2748 log_err("Collation iterator creation failed\n");
2749 return;
2750 }
2751 ce = ucol_next(iter2, &status);
2752 while (ce != UCOL_NULLORDER) {
2753 if ((uint32_t)ucol_next(iter1, &status) != ce) {
2754 log_err("Collation elements in contraction split does not match\n");
2755 return;
2756 }
2757 ce = ucol_next(iter2, &status);
2758 }
2759 j ++;
2760 ucol_closeElements(iter2);
2761 }
2762 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2763 log_err("Collation elements not exhausted\n");
2764 return;
2765 }
2766 ucol_closeElements(iter1);
2767 ucol_close(coll);
2768 }
2769
2770 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2771 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2772 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2773 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2774 testdata2[0][0], testdata2[0][1], testdata2[1][0],
2775 testdata2[1][1]);
2776 return;
2777 }
2778 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2779 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2780 testdata2[1][0], testdata2[1][1], testdata2[2][0],
2781 testdata2[2][1]);
2782 return;
2783 }
2784 ucol_close(coll);
2785
2786 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2787 UCollator *coll1,
2788 *coll2;
2789 UCollationElements *iter1,
2790 *iter2;
2791 UChar ch = 0x0042 /* 'B' */;
2792 uint32_t ce;
2793 rlen = u_unescape(testrules3[i], rule, 32);
2794 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2795 rlen = u_unescape(testrules3[i + 1], rule, 32);
2796 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2797 if (U_FAILURE(status)) {
2798 log_err("Collator creation failed %s\n", testrules[i]);
2799 return;
2800 }
2801 iter1 = ucol_openElements(coll1, &ch, 1, &status);
2802 iter2 = ucol_openElements(coll2, &ch, 1, &status);
2803 if (U_FAILURE(status)) {
2804 log_err("Collation iterator creation failed\n");
2805 return;
2806 }
2807 ce = ucol_next(iter1, &status);
2808 if (U_FAILURE(status)) {
2809 log_err("Retrieving ces failed\n");
2810 return;
2811 }
2812 while (ce != UCOL_NULLORDER) {
2813 if (ce != (uint32_t)ucol_next(iter2, &status)) {
2814 log_err("CEs does not match\n");
2815 return;
2816 }
2817 ce = ucol_next(iter1, &status);
2818 if (U_FAILURE(status)) {
2819 log_err("Retrieving ces failed\n");
2820 return;
2821 }
2822 }
2823 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2824 log_err("CEs not exhausted\n");
2825 return;
2826 }
2827 ucol_closeElements(iter1);
2828 ucol_closeElements(iter2);
2829 ucol_close(coll1);
2830 ucol_close(coll2);
2831 }
2832 }
2833
TestExpansion(void)2834 static void TestExpansion(void) {
2835 const static char *testrules[] = {
2836 "&J << K / B & K << M",
2837 "&J << K / B << M"
2838 };
2839 const static UChar testdata[][3] = {
2840 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2841 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2842 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2843 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2844 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2845 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2846 };
2847
2848 UErrorCode status = U_ZERO_ERROR;
2849 UCollator *coll;
2850 UChar rule[256] = {0};
2851 uint32_t rlen = 0;
2852 int i;
2853
2854 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2855 int j = 0;
2856 log_verbose("Rule %s for testing\n", testrules[i]);
2857 rlen = u_unescape(testrules[i], rule, 32);
2858 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2859 if (U_FAILURE(status)) {
2860 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2861 return;
2862 }
2863
2864 for (j = 0; j < 5; j ++) {
2865 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2866 }
2867 ucol_close(coll);
2868 }
2869 }
2870
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Each nested block below declares one rule string plus one or more string
 * lists and passes them to genericRulesStarter() or
 * genericRulesStarterWithOptions().  The whole function is compiled out.
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};
        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
        genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
        genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        /* both orderings are tried, with decomposition on and then off */
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* normalization: contractions spanning normalization */
    /* NOTE(review): the rule and data in this block are identical to the
       "nulls in contractions" block above; only the log message differs.
       Possibly a copy-paste placeholder -- confirm the intended rule. */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        /* att[n] is set to valOn[n] or valOff[n]: alternate handling and strength */
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        /* only UCOL_CASE_FIRST = UCOL_UPPER_FIRST is exercised; the "off"
           variant below is commented out */
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    }

}
#endif
2962
TestBocsuCoverage(void)2963 static void TestBocsuCoverage(void) {
2964 UErrorCode status = U_ZERO_ERROR;
2965 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2966 UChar test[256] = {0};
2967 uint32_t tlen = u_unescape(testString, test, 32);
2968 uint8_t key[256] = {0};
2969 uint32_t klen = 0;
2970
2971 UCollator *coll = ucol_open("", &status);
2972 if(U_SUCCESS(status)) {
2973 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2974
2975 klen = ucol_getSortKey(coll, test, tlen, key, 256);
2976 (void)klen; /* Suppress set but not used warning. */
2977
2978 ucol_close(coll);
2979 } else {
2980 log_data_err("Couldn't open UCA\n");
2981 }
2982 }
2983
TestVariableTopSetting(void)2984 static void TestVariableTopSetting(void) {
2985 UErrorCode status = U_ZERO_ERROR;
2986 const UChar *current = NULL;
2987 uint32_t varTopOriginal = 0, varTop1, varTop2;
2988 UCollator *coll = ucol_open("", &status);
2989 if(U_SUCCESS(status)) {
2990
2991 uint32_t strength = 0;
2992 uint16_t specs = 0;
2993 uint32_t chOffset = 0;
2994 uint32_t chLen = 0;
2995 uint32_t exOffset = 0;
2996 uint32_t exLen = 0;
2997 uint32_t oldChOffset = 0;
2998 uint32_t oldChLen = 0;
2999 uint32_t oldExOffset = 0;
3000 uint32_t oldExLen = 0;
3001 uint32_t prefixOffset = 0;
3002 uint32_t prefixLen = 0;
3003
3004 UBool startOfRules = TRUE;
3005 UColTokenParser src;
3006 UColOptionSet opts;
3007
3008 UChar *rulesCopy = NULL;
3009 uint32_t rulesLen;
3010
3011 UCollationResult result;
3012
3013 UChar first[256] = { 0 };
3014 UChar second[256] = { 0 };
3015 UParseError parseError;
3016 int32_t myQ = getTestOption(QUICK_OPTION);
3017
3018 (void)prefixLen; /* Suppress set but not used warnings. */
3019 (void)prefixOffset;
3020 (void)specs;
3021
3022 uprv_memset(&src, 0, sizeof(UColTokenParser));
3023
3024 src.opts = &opts;
3025
3026 if(getTestOption(QUICK_OPTION) <= 0) {
3027 setTestOption(QUICK_OPTION, 1);
3028 }
3029
3030 /* this test will fail when normalization is turned on */
3031 /* therefore we always turn off exhaustive mode for it */
3032 { /* QUICK > 0*/
3033 log_verbose("Slide variable top over UCARules\n");
3034 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3035 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3036 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3037
3038 if(U_SUCCESS(status) && rulesLen > 0) {
3039 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3040 src.current = src.source = rulesCopy;
3041 src.end = rulesCopy+rulesLen;
3042 src.extraCurrent = src.end;
3043 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3044
3045 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3046 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3047 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3048 strength = src.parsedToken.strength;
3049 chOffset = src.parsedToken.charsOffset;
3050 chLen = src.parsedToken.charsLen;
3051 exOffset = src.parsedToken.extensionOffset;
3052 exLen = src.parsedToken.extensionLen;
3053 prefixOffset = src.parsedToken.prefixOffset;
3054 prefixLen = src.parsedToken.prefixLen;
3055 specs = src.parsedToken.flags;
3056
3057 startOfRules = FALSE;
3058 {
3059 log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3060 }
3061 if(strength == UCOL_PRIMARY) {
3062 status = U_ZERO_ERROR;
3063 varTopOriginal = ucol_getVariableTop(coll, &status);
3064 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3065 if(U_FAILURE(status)) {
3066 char buffer[256];
3067 char *buf = buffer;
3068 uint32_t i = 0, j;
3069 uint32_t CE = UCOL_NO_MORE_CES;
3070
3071 /* before we start screaming, let's see if there is a problem with the rules */
3072 UErrorCode collIterateStatus = U_ZERO_ERROR;
3073 collIterate *s = uprv_new_collIterate(&collIterateStatus);
3074 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3075
3076 CE = ucol_getNextCE(coll, s, &status);
3077 (void)CE; /* Suppress set but not used warning. */
3078
3079 for(i = 0; i < oldChLen; i++) {
3080 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3081 buf += j;
3082 }
3083 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3084 log_verbose("= Expected failure for %s =", buffer);
3085 } else {
3086 if(uprv_collIterateAtEnd(s)) {
3087 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3088 oldChOffset, u_errorName(status), buffer);
3089 } else {
3090 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3091 buffer);
3092 }
3093 }
3094 uprv_delete_collIterate(s);
3095 }
3096 varTop2 = ucol_getVariableTop(coll, &status);
3097 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3098 log_err("cannot retrieve set varTop value!\n");
3099 continue;
3100 }
3101
3102 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3103
3104 u_strncpy(first, src.source+oldChOffset, oldChLen);
3105 u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3106 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3107 first[2*oldChLen+chLen] = 0;
3108
3109 if(oldExLen == 0) {
3110 u_strncpy(second, src.source+chOffset, chLen);
3111 second[chLen] = 0;
3112 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3113 u_strncpy(second, src.source+oldExOffset, oldExLen);
3114 u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3115 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3116 second[2*oldExLen+chLen] = 0;
3117 }
3118 result = ucol_strcoll(coll, first, -1, second, -1);
3119 if(result == UCOL_EQUAL) {
3120 doTest(coll, first, second, UCOL_EQUAL);
3121 } else {
3122 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3123 }
3124 }
3125 }
3126 if(strength != UCOL_TOK_RESET) {
3127 oldChOffset = chOffset;
3128 oldChLen = chLen;
3129 oldExOffset = exOffset;
3130 oldExLen = exLen;
3131 }
3132 }
3133 status = U_ZERO_ERROR;
3134 }
3135 else {
3136 log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3137 return;
3138 }
3139 if (U_FAILURE(status)) {
3140 log_err("Error parsing rules %s\n", u_errorName(status));
3141 return;
3142 }
3143 status = U_ZERO_ERROR;
3144 }
3145
3146 setTestOption(QUICK_OPTION, myQ);
3147
3148 log_verbose("Testing setting variable top to contractions\n");
3149 {
3150 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3151 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3152 while(*conts != 0) {
3153 /*
3154 * A continuation is NUL-terminated and NUL-padded
3155 * except if it has the maximum length.
3156 */
3157 int32_t contractionLength = maxUCAContractionLength;
3158 while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3159 --contractionLength;
3160 }
3161 if(*(conts+1)==0) { /* pre-context */
3162 varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3163 } else {
3164 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3165 }
3166 if(U_FAILURE(status)) {
3167 if(status == U_PRIMARY_TOO_LONG_ERROR) {
3168 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3169 * therefore it is not an error when it complains about them. */
3170 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3171 *conts, *(conts+1), *(conts+2));
3172 } else {
3173 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3174 *conts, *(conts+1), *(conts+2), u_errorName(status));
3175 }
3176 status = U_ZERO_ERROR;
3177 }
3178 conts+=maxUCAContractionLength;
3179 }
3180
3181 status = U_ZERO_ERROR;
3182
3183 first[0] = 0x0040;
3184 first[1] = 0x0050;
3185 first[2] = 0x0000;
3186
3187 ucol_setVariableTop(coll, first, -1, &status);
3188
3189 if(U_SUCCESS(status)) {
3190 log_err("Invalid contraction succeded in setting variable top!\n");
3191 }
3192
3193 }
3194
3195 log_verbose("Test restoring variable top\n");
3196
3197 status = U_ZERO_ERROR;
3198 ucol_restoreVariableTop(coll, varTopOriginal, &status);
3199 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3200 log_err("Couldn't restore old variable top\n");
3201 }
3202
3203 log_verbose("Testing calling with error set\n");
3204
3205 status = U_INTERNAL_PROGRAM_ERROR;
3206 varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3207 varTop2 = ucol_getVariableTop(coll, &status);
3208 ucol_restoreVariableTop(coll, varTop2, &status);
3209 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3210 varTop2 = ucol_getVariableTop(NULL, &status);
3211 ucol_restoreVariableTop(NULL, varTop2, &status);
3212 if(status != U_INTERNAL_PROGRAM_ERROR) {
3213 log_err("Bad reaction to passed error!\n");
3214 }
3215 uprv_free(src.source);
3216 ucol_close(coll);
3217 } else {
3218 log_data_err("Couldn't open UCA collator\n");
3219 }
3220
3221 }
3222
TestNonChars(void)3223 static void TestNonChars(void) {
3224 static const char *test[] = {
3225 "\\u0000", /* ignorable */
3226 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3227 "\\uFDD0", "\\uFDEF",
3228 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3229 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3230 "\\U0003FFFE", "\\U0003FFFF",
3231 "\\U0004FFFE", "\\U0004FFFF",
3232 "\\U0005FFFE", "\\U0005FFFF",
3233 "\\U0006FFFE", "\\U0006FFFF",
3234 "\\U0007FFFE", "\\U0007FFFF",
3235 "\\U0008FFFE", "\\U0008FFFF",
3236 "\\U0009FFFE", "\\U0009FFFF",
3237 "\\U000AFFFE", "\\U000AFFFF",
3238 "\\U000BFFFE", "\\U000BFFFF",
3239 "\\U000CFFFE", "\\U000CFFFF",
3240 "\\U000DFFFE", "\\U000DFFFF",
3241 "\\U000EFFFE", "\\U000EFFFF",
3242 "\\U000FFFFE", "\\U000FFFFF",
3243 "\\U0010FFFE", "\\U0010FFFF",
3244 "\\uFFFF" /* special character with maximum primary weight */
3245 };
3246 UErrorCode status = U_ZERO_ERROR;
3247 UCollator *coll = ucol_open("en_US", &status);
3248
3249 log_verbose("Test non characters\n");
3250
3251 if(U_SUCCESS(status)) {
3252 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3253 } else {
3254 log_err_status(status, "Unable to open collator\n");
3255 }
3256
3257 ucol_close(coll);
3258 }
3259
TestExtremeCompression(void)3260 static void TestExtremeCompression(void) {
3261 static char *test[4];
3262 int32_t j = 0, i = 0;
3263
3264 for(i = 0; i<4; i++) {
3265 test[i] = (char *)malloc(2048*sizeof(char));
3266 }
3267
3268 for(j = 20; j < 500; j++) {
3269 for(i = 0; i<4; i++) {
3270 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3271 test[i][j-1] = (char)('a'+i);
3272 test[i][j] = 0;
3273 }
3274 genericLocaleStarter("en_US", (const char **)test, 4);
3275 }
3276
3277
3278 for(i = 0; i<4; i++) {
3279 free(test[i]);
3280 }
3281 }
3282
3283 #if 0
3284 static void TestExtremeCompression(void) {
3285 static char *test[4];
3286 int32_t j = 0, i = 0;
3287 UErrorCode status = U_ZERO_ERROR;
3288 UCollator *coll = ucol_open("en_US", status);
3289 for(i = 0; i<4; i++) {
3290 test[i] = (char *)malloc(2048*sizeof(char));
3291 }
3292 for(j = 10; j < 2048; j++) {
3293 for(i = 0; i<4; i++) {
3294 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
3295 test[i][j-1] = (char)('a'+i);
3296 test[i][j] = 0;
3297 }
3298 }
3299 genericLocaleStarter("en_US", (const char **)test, 4);
3300
3301 for(j = 10; j < 2048; j++) {
3302 for(i = 0; i<1; i++) {
3303 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3304 test[i][j] = 0;
3305 }
3306 }
3307 for(i = 0; i<4; i++) {
3308 free(test[i]);
3309 }
3310 }
3311 #endif
3312
/*
 * Tailors a handful of strings containing surrogate pairs and hands the
 * expected order, together with the rule string, to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    /* 14 strings, listed in the order they are expected to sort in */
    static const char *test[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    genericRulesStarter(rule, test, sizeof(test)/sizeof(test[0]));
}
3333
3334 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
TestPrefix(void)3335 static void TestPrefix(void) {
3336 uint32_t i;
3337
3338 static const struct {
3339 const char *rules;
3340 const char *data[50];
3341 const uint32_t len;
3342 } tests[] = {
3343 { "&z <<< z|a",
3344 {"zz", "za"}, 2 },
3345
3346 { "&z <<< z| a",
3347 {"zz", "za"}, 2 },
3348 { "[strength I]"
3349 "&a=\\ud900\\udc25"
3350 "&z<<<\\ud900\\udc25|a",
3351 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3352 };
3353
3354
3355 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3356 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3357 }
3358 }
3359
3360 /* This test uses data suplied by Masashiko Maedera to test the implementation */
3361 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)3362 static void TestNewJapanese(void) {
3363
3364 static const char * const test1[] = {
3365 "\\u30b7\\u30e3\\u30fc\\u30ec",
3366 "\\u30b7\\u30e3\\u30a4",
3367 "\\u30b7\\u30e4\\u30a3",
3368 "\\u30b7\\u30e3\\u30ec",
3369 "\\u3061\\u3087\\u3053",
3370 "\\u3061\\u3088\\u3053",
3371 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3372 "\\u3066\\u30fc\\u305f",
3373 "\\u30c6\\u30fc\\u30bf",
3374 "\\u30c6\\u30a7\\u30bf",
3375 "\\u3066\\u3048\\u305f",
3376 "\\u3067\\u30fc\\u305f",
3377 "\\u30c7\\u30fc\\u30bf",
3378 "\\u30c7\\u30a7\\u30bf",
3379 "\\u3067\\u3048\\u305f",
3380 "\\u3066\\u30fc\\u305f\\u30fc",
3381 "\\u30c6\\u30fc\\u30bf\\u30a1",
3382 "\\u30c6\\u30a7\\u30bf\\u30fc",
3383 "\\u3066\\u3047\\u305f\\u3041",
3384 "\\u3066\\u3048\\u305f\\u30fc",
3385 "\\u3067\\u30fc\\u305f\\u30fc",
3386 "\\u30c7\\u30fc\\u30bf\\u30a1",
3387 "\\u3067\\u30a7\\u305f\\u30a1",
3388 "\\u30c7\\u3047\\u30bf\\u3041",
3389 "\\u30c7\\u30a8\\u30bf\\u30a2",
3390 "\\u3072\\u3086",
3391 "\\u3073\\u3085\\u3042",
3392 "\\u3074\\u3085\\u3042",
3393 "\\u3073\\u3085\\u3042\\u30fc",
3394 "\\u30d3\\u30e5\\u30a2\\u30fc",
3395 "\\u3074\\u3085\\u3042\\u30fc",
3396 "\\u30d4\\u30e5\\u30a2\\u30fc",
3397 "\\u30d2\\u30e5\\u30a6",
3398 "\\u30d2\\u30e6\\u30a6",
3399 "\\u30d4\\u30e5\\u30a6\\u30a2",
3400 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3401 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3402 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3403 "\\u3072\\u3085\\u3093",
3404 "\\u3074\\u3085\\u3093",
3405 "\\u3075\\u30fc\\u308a",
3406 "\\u30d5\\u30fc\\u30ea",
3407 "\\u3075\\u3045\\u308a",
3408 "\\u3075\\u30a5\\u308a",
3409 "\\u3075\\u30a5\\u30ea",
3410 "\\u30d5\\u30a6\\u30ea",
3411 "\\u3076\\u30fc\\u308a",
3412 "\\u30d6\\u30fc\\u30ea",
3413 "\\u3076\\u3045\\u308a",
3414 "\\u30d6\\u30a5\\u308a",
3415 "\\u3077\\u3046\\u308a",
3416 "\\u30d7\\u30a6\\u30ea",
3417 "\\u3075\\u30fc\\u308a\\u30fc",
3418 "\\u30d5\\u30a5\\u30ea\\u30fc",
3419 "\\u3075\\u30a5\\u308a\\u30a3",
3420 "\\u30d5\\u3045\\u308a\\u3043",
3421 "\\u30d5\\u30a6\\u30ea\\u30fc",
3422 "\\u3075\\u3046\\u308a\\u3043",
3423 "\\u30d6\\u30a6\\u30ea\\u30a4",
3424 "\\u3077\\u30fc\\u308a\\u30fc",
3425 "\\u3077\\u30a5\\u308a\\u30a4",
3426 "\\u3077\\u3046\\u308a\\u30fc",
3427 "\\u30d7\\u30a6\\u30ea\\u30a4",
3428 "\\u30d5\\u30fd",
3429 "\\u3075\\u309e",
3430 "\\u3076\\u309d",
3431 "\\u3076\\u3075",
3432 "\\u3076\\u30d5",
3433 "\\u30d6\\u3075",
3434 "\\u30d6\\u30d5",
3435 "\\u3076\\u309e",
3436 "\\u3076\\u3077",
3437 "\\u30d6\\u3077",
3438 "\\u3077\\u309d",
3439 "\\u30d7\\u30fd",
3440 "\\u3077\\u3075",
3441 };
3442
3443 static const char *test2[] = {
3444 "\\u306f\\u309d", /* H\\u309d */
3445 "\\u30cf\\u30fd", /* K\\u30fd */
3446 "\\u306f\\u306f", /* HH */
3447 "\\u306f\\u30cf", /* HK */
3448 "\\u30cf\\u30cf", /* KK */
3449 "\\u306f\\u309e", /* H\\u309e */
3450 "\\u30cf\\u30fe", /* K\\u30fe */
3451 "\\u306f\\u3070", /* HH\\u309b */
3452 "\\u30cf\\u30d0", /* KK\\u309b */
3453 "\\u306f\\u3071", /* HH\\u309c */
3454 "\\u30cf\\u3071", /* KH\\u309c */
3455 "\\u30cf\\u30d1", /* KK\\u309c */
3456 "\\u3070\\u309d", /* H\\u309b\\u309d */
3457 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3458 "\\u3070\\u306f", /* H\\u309bH */
3459 "\\u30d0\\u30cf", /* K\\u309bK */
3460 "\\u3070\\u309e", /* H\\u309b\\u309e */
3461 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3462 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3463 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3464 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3465 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3466 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3467 "\\u3071\\u309d", /* H\\u309c\\u309d */
3468 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3469 "\\u3071\\u306f", /* H\\u309cH */
3470 "\\u30d1\\u30cf", /* K\\u309cK */
3471 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3472 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3473 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3474 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3475 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3476 };
3477 /*
3478 static const char *test3[] = {
3479 "\\u221er\\u221e",
3480 "\\u221eR#",
3481 "\\u221et\\u221e",
3482 "#r\\u221e",
3483 "#R#",
3484 "#t%",
3485 "#T%",
3486 "8t\\u221e",
3487 "8T\\u221e",
3488 "8t#",
3489 "8T#",
3490 "8t%",
3491 "8T%",
3492 "8t8",
3493 "8T8",
3494 "\\u03c9r\\u221e",
3495 "\\u03a9R%",
3496 "rr\\u221e",
3497 "rR\\u221e",
3498 "Rr\\u221e",
3499 "RR\\u221e",
3500 "RT%",
3501 "rt8",
3502 "tr\\u221e",
3503 "tr8",
3504 "TR8",
3505 "tt8",
3506 "\\u30b7\\u30e3\\u30fc\\u30ec",
3507 };
3508 */
3509 static const UColAttribute att[] = { UCOL_STRENGTH };
3510 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3511
3512 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3513 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3514
3515 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3516 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3517 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3518 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3519 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3520 }
3521
TestStrCollIdenticalPrefix(void)3522 static void TestStrCollIdenticalPrefix(void) {
3523 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3524 const char* test[] = {
3525 "ab\\ud9b0\\udc70",
3526 "ab\\ud9b0\\udc71"
3527 };
3528 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3529 }
3530 /* Contractions should have all their canonically equivalent */
3531 /* strings included */
TestContractionClosure(void)3532 static void TestContractionClosure(void) {
3533 static const struct {
3534 const char *rules;
3535 const char *data[10];
3536 const uint32_t len;
3537 } tests[] = {
3538 { "&b=\\u00e4\\u00e4",
3539 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3540 { "&b=\\u00C5",
3541 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3542 };
3543 uint32_t i;
3544
3545
3546 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3547 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3548 }
3549 }
3550
3551 /* This tests also fails*/
TestBeforePrefixFailure(void)3552 static void TestBeforePrefixFailure(void) {
3553 static const struct {
3554 const char *rules;
3555 const char *data[10];
3556 const uint32_t len;
3557 } tests[] = {
3558 { "&g <<< a"
3559 "&[before 3]\\uff41 <<< x",
3560 {"x", "\\uff41"}, 2 },
3561 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3562 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3563 "&[before 3]\\u30a7<<<\\u30a9",
3564 {"\\u30a9", "\\u30a7"}, 2 },
3565 { "&[before 3]\\u30a7<<<\\u30a9"
3566 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3567 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3568 {"\\u30a9", "\\u30a7"}, 2 },
3569 };
3570 uint32_t i;
3571
3572
3573 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3574 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3575 }
3576
3577 #if 0
3578 const char* rule1 =
3579 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3580 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3581 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3582 const char* rule2 =
3583 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3584 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3585 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3586 const char* test[] = {
3587 "\\u30c6\\u30fc\\u30bf",
3588 "\\u30c6\\u30a7\\u30bf",
3589 };
3590 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3591 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3592 /* this piece of code should be in some sort of verbose mode */
3593 /* it gets the collation elements for elements and prints them */
3594 /* This is useful when trying to see whether the problem is */
3595 {
3596 UErrorCode status = U_ZERO_ERROR;
3597 uint32_t i = 0;
3598 UCollationElements *it = NULL;
3599 uint32_t CE;
3600 UChar string[256];
3601 uint32_t uStringLen;
3602 UCollator *coll = NULL;
3603
3604 uStringLen = u_unescape(rule1, string, 256);
3605
3606 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3607
3608 /*coll = ucol_open("ja_JP_JIS", &status);*/
3609 it = ucol_openElements(coll, string, 0, &status);
3610
3611 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3612 log_verbose("%s\n", test[i]);
3613 uStringLen = u_unescape(test[i], string, 256);
3614 ucol_setText(it, string, uStringLen, &status);
3615
3616 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3617 log_verbose("%08X\n", CE);
3618 }
3619 log_verbose("\n");
3620
3621 }
3622
3623 ucol_closeElements(it);
3624 ucol_close(coll);
3625 }
3626 #endif
3627 }
3628
TestPrefixCompose(void)3629 static void TestPrefixCompose(void) {
3630 const char* rule1 =
3631 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3632 /*
3633 const char* test[] = {
3634 "\\u30c6\\u30fc\\u30bf",
3635 "\\u30c6\\u30a7\\u30bf",
3636 };
3637 */
3638 {
3639 UErrorCode status = U_ZERO_ERROR;
3640 /*uint32_t i = 0;*/
3641 /*UCollationElements *it = NULL;*/
3642 /* uint32_t CE;*/
3643 UChar string[256];
3644 uint32_t uStringLen;
3645 UCollator *coll = NULL;
3646
3647 uStringLen = u_unescape(rule1, string, 256);
3648
3649 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3650 ucol_close(coll);
3651 }
3652
3653
3654 }
3655
3656 /*
3657 [last variable] last variable value
3658 [last primary ignorable] largest CE for primary ignorable
3659 [last secondary ignorable] largest CE for secondary ignorable
3660 [last tertiary ignorable] largest CE for tertiary ignorable
3661 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3662 */
3663
TestRuleOptions(void)3664 static void TestRuleOptions(void) {
3665 /* values here are hardcoded and are correct for the current UCA
3666 * when the UCA changes, one might be forced to change these
3667 * values.
3668 */
3669
3670 /*
3671 * These strings contain the last character before [variable top]
3672 * and the first and second characters (by primary weights) after it.
3673 * See FractionalUCA.txt. For example:
3674 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3675 [variable top = 0C FE]
3676 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3677 and
3678 00B4; [0D 0C, 05, 05]
3679 *
3680 * Note: Starting with UCA 6.0, the [variable top] collation element
3681 * is not the weight of any character or string,
3682 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3683 */
3684 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3685 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3686 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3687
3688 /*
3689 * This string has to match the character that has the [last regular] weight
3690 * which changes with each UCA version.
3691 * See the bottom of FractionalUCA.txt which says something like
3692 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3693 *
3694 * Note: Starting with UCA 6.0, the [last regular] collation element
3695 * is not the weight of any character or string,
3696 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3697 */
3698 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3699
3700 static const struct {
3701 const char *rules;
3702 const char *data[10];
3703 const uint32_t len;
3704 } tests[] = {
3705 /* - all befores here amount to zero */
3706 { "&[before 3][first tertiary ignorable]<<<a",
3707 { "\\u0000", "a"}, 2
3708 }, /* you cannot go before first tertiary ignorable */
3709
3710 { "&[before 3][last tertiary ignorable]<<<a",
3711 { "\\u0000", "a"}, 2
3712 }, /* you cannot go before last tertiary ignorable */
3713
3714 { "&[before 3][first secondary ignorable]<<<a",
3715 { "\\u0000", "a"}, 2
3716 }, /* you cannot go before first secondary ignorable */
3717
3718 { "&[before 3][last secondary ignorable]<<<a",
3719 { "\\u0000", "a"}, 2
3720 }, /* you cannot go before first secondary ignorable */
3721
3722 /* 'normal' befores */
3723
3724 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3725 { "c", "b", "\\u0332", "a" }, 4
3726 },
3727
3728 /* we don't have a code point that corresponds to
3729 * the last primary ignorable
3730 */
3731 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3732 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3733 },
3734
3735 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3736 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3737 },
3738
3739 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3740 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
3741 },
3742
3743 { "&[first regular]<a"
3744 "&[before 1][first regular]<b",
3745 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3746 },
3747
3748 { "&[before 1][last regular]<b"
3749 "&[last regular]<a",
3750 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3751 },
3752
3753 { "&[before 1][first implicit]<b"
3754 "&[first implicit]<a",
3755 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3756 },
3757
3758 { "&[before 1][last implicit]<b"
3759 "&[last implicit]<a",
3760 { "b", "\\U0010FFFD", "a" }, 3
3761 },
3762
3763 { "&[last variable]<z"
3764 "&[last primary ignorable]<x"
3765 "&[last secondary ignorable]<<y"
3766 "&[last tertiary ignorable]<<<w"
3767 "&[top]<u",
3768 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
3769 }
3770
3771 };
3772 uint32_t i;
3773
3774 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3775 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3776 }
3777 }
3778
3779
TestOptimize(void)3780 static void TestOptimize(void) {
3781 /* this is not really a test - just trying out
3782 * whether copying of UCA contents will fail
3783 * Cannot really test, since the functionality
3784 * remains the same.
3785 */
3786 static const struct {
3787 const char *rules;
3788 const char *data[10];
3789 const uint32_t len;
3790 } tests[] = {
3791 /* - all befores here amount to zero */
3792 { "[optimize [\\uAC00-\\uD7FF]]",
3793 { "a", "b"}, 2}
3794 };
3795 uint32_t i;
3796
3797 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3798 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3799 }
3800 }
3801
3802 /*
3803 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3804 weiv ucol_strcollIter?
3805 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3806 weiv these are the input strings?
3807 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3808 weiv will check - could be a problem with utf-8 iterator
3809 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3810 weiv hmmm
3811 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3812 weiv that doesn't sound right
3813 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3814 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3815 cycheng@ca.ibm.c... yes
3816 weiv and then do the comparison
3817 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3818 weiv utf-16 strings look like a little endian ones in the example you sent me
3819 weiv It could be a bug - let me try to test it out
3820 cycheng@ca.ibm.c... ok
3821 cycheng@ca.ibm.c... we can wait till the conf. call
3822 cycheng@ca.ibm.c... next weke
3823 weiv that would be great
3824 weiv hmmm
3825 weiv I might be wrong
3826 weiv let me play with it some more
3827 cycheng@ca.ibm.c... ok
3828 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3829 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3830 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3831 weiv ok
3832 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3833 weiv thanks
3834 cycheng@ca.ibm.c... the 4 strings we sent are just samples
3835 */
#if 0
/*
 * Disabled reproduction of the report quoted in the transcript above: the same
 * two strings are compared once as UTF-16BE bytes and once as UTF-8 bytes
 * through ucol_strcollIter(), and both comparisons are expected to agree.
 * Kept for reference only; it is not compiled.
 */
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);

    /* <D800 0021> and <FFFC 0062> as big-endian UTF-16 bytes. */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* The byte sequences from the report for the same two strings in UTF-8. */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    /* Without a collator there is nothing to compare. */
    if(U_FAILURE(status)) {
        log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
        return;
    }

    /* Both iterator setters take the length of the input in bytes. */
    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

    if(U_FAILURE(status)) {
        /* A failed call leaves the results meaningless, so do not compare them. */
        log_err("Error comparing through iterators -> %s\n", u_errorName(status));
    } else if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
3876
3877 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)3878 static void Alexis2(void) {
3879 UErrorCode status = U_ZERO_ERROR;
3880 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3881 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3882 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3883 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3884
3885 UConverter *conv = NULL;
3886
3887 UCharIterator U16BEItS, U16BEItT;
3888 UCharIterator U8ItS, U8ItT;
3889
3890 UCollationResult resU16, resU16BE, resU8;
3891
3892 static const char* const pairs[][2] = {
3893 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3894 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3895 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3896 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3897 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3898 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3899 { "\\u0020", "\\u0020\\u0000"}
3900 /*
3901 5F20 (my result here)
3902 5F204E008E3F
3903 5F20 (your result here)
3904 */
3905 };
3906
3907 int32_t i = 0;
3908
3909 UCollator *coll = ucol_open("", &status);
3910 if(status == U_FILE_ACCESS_ERROR) {
3911 log_data_err("Is your data around?\n");
3912 return;
3913 } else if(U_FAILURE(status)) {
3914 log_err("Error opening collator\n");
3915 return;
3916 }
3917 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3918 conv = ucnv_open("UTF16BE", &status);
3919 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3920 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3921 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3922
3923 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3924
3925 log_verbose("Result of strcoll is %i\n", resU16);
3926
3927 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3928 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3929 (void)U16BELenS; /* Suppress set but not used warnings. */
3930 (void)U16BELenT;
3931
3932 /* use the original sizes, as the result from converter is in bytes */
3933 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3934 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3935
3936 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3937
3938 log_verbose("Result of U16BE is %i\n", resU16BE);
3939
3940 if(resU16 != resU16BE) {
3941 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3942 }
3943
3944 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3945 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3946
3947 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3948 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3949
3950 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3951
3952 if(resU16 != resU8) {
3953 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3954 }
3955
3956 }
3957
3958 ucol_close(coll);
3959 ucnv_close(conv);
3960 }
3961
TestHebrewUCA(void)3962 static void TestHebrewUCA(void) {
3963 UErrorCode status = U_ZERO_ERROR;
3964 static const char *first[] = {
3965 "d790d6b8d79cd795d6bcd7a9",
3966 "d790d79cd79ed7a7d799d799d7a1",
3967 "d790d6b4d79ed795d6bcd7a9",
3968 };
3969
3970 char utf8String[3][256];
3971 UChar utf16String[3][256];
3972
3973 int32_t i = 0, j = 0;
3974 int32_t sizeUTF8[3];
3975 int32_t sizeUTF16[3];
3976
3977 UCollator *coll = ucol_open("", &status);
3978 if (U_FAILURE(status)) {
3979 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3980 return;
3981 }
3982 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3983
3984 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3985 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3986 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3987 log_verbose("%i: ");
3988 for(j = 0; j < sizeUTF16[i]; j++) {
3989 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3990 log_verbose("%04X", utf16String[i][j]);
3991 }
3992 log_verbose("\n");
3993 }
3994 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3995 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3996 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3997 }
3998 }
3999
4000 ucol_close(coll);
4001
4002 }
4003
TestPartialSortKeyTermination(void)4004 static void TestPartialSortKeyTermination(void) {
4005 static const char* cases[] = {
4006 "\\u1234\\u1234\\udc00",
4007 "\\udc00\\ud800\\ud800"
4008 };
4009
4010 int32_t i = sizeof(UCollator);
4011
4012 UErrorCode status = U_ZERO_ERROR;
4013
4014 UCollator *coll = ucol_open("", &status);
4015
4016 UCharIterator iter;
4017
4018 UChar currCase[256];
4019 int32_t length = 0;
4020 int32_t pKeyLen = 0;
4021
4022 uint8_t key[256];
4023
4024 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
4025 uint32_t state[2] = {0, 0};
4026 length = u_unescape(cases[i], currCase, 256);
4027 uiter_setString(&iter, currCase, length);
4028 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4029 (void)pKeyLen; /* Suppress set but not used warning. */
4030
4031 log_verbose("Done\n");
4032
4033 }
4034 ucol_close(coll);
4035 }
4036
TestSettings(void)4037 static void TestSettings(void) {
4038 static const char* cases[] = {
4039 "apple",
4040 "Apple"
4041 };
4042
4043 static const char* locales[] = {
4044 "",
4045 "en"
4046 };
4047
4048 UErrorCode status = U_ZERO_ERROR;
4049
4050 int32_t i = 0, j = 0;
4051
4052 UChar source[256], target[256];
4053 int32_t sLen = 0, tLen = 0;
4054
4055 UCollator *collateObject = NULL;
4056 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4057 collateObject = ucol_open(locales[i], &status);
4058 ucol_setStrength(collateObject, UCOL_PRIMARY);
4059 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4060 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4061 sLen = u_unescape(cases[j-1], source, 256);
4062 source[sLen] = 0;
4063 tLen = u_unescape(cases[j], target, 256);
4064 source[tLen] = 0;
4065 doTest(collateObject, source, target, UCOL_EQUAL);
4066 }
4067 ucol_close(collateObject);
4068 }
4069 }
4070
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)4071 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4072 UErrorCode status = U_ZERO_ERROR;
4073 int32_t errorNo = 0;
4074 const UChar *sourceRules = NULL;
4075 int32_t sourceRulesLen = 0;
4076 UParseError parseError;
4077 UColAttributeValue french = UCOL_OFF;
4078
4079 if(!ucol_equals(source, target)) {
4080 log_err("Same collators, different address not equal\n");
4081 errorNo++;
4082 }
4083 ucol_close(target);
4084 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4085 target = ucol_safeClone(source, NULL, NULL, &status);
4086 if(U_FAILURE(status)) {
4087 log_err("Error creating clone\n");
4088 errorNo++;
4089 return errorNo;
4090 }
4091 if(!ucol_equals(source, target)) {
4092 log_err("Collator different from it's clone\n");
4093 errorNo++;
4094 }
4095 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4096 if(french == UCOL_ON) {
4097 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4098 } else {
4099 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4100 }
4101 if(U_FAILURE(status)) {
4102 log_err("Error setting attributes\n");
4103 errorNo++;
4104 return errorNo;
4105 }
4106 if(ucol_equals(source, target)) {
4107 log_err("Collators same even when options changed\n");
4108 errorNo++;
4109 }
4110 ucol_close(target);
4111
4112 sourceRules = ucol_getRules(source, &sourceRulesLen);
4113 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4114 if(U_FAILURE(status)) {
4115 log_err("Error instantiating target from rules - %s\n", u_errorName(status));
4116 errorNo++;
4117 return errorNo;
4118 }
4119 if(!ucol_equals(source, target)) {
4120 log_err("Collator different from collator that was created from the same rules\n");
4121 errorNo++;
4122 }
4123 ucol_close(target);
4124 }
4125 return errorNo;
4126 }
4127
4128
TestEquals(void)4129 static void TestEquals(void) {
4130 /* ucol_equals is not currently a public API. There is a chance that it will become
4131 * something like this, but currently it is only used by RuleBasedCollator::operator==
4132 */
4133 /* test whether the two collators instantiated from the same locale are equal */
4134 UErrorCode status = U_ZERO_ERROR;
4135 UParseError parseError;
4136 int32_t noOfLoc = uloc_countAvailable();
4137 const char *locName = NULL;
4138 UCollator *source = NULL, *target = NULL;
4139 int32_t i = 0;
4140
4141 const char* rules[] = {
4142 "&l < lj <<< Lj <<< LJ",
4143 "&n < nj <<< Nj <<< NJ",
4144 "&ae <<< \\u00e4",
4145 "&AE <<< \\u00c4"
4146 };
4147 /*
4148 const char* badRules[] = {
4149 "&l <<< Lj",
4150 "&n < nj <<< nJ <<< NJ",
4151 "&a <<< \\u00e4",
4152 "&AE <<< \\u00c4 <<< x"
4153 };
4154 */
4155
4156 UChar sourceRules[1024], targetRules[1024];
4157 int32_t sourceRulesSize = 0, targetRulesSize = 0;
4158 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4159
4160 for(i = 0; i < rulesSize; i++) {
4161 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4162 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4163 }
4164
4165 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4166 if(status == U_FILE_ACCESS_ERROR) {
4167 log_data_err("Is your data around?\n");
4168 return;
4169 } else if(U_FAILURE(status)) {
4170 log_err("Error opening collator\n");
4171 return;
4172 }
4173 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4174 if(!ucol_equals(source, target)) {
4175 log_err("Equivalent collators not equal!\n");
4176 }
4177 ucol_close(source);
4178 ucol_close(target);
4179
4180 source = ucol_open("root", &status);
4181 target = ucol_open("root", &status);
4182 log_verbose("Testing root\n");
4183 if(!ucol_equals(source, source)) {
4184 log_err("Same collator not equal\n");
4185 }
4186 if(TestEqualsForCollator(locName, source, target)) {
4187 log_err("Errors for root\n", locName);
4188 }
4189 ucol_close(source);
4190
4191 for(i = 0; i<noOfLoc; i++) {
4192 status = U_ZERO_ERROR;
4193 locName = uloc_getAvailable(i);
4194 /*if(hasCollationElements(locName)) {*/
4195 log_verbose("Testing equality for locale %s\n", locName);
4196 source = ucol_open(locName, &status);
4197 target = ucol_open(locName, &status);
4198 if (U_FAILURE(status)) {
4199 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
4200 continue;
4201 }
4202 if(TestEqualsForCollator(locName, source, target)) {
4203 log_err("Errors for locale %s\n", locName);
4204 }
4205 ucol_close(source);
4206 /*}*/
4207 }
4208 }
4209
TestJ2726(void)4210 static void TestJ2726(void) {
4211 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4212 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4213 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4214 UErrorCode status = U_ZERO_ERROR;
4215 UCollator *coll = ucol_open("en", &status);
4216 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4217 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4218 doTest(coll, a, aSpace, UCOL_EQUAL);
4219 doTest(coll, aSpace, a, UCOL_EQUAL);
4220 doTest(coll, a, spaceA, UCOL_EQUAL);
4221 doTest(coll, spaceA, a, UCOL_EQUAL);
4222 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4223 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4224 ucol_close(coll);
4225 }
4226
NullRule(void)4227 static void NullRule(void) {
4228 UChar r[3] = {0};
4229 UErrorCode status = U_ZERO_ERROR;
4230 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4231 if(U_SUCCESS(status)) {
4232 log_err("This should have been an error!\n");
4233 ucol_close(coll);
4234 } else {
4235 status = U_ZERO_ERROR;
4236 }
4237 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4238 if(U_FAILURE(status)) {
4239 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4240 } else {
4241 ucol_close(coll);
4242 }
4243 }
4244
4245 /**
4246 * Test for CollationElementIterator previous and next for the whole set of
4247 * unicode characters with normalization on.
4248 */
TestNumericCollation(void)4249 static void TestNumericCollation(void)
4250 {
4251 UErrorCode status = U_ZERO_ERROR;
4252
4253 const static char *basicTestStrings[]={
4254 "hello1",
4255 "hello2",
4256 "hello2002",
4257 "hello2003",
4258 "hello123456",
4259 "hello1234567",
4260 "hello10000000",
4261 "hello100000000",
4262 "hello1000000000",
4263 "hello10000000000",
4264 };
4265
4266 const static char *preZeroTestStrings[]={
4267 "avery10000",
4268 "avery010000",
4269 "avery0010000",
4270 "avery00010000",
4271 "avery000010000",
4272 "avery0000010000",
4273 "avery00000010000",
4274 "avery000000010000",
4275 };
4276
4277 const static char *thirtyTwoBitNumericStrings[]={
4278 "avery42949672960",
4279 "avery42949672961",
4280 "avery42949672962",
4281 "avery429496729610"
4282 };
4283
4284 const static char *longNumericStrings[]={
4285 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4286 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4287 are treated as multiple collation elements. */
4288 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4289 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4290 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4291 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4292 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4293 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4294 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4295 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4296 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4297 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4298 };
4299
4300 const static char *supplementaryDigits[] = {
4301 "\\uD835\\uDFCE", /* 0 */
4302 "\\uD835\\uDFCF", /* 1 */
4303 "\\uD835\\uDFD0", /* 2 */
4304 "\\uD835\\uDFD1", /* 3 */
4305 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4306 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4307 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4308 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4309 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4310 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4311 };
4312
4313 const static char *foreignDigits[] = {
4314 "\\u0661",
4315 "\\u0662",
4316 "\\u0663",
4317 "\\u0661\\u0660",
4318 "\\u0661\\u0662",
4319 "\\u0661\\u0663",
4320 "\\u0662\\u0660",
4321 "\\u0662\\u0662",
4322 "\\u0662\\u0663",
4323 "\\u0663\\u0660",
4324 "\\u0663\\u0662",
4325 "\\u0663\\u0663"
4326 };
4327
4328 const static char *evenZeroes[] = {
4329 "2000",
4330 "2001",
4331 "2002",
4332 "2003"
4333 };
4334
4335 UColAttribute att = UCOL_NUMERIC_COLLATION;
4336 UColAttributeValue val = UCOL_ON;
4337
4338 /* Open our collator. */
4339 UCollator* coll = ucol_open("root", &status);
4340 if (U_FAILURE(status)){
4341 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4342 myErrorName(status));
4343 return;
4344 }
4345 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4346 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4347 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4348 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4349 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4350 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4351
4352 /* Setting up our collator to do digits. */
4353 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4354 if (U_FAILURE(status)){
4355 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4356 myErrorName(status));
4357 return;
4358 }
4359
4360 /*
4361 Testing that prepended zeroes still yield the correct collation behavior.
4362 We expect that every element in our strings array will be equal.
4363 */
4364 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4365
4366 ucol_close(coll);
4367 }
4368
TestTibetanConformance(void)4369 static void TestTibetanConformance(void)
4370 {
4371 const char* test[] = {
4372 "\\u0FB2\\u0591\\u0F71\\u0061",
4373 "\\u0FB2\\u0F71\\u0061"
4374 };
4375
4376 UErrorCode status = U_ZERO_ERROR;
4377 UCollator *coll = ucol_open("", &status);
4378 UChar source[100];
4379 UChar target[100];
4380 int result;
4381 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4382 if (U_SUCCESS(status)) {
4383 u_unescape(test[0], source, 100);
4384 u_unescape(test[1], target, 100);
4385 doTest(coll, source, target, UCOL_EQUAL);
4386 result = ucol_strcoll(coll, source, -1, target, -1);
4387 log_verbose("result %d\n", result);
4388 if (UCOL_EQUAL != result) {
4389 log_err("Tibetan comparison error\n");
4390 }
4391 }
4392 ucol_close(coll);
4393
4394 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4395 }
4396
/* Hands two Han strings to genericLocaleStarter() for the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN",
                         test,
                         sizeof(test)/sizeof(test[0]));
}
4401
4402 #define TST_UCOL_MAX_INPUT 0x220001
4403 #define topByte 0xFF000000;
4404 #define bottomByte 0xFF;
4405 #define fourBytes 0xFFFFFFFF;
4406
4407
showImplicit(UChar32 i)4408 static void showImplicit(UChar32 i) {
4409 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4410 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4411 }
4412 }
4413
TestImplicitGeneration(void)4414 static void TestImplicitGeneration(void) {
4415 UErrorCode status = U_ZERO_ERROR;
4416 UChar32 last = 0;
4417 UChar32 current;
4418 UChar32 i = 0, j = 0;
4419 UChar32 roundtrip = 0;
4420 UChar32 lastBottom = 0;
4421 UChar32 currentBottom = 0;
4422 UChar32 lastTop = 0;
4423 UChar32 currentTop = 0;
4424
4425 UCollator *coll = ucol_open("root", &status);
4426 if(U_FAILURE(status)) {
4427 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4428 return;
4429 }
4430
4431 uprv_uca_getRawFromImplicit(0xE20303E7);
4432
4433 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4434 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4435
4436 /* check that it round-trips AND that all intervening ones are illegal*/
4437 roundtrip = uprv_uca_getRawFromImplicit(current);
4438 if (roundtrip != i) {
4439 log_err("No roundtrip %08X\n", i);
4440 }
4441 if (last != 0) {
4442 for (j = last + 1; j < current; ++j) {
4443 roundtrip = uprv_uca_getRawFromImplicit(j);
4444 /* raise an error if it *doesn't* find an error*/
4445 if (roundtrip != -1) {
4446 log_err("Fails to recognize illegal %08X\n", j);
4447 }
4448 }
4449 }
4450 /* now do other consistency checks*/
4451 lastBottom = last & bottomByte;
4452 currentBottom = current & bottomByte;
4453 lastTop = last & topByte;
4454 currentTop = current & topByte;
4455 (void)lastBottom; /* Suppress set but not used warnings. */
4456 (void)currentBottom;
4457
4458 /* print out some values for spot-checking*/
4459 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4460 showImplicit(i-3);
4461 showImplicit(i-2);
4462 showImplicit(i-1);
4463 showImplicit(i);
4464 showImplicit(i+1);
4465 showImplicit(i+2);
4466 }
4467 last = current;
4468
4469 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4470 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4471 }
4472 }
4473 showImplicit(TST_UCOL_MAX_INPUT-2);
4474 showImplicit(TST_UCOL_MAX_INPUT-1);
4475 showImplicit(TST_UCOL_MAX_INPUT);
4476 ucol_close(coll);
4477 }
4478
4479 /**
4480 * Iterate through the given iterator, checking to see that all the strings
4481 * in the expected array are present.
4482 * @param expected array of strings we expect to see, or NULL
4483 * @param expectedCount number of elements of expected, or 0
4484 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)4485 static int32_t checkUEnumeration(const char* msg,
4486 UEnumeration* iter,
4487 const char** expected,
4488 int32_t expectedCount) {
4489 UErrorCode ec = U_ZERO_ERROR;
4490 int32_t i = 0, n, j, bit;
4491 int32_t seenMask = 0;
4492
4493 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4494 n = uenum_count(iter, &ec);
4495 if (!assertSuccess("count", &ec)) return -1;
4496 log_verbose("%s = [", msg);
4497 for (;; ++i) {
4498 const char* s = uenum_next(iter, NULL, &ec);
4499 if (!assertSuccess("snext", &ec) || s == NULL) break;
4500 if (i != 0) log_verbose(",");
4501 log_verbose("%s", s);
4502 /* check expected list */
4503 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4504 if ((seenMask&bit) == 0 &&
4505 uprv_strcmp(s, expected[j]) == 0) {
4506 seenMask |= bit;
4507 break;
4508 }
4509 }
4510 }
4511 log_verbose("] (%d)\n", i);
4512 assertTrue("count verified", i==n);
4513 /* did we see all expected strings? */
4514 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4515 if ((seenMask&bit)!=0) {
4516 log_verbose("Ok: \"%s\" seen\n", expected[j]);
4517 } else {
4518 log_err("FAIL: \"%s\" not seen\n", expected[j]);
4519 }
4520 }
4521 return n;
4522 }
4523
4524 /**
4525 * Test new API added for separate collation tree.
4526 */
TestSeparateTrees(void)4527 static void TestSeparateTrees(void) {
4528 UErrorCode ec = U_ZERO_ERROR;
4529 UEnumeration *e = NULL;
4530 int32_t n = -1;
4531 UBool isAvailable;
4532 char loc[256];
4533
4534 static const char* AVAIL[] = { "en", "de" };
4535
4536 static const char* KW[] = { "collation" };
4537
4538 static const char* KWVAL[] = { "phonebook", "stroke" };
4539
4540 #if !UCONFIG_NO_SERVICE
4541 e = ucol_openAvailableLocales(&ec);
4542 if (e != NULL) {
4543 assertSuccess("ucol_openAvailableLocales", &ec);
4544 assertTrue("ucol_openAvailableLocales!=0", e!=0);
4545 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4546 (void)n; /* Suppress set but not used warnings. */
4547 /* Don't need to check n because we check list */
4548 uenum_close(e);
4549 } else {
4550 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4551 }
4552 #endif
4553
4554 e = ucol_getKeywords(&ec);
4555 if (e != NULL) {
4556 assertSuccess("ucol_getKeywords", &ec);
4557 assertTrue("ucol_getKeywords!=0", e!=0);
4558 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4559 /* Don't need to check n because we check list */
4560 uenum_close(e);
4561 } else {
4562 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4563 }
4564
4565 e = ucol_getKeywordValues(KW[0], &ec);
4566 if (e != NULL) {
4567 assertSuccess("ucol_getKeywordValues", &ec);
4568 assertTrue("ucol_getKeywordValues!=0", e!=0);
4569 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4570 /* Don't need to check n because we check list */
4571 uenum_close(e);
4572 } else {
4573 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4574 }
4575
4576 /* Try setting a warning before calling ucol_getKeywordValues */
4577 ec = U_USING_FALLBACK_WARNING;
4578 e = ucol_getKeywordValues(KW[0], &ec);
4579 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4580 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4581 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4582 /* Don't need to check n because we check list */
4583 uenum_close(e);
4584 }
4585
4586 /*
4587 U_DRAFT int32_t U_EXPORT2
4588 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4589 const char* locale, UBool* isAvailable,
4590 UErrorCode* status);
4591 }
4592 */
4593 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4594 &isAvailable, &ec);
4595 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4596 assertEquals("getFunctionalEquivalent(de)", "root", loc);
4597 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4598 isAvailable == TRUE);
4599 }
4600
4601 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4602 &isAvailable, &ec);
4603 if (assertSuccess("getFunctionalEquivalent", &ec)) {
4604 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
4605 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4606 isAvailable == TRUE);
4607 }
4608 }
4609
/*
 * Supersedes TestJ784.
 * Runs two string lists (toned and plain vowels after 'l' and after 'x'/'X')
 * through genericRulesStarter() with a set of "[before 2]" tailoring rules and
 * through genericLocaleStarter() with the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* syllables starting with 'l' */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* every tone and case combination of 'a' after 'x' / 'X' */
    static const char *toneCases[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    /* first list: explicit rules, then the zh locale */
    genericRulesStarter(pinyinRules, syllables, sizeof(syllables)/sizeof(syllables[0]));
    genericLocaleStarter("zh", syllables, sizeof(syllables)/sizeof(syllables[0]));

    /* second list: same two checks */
    genericRulesStarter(pinyinRules, toneCases, sizeof(toneCases)/sizeof(toneCases[0]));
    genericLocaleStarter("zh", toneCases, sizeof(toneCases)/sizeof(toneCases[0]));
}
4670
TestBeforeTightening(void)4671 static void TestBeforeTightening(void) {
4672 static const struct {
4673 const char *rules;
4674 UErrorCode expectedStatus;
4675 } tests[] = {
4676 { "&[before 1]a<x", U_ZERO_ERROR },
4677 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4678 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4679 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4680 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4681 { "&[before 2]a<<x",U_ZERO_ERROR },
4682 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4683 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4684 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
4685 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
4686 { "&[before 3]a<<<x",U_ZERO_ERROR },
4687 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
4688 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4689 };
4690
4691 int32_t i = 0;
4692
4693 UErrorCode status = U_ZERO_ERROR;
4694 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4695 uint32_t rlen = 0;
4696
4697 UCollator *coll = NULL;
4698
4699
4700 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4701 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4702 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4703 if(status != tests[i].expectedStatus) {
4704 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4705 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4706 }
4707 ucol_close(coll);
4708 status = U_ZERO_ERROR;
4709 }
4710
4711 }
4712
4713 /*
4714 &m < a
4715 &[before 1] a < x <<< X << q <<< Q < z
4716 assert: m <<< M < x <<< X << q <<< Q < z < a < n
4717
4718 &m < a
4719 &[before 2] a << x <<< X << q <<< Q < z
4720 assert: m <<< M < x <<< X << q <<< Q << a < z < n
4721
4722 &m < a
4723 &[before 3] a <<< x <<< X << q <<< Q < z
4724 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4725
4726
4727 &m << a
4728 &[before 1] a < x <<< X << q <<< Q < z
4729 assert: x <<< X << q <<< Q < z < m <<< M << a < n
4730
4731 &m << a
4732 &[before 2] a << x <<< X << q <<< Q < z
4733 assert: m <<< M << x <<< X << q <<< Q << a < z < n
4734
4735 &m << a
4736 &[before 3] a <<< x <<< X << q <<< Q < z
4737 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4738
4739
4740 &m <<< a
4741 &[before 1] a < x <<< X << q <<< Q < z
4742 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4743
4744 &m <<< a
4745 &[before 2] a << x <<< X << q <<< Q < z
4746 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
4747
4748 &m <<< a
4749 &[before 3] a <<< x <<< X << q <<< Q < z
4750 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
4751
4752
4753 &[before 1] s < x <<< X << q <<< Q < z
4754 assert: r <<< R < x <<< X << q <<< Q < z < s < n
4755
4756 &[before 2] s << x <<< X << q <<< Q < z
4757 assert: r <<< R < x <<< X << q <<< Q << s < z < n
4758
4759 &[before 3] s <<< x <<< X << q <<< Q < z
4760 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4761
4762
4763 &[before 1] \u24DC < x <<< X << q <<< Q < z
4764 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4765
4766 &[before 2] \u24DC << x <<< X << q <<< Q < z
4767 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4768
4769 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
4770 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
4771 */
4772
4773
#if 0
/* requires features not yet supported */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each table entry pairs a tailoring rule that uses a [before N] reset with
 * the expected ascending order of the affected strings; "size" is the number
 * of strings actually filled in "order".  Every entry is handed to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/order pair through the generic rules-based checker. */
    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
#endif
4821
TestTailorNULL(void)4822 static void TestTailorNULL( void ) {
4823 const static char* rule = "&a <<< '\\u0000'";
4824 UErrorCode status = U_ZERO_ERROR;
4825 UChar rlz[RULE_BUFFER_LEN] = { 0 };
4826 uint32_t rlen = 0;
4827 UChar a = 1, null = 0;
4828 UCollationResult res = UCOL_EQUAL;
4829
4830 UCollator *coll = NULL;
4831
4832
4833 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4834 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4835
4836 if(U_FAILURE(status)) {
4837 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4838 } else {
4839 res = ucol_strcoll(coll, &a, 1, &null, 1);
4840
4841 if(res != UCOL_LESS) {
4842 log_err("NULL was not tailored properly!\n");
4843 }
4844 }
4845
4846 ucol_close(coll);
4847 }
4848
4849 static void
TestUpperFirstQuaternary(void)4850 TestUpperFirstQuaternary(void)
4851 {
4852 const char* tests[] = { "B", "b", "Bb", "bB" };
4853 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4854 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4855 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4856 }
4857
4858 static void
TestJ4960(void)4859 TestJ4960(void)
4860 {
4861 const char* tests[] = { "\\u00e2T", "aT" };
4862 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4863 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4864 const char* tests2[] = { "a", "A" };
4865 const char* rule = "&[first tertiary ignorable]=A=a";
4866 UColAttribute att2[] = { UCOL_CASE_LEVEL };
4867 UColAttributeValue attVals2[] = { UCOL_ON };
4868 /* Test whether we correctly ignore primary ignorables on case level when */
4869 /* we have only primary & case level */
4870 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4871 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4872 /* and case level */
4873 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4874 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4875 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4876 }
4877
4878 static void
TestJ5223(void)4879 TestJ5223(void)
4880 {
4881 static const char *test = "this is a test string";
4882 UChar ustr[256];
4883 int32_t ustr_length = u_unescape(test, ustr, 256);
4884 unsigned char sortkey[256];
4885 int32_t sortkey_length;
4886 UErrorCode status = U_ZERO_ERROR;
4887 static UCollator *coll = NULL;
4888 coll = ucol_open("root", &status);
4889 if(U_FAILURE(status)) {
4890 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4891 return;
4892 }
4893 ucol_setStrength(coll, UCOL_PRIMARY);
4894 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4895 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4896 if (U_FAILURE(status)) {
4897 log_err("Failed setting atributes\n");
4898 return;
4899 }
4900 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4901 if (sortkey_length > 256) return;
4902
4903 /* we mark the position where the null byte should be written in advance */
4904 sortkey[sortkey_length-1] = 0xAA;
4905
4906 /* we set the buffer size one byte higher than needed */
4907 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4908 sortkey_length+1);
4909
4910 /* no error occurs (for me) */
4911 if (sortkey[sortkey_length-1] == 0xAA) {
4912 log_err("Hit bug at first try\n");
4913 }
4914
4915 /* we mark the position where the null byte should be written again */
4916 sortkey[sortkey_length-1] = 0xAA;
4917
4918 /* this time we set the buffer size to the exact amount needed */
4919 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4920 sortkey_length);
4921
4922 /* now the trailing null byte is not written */
4923 if (sortkey[sortkey_length-1] == 0xAA) {
4924 log_err("Hit bug at second try\n");
4925 }
4926
4927 ucol_close(coll);
4928 }
4929
/*
 * Regression test for Thai partial sort key problem: runs the generic
 * locale-based ordering check for the "th" locale on two Thai strings that
 * differ only in their second-to-last code unit (U+0E47 vs. U+0E48).
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };
    genericLocaleStarter("th", thaiPair, sizeof(thaiPair)/sizeof(thaiPair[0]));
}
4941
/*
 * Runs the generic rules-based ordering check on "a", "y" with a tailoring
 * that resets on the contraction-like sequence "Ny" and then places "a"
 * after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *ordered[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
4949
4950 static void
TestVI5913(void)4951 TestVI5913(void)
4952 {
4953 UErrorCode status = U_ZERO_ERROR;
4954 int32_t i, j;
4955 UCollator *coll =NULL;
4956 uint8_t resColl[100], expColl[100];
4957 int32_t rLen, tLen, ruleLen, sLen, kLen;
4958 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/
4959 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4960 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4961 static const UChar tData[][20]={
4962 {0x1EAC, 0},
4963 {0x0041, 0x0323, 0x0302, 0},
4964 {0x1EA0, 0x0302, 0},
4965 {0x00C2, 0x0323, 0},
4966 {0x1ED8, 0}, /* O with dot and circumflex */
4967 {0x1ECC, 0x0302, 0},
4968 {0x1EB7, 0},
4969 {0x1EA1, 0x0306, 0},
4970 };
4971 static const UChar tailorData[][20]={
4972 {0x1FA2, 0}, /* Omega with 3 combining marks */
4973 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4974 {0x1FF3, 0x0313, 0x0300, 0},
4975 {0x1F60, 0x0300, 0x0345, 0},
4976 {0x1F62, 0x0345, 0},
4977 {0x1FA0, 0x0300, 0},
4978 };
4979 static const UChar tailorData2[][20]={
4980 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4981 {0x0073, 0x0323, 0x030C, 0},
4982 {0x0073, 0x030C, 0x0323, 0},
4983 };
4984 static const UChar tailorData3[][20]={
4985 {0x007a, 0}, /* z */
4986 {0x0061, 0x0065, 0}, /* a + e */
4987 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4988 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4989 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4990 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4991 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4992 {0x00EA, 0}, /* e with circumflex */
4993 };
4994
4995 /* Test Vietnamese sort. */
4996 coll = ucol_open("vi", &status);
4997 if(U_FAILURE(status)) {
4998 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4999 return;
5000 }
5001 log_verbose("\n\nVI collation:");
5002 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
5003 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5004 }
5005 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
5006 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5007 }
5008 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
5009 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
5010 }
5011 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
5012 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5013 }
5014
5015 for (j=0; j<8; j++) {
5016 tLen = u_strlen(tData[j]);
5017 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
5018 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5019 for(i = 0; i<rLen; i++) {
5020 log_verbose(" %02X", resColl[i]);
5021 }
5022 }
5023
5024 ucol_close(coll);
5025
5026 /* Test Romanian sort. */
5027 coll = ucol_open("ro", &status);
5028 log_verbose("\n\nRO collation:");
5029 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
5030 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5031 }
5032 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
5033 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5034 }
5035 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5036 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5037 }
5038
5039 for (j=4; j<8; j++) {
5040 tLen = u_strlen(tData[j]);
5041 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
5042 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5043 for(i = 0; i<rLen; i++) {
5044 log_verbose(" %02X", resColl[i]);
5045 }
5046 }
5047 ucol_close(coll);
5048
5049 /* Test the precomposed Greek character with 3 combining marks. */
5050 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5051 ruleLen = u_strlen(rule);
5052 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5053 if (U_FAILURE(status)) {
5054 log_err("ucol_openRules failed with %s\n", u_errorName(status));
5055 return;
5056 }
5057 sLen = u_strlen(tailorData[0]);
5058 for (j=1; j<6; j++) {
5059 tLen = u_strlen(tailorData[j]);
5060 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
5061 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5062 }
5063 }
5064 /* Test getSortKey. */
5065 tLen = u_strlen(tailorData[0]);
5066 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5067 for (j=0; j<6; j++) {
5068 tLen = u_strlen(tailorData[j]);
5069 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5070 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5071 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5072 for(i = 0; i<rLen; i++) {
5073 log_err(" %02X", resColl[i]);
5074 }
5075 }
5076 }
5077 ucol_close(coll);
5078
5079 log_verbose("\n\nTailoring test for s with caron:");
5080 ruleLen = u_strlen(rule2);
5081 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5082 tLen = u_strlen(tailorData2[0]);
5083 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5084 for (j=1; j<3; j++) {
5085 tLen = u_strlen(tailorData2[j]);
5086 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5087 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5088 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5089 for(i = 0; i<rLen; i++) {
5090 log_err(" %02X", resColl[i]);
5091 }
5092 }
5093 }
5094 ucol_close(coll);
5095
5096 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5097 ruleLen = u_strlen(rule3);
5098 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5099 tLen = u_strlen(tailorData3[3]);
5100 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5101 for (j=4; j<6; j++) {
5102 tLen = u_strlen(tailorData3[j]);
5103 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5104
5105 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5106 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5107 for(i = 0; i<rLen; i++) {
5108 log_err(" %02X", resColl[i]);
5109 }
5110 }
5111
5112 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
5113 for(i = 0; i<rLen; i++) {
5114 log_verbose(" %02X", resColl[i]);
5115 }
5116 }
5117 ucol_close(coll);
5118 }
5119
5120 static void
TestTailor6179(void)5121 TestTailor6179(void)
5122 {
5123 UErrorCode status = U_ZERO_ERROR;
5124 int32_t i;
5125 UCollator *coll =NULL;
5126 uint8_t resColl[100];
5127 int32_t rLen, tLen, ruleLen;
5128 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5129 static const UChar rule1[]={
5130 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5131 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5132 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5133 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5134 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5135 static const UChar rule2[]={
5136 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5137 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5138 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5139 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5140 0x3C,0x3C,0x20,0x62,0};
5141
5142 static const UChar tData1[][4]={
5143 {0x61, 0},
5144 {0x62, 0},
5145 { 0xFDD0,0x009E, 0}
5146 };
5147 static const UChar tData2[][4]={
5148 {0x61, 0},
5149 {0x62, 0},
5150 { 0xFDD0,0x009E, 0}
5151 };
5152
5153 /*
5154 * These values from FractionalUCA.txt will change,
5155 * and need to be updated here.
5156 */
5157 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5158 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5159 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5160 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5161
5162 /* Test [Last Primary ignorable] */
5163
5164 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
5165 ruleLen = u_strlen(rule1);
5166 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5167 if (U_FAILURE(status)) {
5168 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5169 return;
5170 }
5171 tLen = u_strlen(tData1[0]);
5172 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5173 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5174 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
5175 for(i = 0; i<rLen; i++) {
5176 log_err(" %02X", resColl[i]);
5177 }
5178 log_err("\n");
5179 }
5180 tLen = u_strlen(tData1[1]);
5181 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5182 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5183 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
5184 for(i = 0; i<rLen; i++) {
5185 log_err(" %02X", resColl[i]);
5186 }
5187 log_err("\n");
5188 }
5189 ucol_close(coll);
5190
5191
5192 /* Test [Last Secondary ignorable] */
5193 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
5194 ruleLen = u_strlen(rule1);
5195 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5196 if (U_FAILURE(status)) {
5197 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5198 return;
5199 }
5200 tLen = u_strlen(tData2[0]);
5201 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5202 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5203 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
5204 for(i = 0; i<rLen; i++) {
5205 log_err(" %02X", resColl[i]);
5206 }
5207 log_err("\n");
5208 }
5209 if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
5210 tLen = u_strlen(tData2[1]);
5211 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5212 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5213 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
5214 for(i = 0; i<rLen; i++) {
5215 log_err(" %02X", resColl[i]);
5216 }
5217 log_err("\n");
5218 }
5219 }
5220 ucol_close(coll);
5221 }
5222
5223 static void
TestUCAPrecontext(void)5224 TestUCAPrecontext(void)
5225 {
5226 UErrorCode status = U_ZERO_ERROR;
5227 int32_t i, j;
5228 UCollator *coll =NULL;
5229 uint8_t resColl[100], prevColl[100];
5230 int32_t rLen, tLen, ruleLen;
5231 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5232 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5233 /* & l middle-dot << a a is an expansion. */
5234
5235 UChar tData1[][20]={
5236 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5237 { 0x387, 0}, /* standalone middle dot(0x387) */
5238 { 0x61, 0}, /* a */
5239 { 0x6C, 0}, /* l */
5240 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
5241 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5242 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5243 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5244 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5245 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5246 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5247 };
5248
5249 log_verbose("\n\nEN collation:");
5250 coll = ucol_open("en", &status);
5251 if (U_FAILURE(status)) {
5252 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5253 return;
5254 }
5255 for (j=0; j<11; j++) {
5256 tLen = u_strlen(tData1[j]);
5257 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5258 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5259 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5260 j, tData1[j]);
5261 }
5262 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5263 for(i = 0; i<rLen; i++) {
5264 log_verbose(" %02X", resColl[i]);
5265 }
5266 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5267 }
5268 ucol_close(coll);
5269
5270
5271 log_verbose("\n\nJA collation:");
5272 coll = ucol_open("ja", &status);
5273 if (U_FAILURE(status)) {
5274 log_err("Tailoring test: &z <<a|- failed!");
5275 return;
5276 }
5277 for (j=0; j<11; j++) {
5278 tLen = u_strlen(tData1[j]);
5279 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5280 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5281 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5282 j, tData1[j]);
5283 }
5284 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5285 for(i = 0; i<rLen; i++) {
5286 log_verbose(" %02X", resColl[i]);
5287 }
5288 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5289 }
5290 ucol_close(coll);
5291
5292
5293 log_verbose("\n\nTailoring test: & middle dot < a ");
5294 ruleLen = u_strlen(rule1);
5295 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5296 if (U_FAILURE(status)) {
5297 log_err("Tailoring test: & middle dot < a failed!");
5298 return;
5299 }
5300 for (j=0; j<11; j++) {
5301 tLen = u_strlen(tData1[j]);
5302 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5303 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5304 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5305 j, tData1[j]);
5306 }
5307 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5308 for(i = 0; i<rLen; i++) {
5309 log_verbose(" %02X", resColl[i]);
5310 }
5311 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5312 }
5313 ucol_close(coll);
5314
5315
5316 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5317 ruleLen = u_strlen(rule2);
5318 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5319 if (U_FAILURE(status)) {
5320 log_err("Tailoring test: & l middle-dot << a failed!");
5321 return;
5322 }
5323 for (j=0; j<11; j++) {
5324 tLen = u_strlen(tData1[j]);
5325 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5326 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5327 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5328 j, tData1[j]);
5329 }
5330 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5331 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5332 j, tData1[j]);
5333 }
5334 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
5335 for(i = 0; i<rLen; i++) {
5336 log_verbose(" %02X", resColl[i]);
5337 }
5338 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5339 }
5340 ucol_close(coll);
5341 }
5342
5343 static void
TestOutOfBuffer5468(void)5344 TestOutOfBuffer5468(void)
5345 {
5346 static const char *test = "\\u4e00";
5347 UChar ustr[256];
5348 int32_t ustr_length = u_unescape(test, ustr, 256);
5349 unsigned char shortKeyBuf[1];
5350 int32_t sortkey_length;
5351 UErrorCode status = U_ZERO_ERROR;
5352 static UCollator *coll = NULL;
5353
5354 coll = ucol_open("root", &status);
5355 if(U_FAILURE(status)) {
5356 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5357 return;
5358 }
5359 ucol_setStrength(coll, UCOL_PRIMARY);
5360 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5361 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5362 if (U_FAILURE(status)) {
5363 log_err("Failed setting atributes\n");
5364 return;
5365 }
5366
5367 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5368 if (sortkey_length != 4) {
5369 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
5370 }
5371 log_verbose("length of sortKey is %d", sortkey_length);
5372 ucol_close(coll);
5373 }
5374
5375 #define TSKC_DATA_SIZE 5
5376 #define TSKC_BUF_SIZE 50
5377 static void
TestSortKeyConsistency(void)5378 TestSortKeyConsistency(void)
5379 {
5380 UErrorCode icuRC = U_ZERO_ERROR;
5381 UCollator* ucol;
5382 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5383
5384 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5385 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5386 int32_t i, j, i2;
5387
5388 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5389 if (U_FAILURE(icuRC))
5390 {
5391 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5392 return;
5393 }
5394
5395 for (i = 0; i < TSKC_DATA_SIZE; i++)
5396 {
5397 UCharIterator uiter;
5398 uint32_t state[2] = { 0, 0 };
5399 int32_t dataLen = i+1;
5400 for (j=0; j<TSKC_BUF_SIZE; j++)
5401 bufFull[i][j] = bufPart[i][j] = 0;
5402
5403 /* Full sort key */
5404 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5405
5406 /* Partial sort key */
5407 uiter_setString(&uiter, data, dataLen);
5408 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5409 if (U_FAILURE(icuRC))
5410 {
5411 log_err("ucol_nextSortKeyPart failed\n");
5412 ucol_close(ucol);
5413 return;
5414 }
5415
5416 for (i2=0; i2<i; i2++)
5417 {
5418 UBool fullMatch = TRUE;
5419 UBool partMatch = TRUE;
5420 for (j=0; j<TSKC_BUF_SIZE; j++)
5421 {
5422 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5423 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5424 }
5425 if (fullMatch != partMatch) {
5426 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5427 : "partial key was consistent, but full key changed\n");
5428 ucol_close(ucol);
5429 return;
5430 }
5431 }
5432 }
5433
5434 /*=============================================*/
5435 ucol_close(ucol);
5436 }
5437
5438 /* ticket: 6101 */
TestCroatianSortKey(void)5439 static void TestCroatianSortKey(void) {
5440 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5441 UErrorCode status = U_ZERO_ERROR;
5442 UCollator *ucol;
5443 UCharIterator iter;
5444
5445 static const UChar text[] = { 0x0044, 0xD81A };
5446
5447 size_t length = sizeof(text)/sizeof(*text);
5448
5449 uint8_t textSortKey[32];
5450 size_t lenSortKey = 32;
5451 size_t actualSortKeyLen;
5452 uint32_t uStateInfo[2] = { 0, 0 };
5453
5454 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5455 if (U_FAILURE(status)) {
5456 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5457 return;
5458 }
5459
5460 uiter_setString(&iter, text, length);
5461
5462 actualSortKeyLen = ucol_nextSortKeyPart(
5463 ucol, &iter, (uint32_t*)uStateInfo,
5464 textSortKey, lenSortKey, &status
5465 );
5466
5467 if (actualSortKeyLen == lenSortKey) {
5468 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5469 }
5470
5471 ucol_close(ucol);
5472 }
5473
5474 /* ticket: 6140 */
5475 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5476 * they are both Hiragana and Katakana
5477 */
5478 #define SORTKEYLEN 50
TestHiragana(void)5479 static void TestHiragana(void) {
5480 UErrorCode status = U_ZERO_ERROR;
5481 UCollator* ucol;
5482 UCollationResult strcollresult;
5483 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5484 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5485 int32_t data1Len = sizeof(data1)/sizeof(*data1);
5486 int32_t data2Len = sizeof(data2)/sizeof(*data2);
5487 int32_t i, j;
5488 uint8_t sortKey1[SORTKEYLEN];
5489 uint8_t sortKey2[SORTKEYLEN];
5490
5491 UCharIterator uiter1;
5492 UCharIterator uiter2;
5493 uint32_t state1[2] = { 0, 0 };
5494 uint32_t state2[2] = { 0, 0 };
5495 int32_t keySize1;
5496 int32_t keySize2;
5497
5498 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5499 &status);
5500 if (U_FAILURE(status)) {
5501 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5502 return;
5503 }
5504
5505 /* Start of full sort keys */
5506 /* Full sort key1 */
5507 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5508 /* Full sort key2 */
5509 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5510 if (keySize1 == keySize2) {
5511 for (i = 0; i < keySize1; i++) {
5512 if (sortKey1[i] != sortKey2[i]) {
5513 log_err("Full sort keys are different. Should be equal.");
5514 }
5515 }
5516 } else {
5517 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5518 }
5519 /* End of full sort keys */
5520
5521 /* Start of partial sort keys */
5522 /* Partial sort key1 */
5523 uiter_setString(&uiter1, data1, data1Len);
5524 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5525 /* Partial sort key2 */
5526 uiter_setString(&uiter2, data2, data2Len);
5527 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5528 if (U_SUCCESS(status) && keySize1 == keySize2) {
5529 for (j = 0; j < keySize1; j++) {
5530 if (sortKey1[j] != sortKey2[j]) {
5531 log_err("Partial sort keys are different. Should be equal");
5532 }
5533 }
5534 } else {
5535 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5536 }
5537 /* End of partial sort keys */
5538
5539 /* Start of strcoll */
5540 /* Use ucol_strcoll() to determine ordering */
5541 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5542 if (strcollresult != UCOL_EQUAL) {
5543 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5544 }
5545
5546 ucol_close(ucol);
5547 }
5548
/*
 * Convenient struct for running collation tests.
 * One entry describes a single comparison: two UTF-16 strings stored in
 * fixed-size arrays of MAX_TOKEN_LEN code units, plus the collation result
 * expected when comparing source with target.
 */
typedef struct {
    const UChar source[MAX_TOKEN_LEN]; /* String on left */
    const UChar target[MAX_TOKEN_LEN]; /* String on right */
    UCollationResult result; /* Expected result of comparing source with target */
} OneTestCase;
5555
5556 /*
5557 * Utility function to test one collation test case.
5558 * @param testcases Array of test cases.
5559 * @param n_testcases Size of the array testcases.
5560 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5561 * @param n_rules Size of the array str_rules.
5562 */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)5563 static void doTestOneTestCase(const OneTestCase testcases[],
5564 int n_testcases,
5565 const char* str_rules[],
5566 int n_rules)
5567 {
5568 int rule_no, testcase_no;
5569 UChar rule[500];
5570 int32_t length = 0;
5571 UErrorCode status = U_ZERO_ERROR;
5572 UParseError parse_error;
5573 UCollator *myCollation;
5574
5575 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5576
5577 length = u_unescape(str_rules[rule_no], rule, 500);
5578 if (length == 0) {
5579 log_err("ERROR: The rule cannot be unescaped: %s\n");
5580 return;
5581 }
5582 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5583 if(U_FAILURE(status)){
5584 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585 return;
5586 }
5587 log_verbose("Testing the <<* syntax\n");
5588 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5589 ucol_setStrength(myCollation, UCOL_TERTIARY);
5590 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5591 doTest(myCollation,
5592 testcases[testcase_no].source,
5593 testcases[testcase_no].target,
5594 testcases[testcase_no].result
5595 );
5596 }
5597 ucol_close(myCollation);
5598 }
5599 }
5600
5601 const static OneTestCase rangeTestcases[] = {
5602 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
5603 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
5604 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
5605
5606 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
5607 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
5608 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
5609 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
5610 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
5611
5612 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
5613 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
5614 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
5615 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
5616
5617 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
5618 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
5619 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
5620 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
5621 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
5622 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
5623 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
5624 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
5625 };
5626
5627 static int nRangeTestcases = LEN(rangeTestcases);
5628
5629 const static OneTestCase rangeTestcasesSupplemental[] = {
5630 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
5631 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
5632 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
5633 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
5634 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5635 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
5636 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */
5637 };
5638
5639 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5640
5641 const static OneTestCase rangeTestcasesQwerty[] = {
5642 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
5643 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
5644
5645 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
5646 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
5647
5648 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
5649 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
5650
5651 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
5652 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
5653
5654 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5655 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
5656 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5657 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
5658 };
5659
5660 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5661
TestSameStrengthList(void)5662 static void TestSameStrengthList(void)
5663 {
5664 const char* strRules[] = {
5665 /* Normal */
5666 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5667
5668 /* Lists */
5669 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5670 };
5671 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5672 }
5673
TestSameStrengthListQuoted(void)5674 static void TestSameStrengthListQuoted(void)
5675 {
5676 const char* strRules[] = {
5677 /* Lists with quoted characters */
5678 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5679 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5680
5681 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5682 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5683
5684 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5685 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5686 };
5687 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5688 }
5689
TestSameStrengthListSupplemental(void)5690 static void TestSameStrengthListSupplemental(void)
5691 {
5692 const char* strRules[] = {
5693 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5694 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5695 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5696 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5697 };
5698 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5699 }
5700
TestSameStrengthListQwerty(void)5701 static void TestSameStrengthListQwerty(void)
5702 {
5703 const char* strRules[] = {
5704 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5705 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5706 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5707 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5708 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5709
5710 /* Quoted characters also will work if two quoted characters are not consecutive. */
5711 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5712
5713 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5714 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5715
5716 };
5717 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5718 }
5719
TestSameStrengthListQuotedQwerty(void)5720 static void TestSameStrengthListQuotedQwerty(void)
5721 {
5722 const char* strRules[] = {
5723 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5724 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5725 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5726
5727 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5728 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5729 };
5730 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5731 }
5732
TestSameStrengthListRanges(void)5733 static void TestSameStrengthListRanges(void)
5734 {
5735 const char* strRules[] = {
5736 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5737 };
5738 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5739 }
5740
TestSameStrengthListSupplementalRanges(void)5741 static void TestSameStrengthListSupplementalRanges(void)
5742 {
5743 const char* strRules[] = {
5744 "&\\ufffe<*\\uffff-\\U00010002",
5745 };
5746 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5747 }
5748
TestSpecialCharacters(void)5749 static void TestSpecialCharacters(void)
5750 {
5751 const char* strRules[] = {
5752 /* Normal */
5753 "&';'<'+'<','<'-'<'&'<'*'",
5754
5755 /* List */
5756 "&';'<*'+,-&*'",
5757
5758 /* Range */
5759 "&';'<*'+'-'-&*'",
5760 };
5761
5762 const static OneTestCase specialCharacterStrings[] = {
5763 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
5764 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
5765 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
5766 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
5767 };
5768 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5769 }
5770
TestPrivateUseCharacters(void)5771 static void TestPrivateUseCharacters(void)
5772 {
5773 const char* strRules[] = {
5774 /* Normal */
5775 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5776 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5777 };
5778
5779 const static OneTestCase privateUseCharacterStrings[] = {
5780 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5781 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5782 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5783 { {0xe2da}, {0xe2db}, UCOL_LESS },
5784 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5785 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5786 };
5787 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5788 }
5789
TestPrivateUseCharactersInList(void)5790 static void TestPrivateUseCharactersInList(void)
5791 {
5792 const char* strRules[] = {
5793 /* List */
5794 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5795 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5796 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5797 };
5798
5799 const static OneTestCase privateUseCharacterStrings[] = {
5800 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5801 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5802 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5803 { {0xe2da}, {0xe2db}, UCOL_LESS },
5804 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5805 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5806 };
5807 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5808 }
5809
TestPrivateUseCharactersInRange(void)5810 static void TestPrivateUseCharactersInRange(void)
5811 {
5812 const char* strRules[] = {
5813 /* Range */
5814 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5815 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5816 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5817 };
5818
5819 const static OneTestCase privateUseCharacterStrings[] = {
5820 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5821 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5822 { {0xe2d9}, {0xe2da}, UCOL_LESS },
5823 { {0xe2da}, {0xe2db}, UCOL_LESS },
5824 { {0xe2db}, {0xe2dc}, UCOL_LESS },
5825 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5826 };
5827 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5828 }
5829
TestInvalidListsAndRanges(void)5830 static void TestInvalidListsAndRanges(void)
5831 {
5832 const char* invalidRules[] = {
5833 /* Range not in starred expression */
5834 "&\\ufffe<\\uffff-\\U00010002",
5835
5836 /* Range without start */
5837 "&a<*-c",
5838
5839 /* Range without end */
5840 "&a<*b-",
5841
5842 /* More than one hyphen */
5843 "&a<*b-g-l",
5844
5845 /* Range in the wrong order */
5846 "&a<*k-b",
5847
5848 };
5849
5850 UChar rule[500];
5851 UErrorCode status = U_ZERO_ERROR;
5852 UParseError parse_error;
5853 int n_rules = LEN(invalidRules);
5854 int rule_no;
5855 int length;
5856 UCollator *myCollation;
5857
5858 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5859
5860 length = u_unescape(invalidRules[rule_no], rule, 500);
5861 if (length == 0) {
5862 log_err("ERROR: The rule cannot be unescaped: %s\n");
5863 return;
5864 }
5865 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5866 (void)myCollation; /* Suppress set but not used warning. */
5867 if(!U_FAILURE(status)){
5868 log_err("ERROR: Could not cause a failure as expected: \n");
5869 }
5870 status = U_ZERO_ERROR;
5871 }
5872 }
5873
5874 /*
5875 * This test ensures that characters placed before a character in a different script have the same lead byte
5876 * in their collation key before and after script reordering.
5877 */
TestBeforeRuleWithScriptReordering(void)5878 static void TestBeforeRuleWithScriptReordering(void)
5879 {
5880 UParseError error;
5881 UErrorCode status = U_ZERO_ERROR;
5882 UCollator *myCollation;
5883 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5884 UChar rules[500];
5885 uint32_t rulesLength = 0;
5886 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5887 UCollationResult collResult;
5888
5889 uint8_t baseKey[256];
5890 uint32_t baseKeyLength;
5891 uint8_t beforeKey[256];
5892 uint32_t beforeKeyLength;
5893
5894 UChar base[] = { 0x03b1 }; /* base */
5895 int32_t baseLen = sizeof(base)/sizeof(*base);
5896
5897 UChar before[] = { 0x0e01 }; /* ko kai */
5898 int32_t beforeLen = sizeof(before)/sizeof(*before);
5899
5900 /*UChar *data[] = { before, base };
5901 genericRulesStarter(srules, data, 2);*/
5902
5903 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5904
5905 (void)beforeKeyLength; /* Suppress set but not used warnings. */
5906 (void)baseKeyLength;
5907
5908 /* build collator */
5909 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5910
5911 rulesLength = u_unescape(srules, rules, LEN(rules));
5912 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5913 if(U_FAILURE(status)) {
5914 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5915 return;
5916 }
5917
5918 /* check collation results - before rule applied but not script reordering */
5919 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5920 if (collResult != UCOL_GREATER) {
5921 log_err("Collation result not correct before script reordering = %d\n", collResult);
5922 }
5923
5924 /* check the lead byte of the collation keys before script reordering */
5925 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5926 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5927 if (baseKey[0] != beforeKey[0]) {
5928 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5929 }
5930
5931 /* reorder the scripts */
5932 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5933 if(U_FAILURE(status)) {
5934 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5935 return;
5936 }
5937
5938 /* check collation results - before rule applied and after script reordering */
5939 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5940 if (collResult != UCOL_GREATER) {
5941 log_err("Collation result not correct after script reordering = %d\n", collResult);
5942 }
5943
5944 /* check the lead byte of the collation keys after script reordering */
5945 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5946 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5947 if (baseKey[0] != beforeKey[0]) {
5948 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5949 }
5950
5951 ucol_close(myCollation);
5952 }
5953
5954 /*
5955 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5956 */
TestNonLeadBytesDuringCollationReordering(void)5957 static void TestNonLeadBytesDuringCollationReordering(void)
5958 {
5959 UErrorCode status = U_ZERO_ERROR;
5960 UCollator *myCollation;
5961 int32_t reorderCodes[1] = {USCRIPT_GREEK};
5962
5963 uint8_t baseKey[256];
5964 uint32_t baseKeyLength;
5965 uint8_t reorderKey[256];
5966 uint32_t reorderKeyLength;
5967
5968 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5969
5970 uint32_t i;
5971
5972
5973 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5974
5975 /* build collator tertiary */
5976 myCollation = ucol_open("", &status);
5977 ucol_setStrength(myCollation, UCOL_TERTIARY);
5978 if(U_FAILURE(status)) {
5979 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5980 return;
5981 }
5982 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5983
5984 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5985 if(U_FAILURE(status)) {
5986 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5987 return;
5988 }
5989 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5990
5991 if (baseKeyLength != reorderKeyLength) {
5992 log_err("Key lengths not the same during reordering.\n");
5993 return;
5994 }
5995
5996 for (i = 1; i < baseKeyLength; i++) {
5997 if (baseKey[i] != reorderKey[i]) {
5998 log_err("Collation key bytes not the same at position %d.\n", i);
5999 return;
6000 }
6001 }
6002 ucol_close(myCollation);
6003
6004 /* build collator quaternary */
6005 myCollation = ucol_open("", &status);
6006 ucol_setStrength(myCollation, UCOL_QUATERNARY);
6007 if(U_FAILURE(status)) {
6008 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6009 return;
6010 }
6011 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
6012
6013 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6014 if(U_FAILURE(status)) {
6015 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6016 return;
6017 }
6018 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
6019
6020 if (baseKeyLength != reorderKeyLength) {
6021 log_err("Key lengths not the same during reordering.\n");
6022 return;
6023 }
6024
6025 for (i = 1; i < baseKeyLength; i++) {
6026 if (baseKey[i] != reorderKey[i]) {
6027 log_err("Collation key bytes not the same at position %d.\n", i);
6028 return;
6029 }
6030 }
6031 ucol_close(myCollation);
6032 }
6033
6034 /*
6035 * Test reordering API.
6036 */
TestReorderingAPI(void)6037 static void TestReorderingAPI(void)
6038 {
6039 UErrorCode status = U_ZERO_ERROR;
6040 UCollator *myCollation;
6041 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6042 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6043 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6044 UCollationResult collResult;
6045 int32_t retrievedReorderCodesLength;
6046 int32_t retrievedReorderCodes[10];
6047 UChar greekString[] = { 0x03b1 };
6048 UChar punctuationString[] = { 0x203e };
6049 int loopIndex;
6050
6051 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6052
6053 /* build collator tertiary */
6054 myCollation = ucol_open("", &status);
6055 ucol_setStrength(myCollation, UCOL_TERTIARY);
6056 if(U_FAILURE(status)) {
6057 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6058 return;
6059 }
6060
6061 /* set the reorderding */
6062 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6063 if (U_FAILURE(status)) {
6064 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6065 return;
6066 }
6067
6068 /* get the reordering */
6069 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6070 if (status != U_BUFFER_OVERFLOW_ERROR) {
6071 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6072 return;
6073 }
6074 status = U_ZERO_ERROR;
6075 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6076 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6077 return;
6078 }
6079 /* now let's really get it */
6080 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6081 if (U_FAILURE(status)) {
6082 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6083 return;
6084 }
6085 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6086 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6087 return;
6088 }
6089 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6090 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6091 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6092 return;
6093 }
6094 }
6095 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6096 if (collResult != UCOL_LESS) {
6097 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6098 return;
6099 }
6100
6101 /* clear the reordering */
6102 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6103 if (U_FAILURE(status)) {
6104 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6105 return;
6106 }
6107
6108 /* get the reordering again */
6109 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6110 if (retrievedReorderCodesLength != 0) {
6111 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6112 return;
6113 }
6114
6115 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6116 if (collResult != UCOL_GREATER) {
6117 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6118 return;
6119 }
6120
6121 /* test for error condition on duplicate reorder codes */
6122 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6123 if (!U_FAILURE(status)) {
6124 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6125 return;
6126 }
6127
6128 status = U_ZERO_ERROR;
6129 /* test for reorder codes after a reset code */
6130 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6131 if (!U_FAILURE(status)) {
6132 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6133 return;
6134 }
6135
6136 ucol_close(myCollation);
6137 }
6138
6139 /*
6140 * Test reordering API.
6141 */
TestReorderingAPIWithRuleCreatedCollator(void)6142 static void TestReorderingAPIWithRuleCreatedCollator(void)
6143 {
6144 UErrorCode status = U_ZERO_ERROR;
6145 UCollator *myCollation;
6146 UChar rules[90];
6147 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6148 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6149 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
6150 UCollationResult collResult;
6151 int32_t retrievedReorderCodesLength;
6152 int32_t retrievedReorderCodes[10];
6153 static const UChar greekString[] = { 0x03b1 };
6154 static const UChar punctuationString[] = { 0x203e };
6155 static const UChar hanString[] = { 0x65E5, 0x672C };
6156 int loopIndex;
6157
6158 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6159
6160 /* build collator from rules */
6161 u_uastrcpy(rules, "[reorder Hani Grek]");
6162 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6163 if(U_FAILURE(status)) {
6164 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6165 return;
6166 }
6167
6168 /* get the reordering */
6169 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6170 if (U_FAILURE(status)) {
6171 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6172 return;
6173 }
6174 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6175 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6176 return;
6177 }
6178 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6179 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6180 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6181 return;
6182 }
6183 }
6184 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6185 if (collResult != UCOL_GREATER) {
6186 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6187 return;
6188 }
6189
6190 /* set the reordering */
6191 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6192 if (U_FAILURE(status)) {
6193 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6194 return;
6195 }
6196
6197 /* get the reordering */
6198 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6199 if (status != U_BUFFER_OVERFLOW_ERROR) {
6200 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6201 return;
6202 }
6203 status = U_ZERO_ERROR;
6204 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6205 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6206 return;
6207 }
6208 /* now let's really get it */
6209 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6210 if (U_FAILURE(status)) {
6211 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6212 return;
6213 }
6214 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6215 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6216 return;
6217 }
6218 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6219 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6220 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6221 return;
6222 }
6223 }
6224 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6225 if (collResult != UCOL_LESS) {
6226 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6227 return;
6228 }
6229
6230 /* clear the reordering */
6231 ucol_setReorderCodes(myCollation, NULL, 0, &status);
6232 if (U_FAILURE(status)) {
6233 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6234 return;
6235 }
6236
6237 /* get the reordering again */
6238 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6239 if (retrievedReorderCodesLength != 0) {
6240 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6241 return;
6242 }
6243
6244 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6245 if (collResult != UCOL_GREATER) {
6246 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6247 return;
6248 }
6249
6250 /* reset the reordering */
6251 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
6252 if (U_FAILURE(status)) {
6253 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
6254 return;
6255 }
6256 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6257 if (U_FAILURE(status)) {
6258 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6259 return;
6260 }
6261 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6262 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6263 return;
6264 }
6265 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6266 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6267 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6268 return;
6269 }
6270 }
6271
6272 ucol_close(myCollation);
6273 }
6274
/*
 * qsort() comparator for arrays of int32_t codes, ascending order.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return A negative value, zero, or a positive value if *a is less than,
 *         equal to, or greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t left = *(const int32_t *)a;
    const int32_t right = *(const int32_t *)b;

    /* Compare instead of subtracting: a difference can overflow for values far apart. */
    return (left > right) - (left < right);
}
6279
TestEquivalentReorderingScripts(void)6280 static void TestEquivalentReorderingScripts(void) {
6281 UErrorCode status = U_ZERO_ERROR;
6282 int32_t equivalentScripts[50];
6283 int32_t equivalentScriptsLength;
6284 int loopIndex;
6285 int32_t equivalentScriptsResult[] = {
6286 USCRIPT_BOPOMOFO,
6287 USCRIPT_LISU,
6288 USCRIPT_LYCIAN,
6289 USCRIPT_CARIAN,
6290 USCRIPT_LYDIAN,
6291 USCRIPT_YI,
6292 USCRIPT_OLD_ITALIC,
6293 USCRIPT_GOTHIC,
6294 USCRIPT_DESERET,
6295 USCRIPT_SHAVIAN,
6296 USCRIPT_OSMANYA,
6297 USCRIPT_LINEAR_B,
6298 USCRIPT_CYPRIOT,
6299 USCRIPT_OLD_SOUTH_ARABIAN,
6300 USCRIPT_AVESTAN,
6301 USCRIPT_IMPERIAL_ARAMAIC,
6302 USCRIPT_INSCRIPTIONAL_PARTHIAN,
6303 USCRIPT_INSCRIPTIONAL_PAHLAVI,
6304 USCRIPT_UGARITIC,
6305 USCRIPT_OLD_PERSIAN,
6306 USCRIPT_CUNEIFORM,
6307 USCRIPT_EGYPTIAN_HIEROGLYPHS,
6308 USCRIPT_PHONETIC_POLLARD,
6309 USCRIPT_SORA_SOMPENG,
6310 USCRIPT_MEROITIC_CURSIVE,
6311 USCRIPT_MEROITIC_HIEROGLYPHS
6312 };
6313
6314 qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6315
6316 /* UScript.GOTHIC */
6317 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6318 if (U_FAILURE(status)) {
6319 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6320 return;
6321 }
6322 /*
6323 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6324 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6325 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6326 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6327 }
6328 */
6329 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6330 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6331 return;
6332 }
6333 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6334 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6335 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6336 return;
6337 }
6338 }
6339
6340 /* UScript.SHAVIAN */
6341 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6342 if (U_FAILURE(status)) {
6343 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6344 return;
6345 }
6346 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6347 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6348 return;
6349 }
6350 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6351 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6352 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6353 return;
6354 }
6355 }
6356 }
6357
TestReorderingAcrossCloning(void)6358 static void TestReorderingAcrossCloning(void)
6359 {
6360 UErrorCode status = U_ZERO_ERROR;
6361 UCollator *myCollation;
6362 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6363 UCollator *clonedCollation;
6364 int32_t retrievedReorderCodesLength;
6365 int32_t retrievedReorderCodes[10];
6366 int loopIndex;
6367
6368 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6369
6370 /* build collator tertiary */
6371 myCollation = ucol_open("", &status);
6372 ucol_setStrength(myCollation, UCOL_TERTIARY);
6373 if(U_FAILURE(status)) {
6374 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6375 return;
6376 }
6377
6378 /* set the reorderding */
6379 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6380 if (U_FAILURE(status)) {
6381 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6382 return;
6383 }
6384
6385 /* clone the collator */
6386 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
6387 if (U_FAILURE(status)) {
6388 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6389 return;
6390 }
6391
6392 /* get the reordering */
6393 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6394 if (U_FAILURE(status)) {
6395 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6396 return;
6397 }
6398 if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6399 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6400 return;
6401 }
6402 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6403 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6404 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6405 return;
6406 }
6407 }
6408
6409 /*uprv_free(buffer);*/
6410 ucol_close(myCollation);
6411 ucol_close(clonedCollation);
6412 }
6413
6414 /*
6415 * Utility function to test one collation reordering test case set.
6416 * @param testcases Array of test cases.
6417 * @param n_testcases Size of the array testcases.
6418 * @param reorderTokens Array of reordering codes.
6419 * @param reorderTokensLen Size of the array reorderTokens.
6420 */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)6421 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6422 {
6423 uint32_t testCaseNum;
6424 UErrorCode status = U_ZERO_ERROR;
6425 UCollator *myCollation;
6426
6427 myCollation = ucol_open("", &status);
6428 if (U_FAILURE(status)) {
6429 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6430 return;
6431 }
6432 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6433 if(U_FAILURE(status)) {
6434 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6435 return;
6436 }
6437
6438 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6439 doTest(myCollation,
6440 testCases[testCaseNum].source,
6441 testCases[testCaseNum].target,
6442 testCases[testCaseNum].result
6443 );
6444 }
6445 ucol_close(myCollation);
6446 }
6447
TestGreekFirstReorder(void)6448 static void TestGreekFirstReorder(void)
6449 {
6450 const char* strRules[] = {
6451 "[reorder Grek]"
6452 };
6453
6454 const int32_t apiRules[] = {
6455 USCRIPT_GREEK
6456 };
6457
6458 const static OneTestCase privateUseCharacterStrings[] = {
6459 { {0x0391}, {0x0391}, UCOL_EQUAL },
6460 { {0x0041}, {0x0391}, UCOL_GREATER },
6461 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6462 { {0x0060}, {0x0391}, UCOL_LESS },
6463 { {0x0391}, {0xe2dc}, UCOL_LESS },
6464 { {0x0391}, {0x0060}, UCOL_GREATER },
6465 };
6466
6467 /* Test rules creation */
6468 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6469
6470 /* Test collation reordering API */
6471 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6472 }
6473
TestGreekLastReorder(void)6474 static void TestGreekLastReorder(void)
6475 {
6476 const char* strRules[] = {
6477 "[reorder Zzzz Grek]"
6478 };
6479
6480 const int32_t apiRules[] = {
6481 USCRIPT_UNKNOWN, USCRIPT_GREEK
6482 };
6483
6484 const static OneTestCase privateUseCharacterStrings[] = {
6485 { {0x0391}, {0x0391}, UCOL_EQUAL },
6486 { {0x0041}, {0x0391}, UCOL_LESS },
6487 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6488 { {0x0060}, {0x0391}, UCOL_LESS },
6489 { {0x0391}, {0xe2dc}, UCOL_GREATER },
6490 };
6491
6492 /* Test rules creation */
6493 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6494
6495 /* Test collation reordering API */
6496 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6497 }
6498
TestNonScriptReorder(void)6499 static void TestNonScriptReorder(void)
6500 {
6501 const char* strRules[] = {
6502 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6503 };
6504
6505 const int32_t apiRules[] = {
6506 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6507 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6508 UCOL_REORDER_CODE_CURRENCY
6509 };
6510
6511 const static OneTestCase privateUseCharacterStrings[] = {
6512 { {0x0391}, {0x0041}, UCOL_LESS },
6513 { {0x0041}, {0x0391}, UCOL_GREATER },
6514 { {0x0060}, {0x0041}, UCOL_LESS },
6515 { {0x0060}, {0x0391}, UCOL_GREATER },
6516 { {0x0024}, {0x0041}, UCOL_GREATER },
6517 };
6518
6519 /* Test rules creation */
6520 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6521
6522 /* Test collation reordering API */
6523 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6524 }
6525
TestHaniReorder(void)6526 static void TestHaniReorder(void)
6527 {
6528 const char* strRules[] = {
6529 "[reorder Hani]"
6530 };
6531 const int32_t apiRules[] = {
6532 USCRIPT_HAN
6533 };
6534
6535 const static OneTestCase privateUseCharacterStrings[] = {
6536 { {0x4e00}, {0x0041}, UCOL_LESS },
6537 { {0x4e00}, {0x0060}, UCOL_GREATER },
6538 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6539 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6540 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6541 { {0xfa27}, {0x0041}, UCOL_LESS },
6542 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6543 };
6544
6545 /* Test rules creation */
6546 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6547
6548 /* Test collation reordering API */
6549 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6550 }
6551
TestHaniReorderWithOtherRules(void)6552 static void TestHaniReorderWithOtherRules(void)
6553 {
6554 const char* strRules[] = {
6555 "[reorder Hani] &b<a"
6556 };
6557 /*const int32_t apiRules[] = {
6558 USCRIPT_HAN
6559 };*/
6560
6561 const static OneTestCase privateUseCharacterStrings[] = {
6562 { {0x4e00}, {0x0041}, UCOL_LESS },
6563 { {0x4e00}, {0x0060}, UCOL_GREATER },
6564 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6565 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6566 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6567 { {0xfa27}, {0x0041}, UCOL_LESS },
6568 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6569 { {0x0062}, {0x0061}, UCOL_LESS },
6570 };
6571
6572 /* Test rules creation */
6573 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6574 }
6575
TestMultipleReorder(void)6576 static void TestMultipleReorder(void)
6577 {
6578 const char* strRules[] = {
6579 "[reorder Grek Zzzz DIGIT Latn Hani]"
6580 };
6581
6582 const int32_t apiRules[] = {
6583 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6584 };
6585
6586 const static OneTestCase collationTestCases[] = {
6587 { {0x0391}, {0x0041}, UCOL_LESS},
6588 { {0x0031}, {0x0041}, UCOL_LESS},
6589 { {0x0041}, {0x4e00}, UCOL_LESS},
6590 };
6591
6592 /* Test rules creation */
6593 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6594
6595 /* Test collation reordering API */
6596 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6597 }
6598
6599 /*
6600 * Test that covers issue reported in ticket 8814
6601 */
TestReorderWithNumericCollation(void)6602 static void TestReorderWithNumericCollation(void)
6603 {
6604 UErrorCode status = U_ZERO_ERROR;
6605 UCollator *myCollation;
6606 UCollator *myReorderCollation;
6607 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6608 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6609 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6610 UChar fortyS[] = { 0x0053 };
6611 UChar fortyThreeP[] = { 0x0050 };
6612 uint8_t fortyS_sortKey[128];
6613 int32_t fortyS_sortKey_Length;
6614 uint8_t fortyThreeP_sortKey[128];
6615 int32_t fortyThreeP_sortKey_Length;
6616 uint8_t fortyS_sortKey_reorder[128];
6617 int32_t fortyS_sortKey_reorder_Length;
6618 uint8_t fortyThreeP_sortKey_reorder[128];
6619 int32_t fortyThreeP_sortKey_reorder_Length;
6620 UCollationResult collResult;
6621 UCollationResult collResultReorder;
6622
6623 log_verbose("Testing reordering with and without numeric collation\n");
6624
6625 /* build collator tertiary with numeric */
6626 myCollation = ucol_open("", &status);
6627 /*
6628 ucol_setStrength(myCollation, UCOL_TERTIARY);
6629 */
6630 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6631 if(U_FAILURE(status)) {
6632 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6633 return;
6634 }
6635
6636 /* build collator tertiary with numeric and reordering */
6637 myReorderCollation = ucol_open("", &status);
6638 /*
6639 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6640 */
6641 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6642 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6643 if(U_FAILURE(status)) {
6644 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6645 return;
6646 }
6647
6648 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6649 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6650 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6651 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6652
6653 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6654 log_err_status(status, "ERROR: couldn't generate sort keys\n");
6655 return;
6656 }
6657 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6658 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6659 /*
6660 fprintf(stderr, "\tcollResult = %x\n", collResult);
6661 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6662 fprintf(stderr, "\nfortyS\n");
6663 for (i = 0; i < fortyS_sortKey_Length; i++) {
6664 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6665 }
6666 fprintf(stderr, "\nfortyThreeP\n");
6667 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6668 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6669 }
6670 */
6671 if (collResult != collResultReorder) {
6672 log_err_status(status, "ERROR: collation results should have been the same.\n");
6673 return;
6674 }
6675
6676 ucol_close(myCollation);
6677 ucol_close(myReorderCollation);
6678 }
6679
/*
 * Compares two zero-terminated byte sequences byte by byte.
 * Returns 0 when both are identical up to and including the terminating 0,
 * otherwise -1 or 1 according to the first differing byte (a's byte smaller
 * or larger than b's). Both inputs must be zero-terminated.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    const uint8_t *lhs = a;
    const uint8_t *rhs = b;

    while (*lhs == *rhs) {
        if (*lhs == 0) {
            /* reached the terminator of both sequences without a difference */
            return 0;
        }
        ++lhs;
        ++rhs;
    }

    if (*lhs < *rhs) {
        return -1;
    }
    return 1;
}
6689
TestImportRulesDeWithPhonebook(void)6690 static void TestImportRulesDeWithPhonebook(void)
6691 {
6692 const char* normalRules[] = {
6693 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6694 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6695 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6696 };
6697 const OneTestCase normalTests[] = {
6698 { {0x00e6}, {0x00c6}, UCOL_LESS},
6699 { {0x00fc}, {0x00dc}, UCOL_GREATER},
6700 };
6701
6702 const char* importRules[] = {
6703 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6704 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6705 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6706 };
6707 const OneTestCase importTests[] = {
6708 { {0x00e6}, {0x00c6}, UCOL_LESS},
6709 { {0x00fc}, {0x00dc}, UCOL_LESS},
6710 };
6711
6712 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6713 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6714 }
6715
6716 #if 0
/*
 * Compares the default ordering against rule sets that import the European
 * Ordering Rules (eor), the Finnish standard rules, and both together.
 * NOTE: this function sits inside an "#if 0" section and is not compiled.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b",                                    /* Dummy rule. */
    };

    const OneTestCase defaultTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* eorRules[] = {
        "[import root-u-co-eor]",
    };

    const OneTestCase eorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard rules on their own. */
    const char* fiStdRules[] = {
        "[import fi-u-co-standard]",
    };

    const OneTestCase fiStdTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* eorFiStdRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import.
     * It is only referenced by the commented-out call at the end of this function. */
    const char* fiEorRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expectations shared by eorFiStdRules (run below) and fiEorRules (disabled below). */
    const OneTestCase fiEorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
    doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
    doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
    /* fiEorTests is deliberately paired with the combined eor + fi-standard import here */
    doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
            eor{
                Sequence{
                    "[import root-u-co-eor][import fi-u-co-standard]"
                }
                Version{"21.0"}
            }
    */
    /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */

}
6784 #endif
6785
6786 #if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 *
 * It checks that U+3402 versus U+4E1E compares UCOL_GREATER with a dummy
 * tailoring and UCOL_LESS once "[import ko-u-co-unihan]" is used.
 * NOTE: this function sits inside an "#if 0" section and is not compiled.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b",                                    /* Dummy rule. */
    };

    const OneTestCase defaultTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Rules that import the unihan collation type from the ko locale. */
    const char* unihanRules[] = {
        "[import ko-u-co-unihan]",
    };

    const OneTestCase unihanTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
    doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));

}
6816 #endif
6817
TestImport(void)6818 static void TestImport(void)
6819 {
6820 UCollator* vicoll;
6821 UCollator* escoll;
6822 UCollator* viescoll;
6823 UCollator* importviescoll;
6824 UParseError error;
6825 UErrorCode status = U_ZERO_ERROR;
6826 UChar* virules;
6827 int32_t viruleslength;
6828 UChar* esrules;
6829 int32_t esruleslength;
6830 UChar* viesrules;
6831 int32_t viesruleslength;
6832 char srules[500] = "[import vi][import es]";
6833 UChar rules[500];
6834 uint32_t length = 0;
6835 int32_t itemCount;
6836 int32_t i, k;
6837 UChar32 start;
6838 UChar32 end;
6839 UChar str[500];
6840 int32_t strLength;
6841
6842 uint8_t sk1[500];
6843 uint8_t sk2[500];
6844
6845 UBool b;
6846 USet* tailoredSet;
6847 USet* importTailoredSet;
6848
6849
6850 vicoll = ucol_open("vi", &status);
6851 if(U_FAILURE(status)){
6852 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6853 return;
6854 }
6855
6856 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6857 escoll = ucol_open("es", &status);
6858 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6859 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6860 viesrules[0] = 0;
6861 u_strcat(viesrules, virules);
6862 u_strcat(viesrules, esrules);
6863 viesruleslength = viruleslength + esruleslength;
6864 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6865
6866 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6867 length = u_unescape(srules, rules, 500);
6868 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6869 if(U_FAILURE(status)){
6870 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6871 return;
6872 }
6873
6874 tailoredSet = ucol_getTailoredSet(viescoll, &status);
6875 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6876
6877 if(!uset_equals(tailoredSet, importTailoredSet)){
6878 log_err("Tailored sets not equal");
6879 }
6880
6881 uset_close(importTailoredSet);
6882
6883 itemCount = uset_getItemCount(tailoredSet);
6884
6885 for( i = 0; i < itemCount; i++){
6886 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6887 if(strLength < 2){
6888 for (; start <= end; start++){
6889 k = 0;
6890 U16_APPEND(str, k, 500, start, b);
6891 (void)b; /* Suppress set but not used warning. */
6892 ucol_getSortKey(viescoll, str, 1, sk1, 500);
6893 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6894 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6895 log_err("Sort key for %s not equal\n", str);
6896 break;
6897 }
6898 }
6899 }else{
6900 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6901 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6902 if(compare_uint8_t_arrays(sk1, sk2) != 0){
6903 log_err("ZZSort key for %s not equal\n", str);
6904 break;
6905 }
6906
6907 }
6908 }
6909
6910 uset_close(tailoredSet);
6911
6912 uprv_free(viesrules);
6913
6914 ucol_close(vicoll);
6915 ucol_close(escoll);
6916 ucol_close(viescoll);
6917 ucol_close(importviescoll);
6918 }
6919
TestImportWithType(void)6920 static void TestImportWithType(void)
6921 {
6922 UCollator* vicoll;
6923 UCollator* decoll;
6924 UCollator* videcoll;
6925 UCollator* importvidecoll;
6926 UParseError error;
6927 UErrorCode status = U_ZERO_ERROR;
6928 const UChar* virules;
6929 int32_t viruleslength;
6930 const UChar* derules;
6931 int32_t deruleslength;
6932 UChar* viderules;
6933 int32_t videruleslength;
6934 const char srules[500] = "[import vi][import de-u-co-phonebk]";
6935 UChar rules[500];
6936 uint32_t length = 0;
6937 int32_t itemCount;
6938 int32_t i, k;
6939 UChar32 start;
6940 UChar32 end;
6941 UChar str[500];
6942 int32_t strLength;
6943
6944 uint8_t sk1[500];
6945 uint8_t sk2[500];
6946
6947 USet* tailoredSet;
6948 USet* importTailoredSet;
6949
6950 vicoll = ucol_open("vi", &status);
6951 if(U_FAILURE(status)){
6952 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6953 return;
6954 }
6955 virules = ucol_getRules(vicoll, &viruleslength);
6956 /* decoll = ucol_open("de@collation=phonebook", &status); */
6957 decoll = ucol_open("de-u-co-phonebk", &status);
6958 if(U_FAILURE(status)){
6959 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6960 return;
6961 }
6962
6963
6964 derules = ucol_getRules(decoll, &deruleslength);
6965 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6966 viderules[0] = 0;
6967 u_strcat(viderules, virules);
6968 u_strcat(viderules, derules);
6969 videruleslength = viruleslength + deruleslength;
6970 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6971
6972 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6973 length = u_unescape(srules, rules, 500);
6974 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6975 if(U_FAILURE(status)){
6976 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6977 return;
6978 }
6979
6980 tailoredSet = ucol_getTailoredSet(videcoll, &status);
6981 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6982
6983 if(!uset_equals(tailoredSet, importTailoredSet)){
6984 log_err("Tailored sets not equal");
6985 }
6986
6987 uset_close(importTailoredSet);
6988
6989 itemCount = uset_getItemCount(tailoredSet);
6990
6991 for( i = 0; i < itemCount; i++){
6992 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6993 if(strLength < 2){
6994 for (; start <= end; start++){
6995 k = 0;
6996 U16_APPEND_UNSAFE(str, k, start);
6997 ucol_getSortKey(videcoll, str, 1, sk1, 500);
6998 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6999 if(compare_uint8_t_arrays(sk1, sk2) != 0){
7000 log_err("Sort key for %s not equal\n", str);
7001 break;
7002 }
7003 }
7004 }else{
7005 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
7006 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
7007 if(compare_uint8_t_arrays(sk1, sk2) != 0){
7008 log_err("Sort key for %s not equal\n", str);
7009 break;
7010 }
7011
7012 }
7013 }
7014
7015 uset_close(tailoredSet);
7016
7017 uprv_free(viderules);
7018
7019 ucol_close(videcoll);
7020 ucol_close(importvidecoll);
7021 ucol_close(vicoll);
7022 ucol_close(decoll);
7023 }
7024
/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
/* Stored as ASCII code units without a terminating NUL; 9 rows of 16 plus a final row of 11. */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
7038
/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
/* Each row below is one 25-unit repetition; the array has no terminating NUL. */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
7047
/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
/* Each row below is one 27-unit repetition (A-Z plus a space); the array has no terminating NUL. */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
7063
/* Element count of a true array (must not be used on a pointer). */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))

/* One test string for TestCaseLevelBufferOverflow: pointer plus explicit length in UChars. */
typedef struct {
    const UChar * longUpperStrPtr;
    int32_t       longUpperStrLen;
} LongUpperStrItem;

/* String pointers must be in reverse collation order of the corresponding strings */
/* (each sort key is expected to compare lower than the previous one). */
/* The list is terminated by an entry whose pointer is NULL. */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};

/* Capacity in bytes of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7080
7081 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)7082 static void TestCaseLevelBufferOverflow(void)
7083 {
7084 UErrorCode status = U_ZERO_ERROR;
7085 UCollator * ucol = ucol_open("root", &status);
7086 if ( U_SUCCESS(status) ) {
7087 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7088 if ( U_SUCCESS(status) ) {
7089 const LongUpperStrItem * itemPtr;
7090 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7091 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7092 int32_t sortKeyLen;
7093 if (itemPtr > longUpperStrItems) {
7094 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7095 }
7096 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7097 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7098 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7099 break;
7100 }
7101 if ( itemPtr > longUpperStrItems ) {
7102 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7103 if (compareResult >= 0) {
7104 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7105 }
7106 }
7107 }
7108 } else {
7109 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7110 }
7111 ucol_close(ucol);
7112 } else {
7113 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7114 }
7115 }
7116
7117
/* Registers test function x under the path "tscoll/cmsccoll/<name of x>"; expects a variable named root in scope. */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Registers the tests of this file with the test tree.
 * Entries that are commented out are tests that are currently not registered;
 * where the original author gave a reason it is kept next to the entry.
 * @param root Root of the test tree, passed on to addTest() by the TEST macro.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* collation reordering tests */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
}
7228
7229 #endif /* #if !UCONFIG_NO_COLLATION */
7230