1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File cmsccoll.C
11 *
12 *******************************************************************************/
13 /**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18 #include <stdio.h>
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "cintltst.h"
28 #include "ccolltst.h"
29 #include "callcoll.h"
30 #include "unicode/ustring.h"
31 #include "string.h"
32 #include "ucol_imp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
45 #define MAX_TOKEN_LEN 16
46
/*
 * Function type for a string-comparison callback that takes a source and a
 * target UChar string with their lengths and yields a UCollationResult.
 * NOTE(review): the meaning of `collator` (declared void *) and `object` is
 * not visible in this part of the file - confirm against the users of this
 * typedef.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
50
51
52
/*
 * Test strings for the first half of IncompleteCntTest (rule
 * " & Z < ABC < Q < B").  The test expects every entry to compare UCOL_LESS
 * against every entry that follows it, so the list is in expected sort order.
 */
const static char cnt1[][10] = {

  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};

/*
 * Test strings for the second half of IncompleteCntTest (rule
 * " & Z < DAVIS < MARK <DAV"), again listed in the order the test expects.
 */
const static char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
79
IncompleteCntTest(void)80 static void IncompleteCntTest(void)
81 {
82 UErrorCode status = U_ZERO_ERROR;
83 UChar temp[90];
84 UChar t1[90];
85 UChar t2[90];
86
87 UCollator *coll = NULL;
88 uint32_t i = 0, j = 0;
89 uint32_t size = 0;
90
91 u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95 if(U_SUCCESS(status)) {
96 size = UPRV_LENGTHOF(cnt1);
97 for(i = 0; i < size-1; i++) {
98 for(j = i+1; j < size; j++) {
99 UCollationElements *iter;
100 u_uastrcpy(t1, cnt1[i]);
101 u_uastrcpy(t2, cnt1[j]);
102 doTest(coll, t1, t2, UCOL_LESS);
103 /* synwee : added collation element iterator test */
104 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105 if (U_FAILURE(status)) {
106 log_err("Creation of iterator failed\n");
107 break;
108 }
109 backAndForth(iter);
110 ucol_closeElements(iter);
111 }
112 }
113 }
114
115 ucol_close(coll);
116
117
118 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121 if(U_SUCCESS(status)) {
122 size = UPRV_LENGTHOF(cnt2);
123 for(i = 0; i < size-1; i++) {
124 for(j = i+1; j < size; j++) {
125 UCollationElements *iter;
126 u_uastrcpy(t1, cnt2[i]);
127 u_uastrcpy(t2, cnt2[j]);
128 doTest(coll, t1, t2, UCOL_LESS);
129
130 /* synwee : added collation element iterator test */
131 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132 if (U_FAILURE(status)) {
133 log_err("Creation of iterator failed\n");
134 break;
135 }
136 backAndForth(iter);
137 ucol_closeElements(iter);
138 }
139 }
140 }
141
142 ucol_close(coll);
143
144
145 }
146
/*
 * Strings for the UCOL_SHIFTED passes of BlackBirdTest.  At quaternary
 * strength the test expects each entry to be UCOL_LESS than every later one;
 * at tertiary strength only neighbouring entries are compared (see
 * shiftedTert).
 */
const static char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};

/*
 * Expected tertiary-strength results for BlackBirdTest: shiftedTert[i] is the
 * expected outcome of comparing shifted[i-1] with shifted[i].  The test loop
 * starts at i = 1, so element 0 is never read.
 */
const static UCollationResult shiftedTert[] = {
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL
};

/*
 * Strings for the UCOL_NON_IGNORABLE pass of BlackBirdTest, listed so that
 * each entry is expected to be UCOL_LESS than every later one.
 */
const static char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
182
BlackBirdTest(void)183 static void BlackBirdTest(void) {
184 UErrorCode status = U_ZERO_ERROR;
185 UChar t1[90];
186 UChar t2[90];
187
188 uint32_t i = 0, j = 0;
189 uint32_t size = 0;
190 UCollator *coll = ucol_open("en_US", &status);
191
192 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195 if(U_SUCCESS(status)) {
196 size = UPRV_LENGTHOF(nonignorable);
197 for(i = 0; i < size-1; i++) {
198 for(j = i+1; j < size; j++) {
199 u_uastrcpy(t1, nonignorable[i]);
200 u_uastrcpy(t2, nonignorable[j]);
201 doTest(coll, t1, t2, UCOL_LESS);
202 }
203 }
204 }
205
206 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209 if(U_SUCCESS(status)) {
210 size = UPRV_LENGTHOF(shifted);
211 for(i = 0; i < size-1; i++) {
212 for(j = i+1; j < size; j++) {
213 u_uastrcpy(t1, shifted[i]);
214 u_uastrcpy(t2, shifted[j]);
215 doTest(coll, t1, t2, UCOL_LESS);
216 }
217 }
218 }
219
220 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221 if(U_SUCCESS(status)) {
222 size = UPRV_LENGTHOF(shifted);
223 for(i = 1; i < size; i++) {
224 u_uastrcpy(t1, shifted[i-1]);
225 u_uastrcpy(t2, shifted[i]);
226 doTest(coll, t1, t2, shiftedTert[i]);
227 }
228 }
229
230 ucol_close(coll);
231 }
232
/*
 * Source strings for FunkyATest.  Row i is compared against
 * testTargetCases[i] and the outcome is checked against results[i].
 * FunkyATest only iterates over the first four rows.
 */
const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0000},
    {0x00C0, 0x0301, 0x0000},
    /* this would work with forced normalization */
    {0x00C0, 0x0316, 0x0000}
};

/* Target strings for FunkyATest, paired by row with testSourceCases. */
const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    {0x00C0, 0},
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    /* this would work with forced normalization */
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
};

/* Expected result of comparing testSourceCases[i] with testTargetCases[i]. */
const static UCollationResult results[] = {
    UCOL_GREATER,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_GREATER,
    UCOL_EQUAL
};
258
FunkyATest(void)259 static void FunkyATest(void)
260 {
261
262 int32_t i;
263 UErrorCode status = U_ZERO_ERROR;
264 UCollator *myCollation;
265 myCollation = ucol_open("en_US", &status);
266 if(U_FAILURE(status)){
267 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268 return;
269 }
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272 ucol_setStrength(myCollation, UCOL_TERTIARY);
273 for (i = 0; i < 4 ; i++)
274 {
275 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276 }
277 ucol_close(myCollation);
278 }
279
/*
 * Attribute value tables.  In the visible part of this file they are only
 * iterated by PrintMarkDavis, which is currently compiled out.
 * NOTE(review): these have external linkage; other users may exist outside
 * the part of the file shown here.
 */

/* Values applied to UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values applied to UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values applied to UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values applied to UCOL_STRENGTH. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
304
305 #if 0
/*
 * Printable names for the attribute value tables above; entry n names entry n
 * of the corresponding value table.  Used only by PrintMarkDavis.
 */

/* Names for strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Names for caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Names for alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Names for caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};
330
331 /* not used currently - does not test only prints */
332 static void PrintMarkDavis(void)
333 {
334 UErrorCode status = U_ZERO_ERROR;
335 UChar m[256];
336 uint8_t sortkey[256];
337 UCollator *coll = ucol_open("en_US", &status);
338 uint32_t h,i,j,k, sortkeysize;
339 uint32_t sizem = 0;
340 char buffer[512];
341 uint32_t len = 512;
342
343 log_verbose("PrintMarkDavis");
344
345 u_uastrcpy(m, "Mark Davis");
346 sizem = u_strlen(m);
347
348
349 m[1] = 0xe4;
350
351 for(i = 0; i<sizem; i++) {
352 fprintf(stderr, "\\u%04X ", m[i]);
353 }
354 fprintf(stderr, "\n");
355
356 for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
357 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
358 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
359
360 for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
361 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
362 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
363
364 for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
365 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
366 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
367
368 for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
369 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
370 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
371 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
372 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
373 }
374
375 }
376
377 }
378
379 }
380 }
381 #endif
382
BillFairmanTest(void)383 static void BillFairmanTest(void) {
384 /*
385 ** check for actual locale via ICU resource bundles
386 **
387 ** lp points to the original locale ("fr_FR_....")
388 */
389
390 UResourceBundle *lr,*cr;
391 UErrorCode lec = U_ZERO_ERROR;
392 const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394 log_verbose("BillFairmanTest\n");
395
396 lr = ures_open(NULL,lp,&lec);
397 if (lr) {
398 cr = ures_getByKey(lr,"collations",0,&lec);
399 if (cr) {
400 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401 if (lp) {
402 if (U_SUCCESS(lec)) {
403 if(strcmp(lp, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405 }
406 }
407 }
408 ures_close(cr);
409 }
410 ures_close(lr);
411 }
412 }
413
/*
 * Strings for TestChMove, which opens the "cs" collator and expects each
 * entry to compare UCOL_LESS against every later entry.  The "\\u030C"
 * sequences are stored as literal backslash escapes and are expanded with
 * u_unescape() by the test.
 */
const static char chTest[][20] = {
  "c",
  "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h",
  "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s",
  "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
433
TestChMove(void)434 static void TestChMove(void) {
435 UChar t1[256] = {0};
436 UChar t2[256] = {0};
437
438 uint32_t i = 0, j = 0;
439 uint32_t size = 0;
440 UErrorCode status = U_ZERO_ERROR;
441
442 UCollator *coll = ucol_open("cs", &status);
443
444 if(U_SUCCESS(status)) {
445 size = UPRV_LENGTHOF(chTest);
446 for(i = 0; i < size-1; i++) {
447 for(j = i+1; j < size; j++) {
448 u_unescape(chTest[i], t1, 256);
449 u_unescape(chTest[j], t2, 256);
450 doTest(coll, t1, t2, UCOL_LESS);
451 }
452 }
453 }
454 else {
455 log_data_err("Can't open collator");
456 }
457 ucol_close(coll);
458 }
459
460
461
462
463 /*
464 const static char impTest[][20] = {
465 "\\u4e00",
466 "a",
467 "A",
468 "b",
469 "B",
470 "\\u4e01"
471 };
472 */
473
474
TestImplicitTailoring(void)475 static void TestImplicitTailoring(void) {
476 static const struct {
477 const char *rules;
478 const char *data[10];
479 const uint32_t len;
480 } tests[] = {
481 {
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490 };
491
492 int32_t i = 0;
493
494 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496 }
497
498 /*
499 UChar t1[256] = {0};
500 UChar t2[256] = {0};
501
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504 uint32_t i = 0, j = 0;
505 uint32_t size = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
510
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513 if(U_SUCCESS(status)) {
514 size = UPRV_LENGTHOF(impTest);
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
520 }
521 }
522 }
523 else {
524 log_err("Can't open collator");
525 }
526 ucol_close(coll);
527 */
528 }
529
TestFCDProblem(void)530 static void TestFCDProblem(void) {
531 UChar t1[256] = {0};
532 UChar t2[256] = {0};
533
534 const char *s1 = "\\u0430\\u0306\\u0325";
535 const char *s2 = "\\u04D1\\u0325";
536
537 UErrorCode status = U_ZERO_ERROR;
538 UCollator *coll = ucol_open("", &status);
539 u_unescape(s1, t1, 256);
540 u_unescape(s2, t2, 256);
541
542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543 doTest(coll, t1, t2, UCOL_EQUAL);
544
545 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546 doTest(coll, t1, t2, UCOL_EQUAL);
547
548 ucol_close(coll);
549 }
550
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/*
 * One test case of TestComposeDecompose: a code point together with two
 * UChar buffers that the test fills from unorm_normalize() in NFC and NFD
 * mode.  Note that the test may overwrite the NFC buffer with the original
 * (un-normalized) string when that string differs from its NFD form.
 */
typedef struct {
  UChar32 u;                        /* code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* first string of the comparison */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* second string of the comparison */
} tester;
561
TestComposeDecompose(void)562 static void TestComposeDecompose(void) {
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 };
569 int32_t noOfLoc;
570 int32_t i = 0, j = 0;
571
572 UErrorCode status = U_ZERO_ERROR;
573 const char *locName = NULL;
574 uint32_t nfcSize;
575 uint32_t nfdSize;
576 tester **t;
577 uint32_t noCases = 0;
578 UCollator *coll = NULL;
579 UChar32 u = 0;
580 UChar comp[NORM_BUFFER_TEST_LEN];
581 uint32_t len = 0;
582 UCollationElements *iter;
583 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584 int32_t charsToTestSize;
585
586 noOfLoc = uloc_countAvailable();
587
588 coll = ucol_open("", &status);
589 if (U_FAILURE(status)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591 return;
592 }
593 charsToTestSize = uset_size(charsToTest);
594 if (charsToTestSize <= 0) {
595 log_err("Set was zero. Missing data?\n");
596 return;
597 }
598 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
599 t[0] = (tester *)malloc(sizeof(tester));
600 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
601
602 for(u = 0; u < charsToTestSize; u++) {
603 UChar32 ch = uset_charAt(charsToTest, u);
604 len = 0;
605 U16_APPEND_UNSAFE(comp, len, ch);
606 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608
609 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
610 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
611 t[noCases]->u = ch;
612 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
613 u_strncpy(t[noCases]->NFC, comp, len);
614 t[noCases]->NFC[len] = 0;
615 }
616 noCases++;
617 t[noCases] = (tester *)malloc(sizeof(tester));
618 uprv_memset(t[noCases], 0, sizeof(tester));
619 }
620 }
621 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622 uset_close(charsToTest);
623 charsToTest = NULL;
624
625 for(u=0; u<(UChar32)noCases; u++) {
626 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629 }
630 }
631 /*
632 for(u = 0; u < charsToTestSize; u++) {
633 if(!(u&0xFFFF)) {
634 log_verbose("%08X ", u);
635 }
636 uprv_memset(t[noCases], 0, sizeof(tester));
637 t[noCases]->u = u;
638 len = 0;
639 U16_APPEND_UNSAFE(comp, len, u);
640 comp[len] = 0;
641 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645 }
646 */
647
648 ucol_close(coll);
649
650 log_verbose("Testing locales, number of cases = %i\n", noCases);
651 for(i = 0; i<noOfLoc; i++) {
652 status = U_ZERO_ERROR;
653 locName = uloc_getAvailable(i);
654 if(hasCollationElements(locName)) {
655 char cName[256];
656 UChar name[256];
657 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658
659 for(j = 0; j<nameSize; j++) {
660 cName[j] = (char)name[j];
661 }
662 cName[nameSize] = 0;
663 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664
665 coll = ucol_open(locName, &status);
666 ucol_setStrength(coll, UCOL_IDENTICAL);
667 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
668
669 for(u=0; u<(UChar32)noCases; u++) {
670 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673 log_verbose("Testing NFC\n");
674 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675 backAndForth(iter);
676 log_verbose("Testing NFD\n");
677 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678 backAndForth(iter);
679 }
680 }
681 ucol_closeElements(iter);
682 ucol_close(coll);
683 }
684 }
685 for(u = 0; u <= (UChar32)noCases; u++) {
686 free(t[u]);
687 }
688 free(t);
689 }
690
TestEmptyRule(void)691 static void TestEmptyRule(void) {
692 UErrorCode status = U_ZERO_ERROR;
693 UChar rulez[] = { 0 };
694 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695
696 ucol_close(coll);
697 }
698
TestUCARules(void)699 static void TestUCARules(void) {
700 UErrorCode status = U_ZERO_ERROR;
701 UChar b[256];
702 UChar *rules = b;
703 uint32_t ruleLen = 0;
704 UCollator *UCAfromRules = NULL;
705 UCollator *coll = ucol_open("", &status);
706 if(status == U_FILE_ACCESS_ERROR) {
707 log_data_err("Is your data around?\n");
708 return;
709 } else if(U_FAILURE(status)) {
710 log_err("Error opening collator\n");
711 return;
712 }
713 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714
715 log_verbose("TestUCARules\n");
716 if(ruleLen > 256) {
717 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719 }
720 log_verbose("Rules length is %d\n", ruleLen);
721 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722 if(U_SUCCESS(status)) {
723 ucol_close(UCAfromRules);
724 } else {
725 log_verbose("Unable to create a collator from UCARules!\n");
726 }
727 /*
728 u_unescape(blah, b, 256);
729 ucol_getSortKey(coll, b, 1, res, 256);
730 */
731 ucol_close(coll);
732 if(rules != b) {
733 free(rules);
734 }
735 }
736
737
738 /* Pinyin tonal order */
739 /*
740 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741 (w/macron)< (w/acute)< (w/caron)< (w/grave)
742 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747 .. (\u00fc)
748
749 However, in testing we got the following order:
750 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751 (w/acute)< (w/grave)< (w/caron)< (w/macron)
752 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753 .. (\u0113)
754 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757 .. (\u01d8)
758 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759 */
760
/*
 * Pinyin tonal order: tailors the toned vowels before their base letters
 * with [before 1] (and the u-diaeresis variants after u) and hands the rule
 * plus the expected ordering to genericRulesStarter().
 */
static void TestBefore(void) {
    const static char *expectedOrder[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    /* One anchor per vowel, followed by the tail after plain u. */
    const char *const tonalRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tonalRules, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
779
780 #if 0
/* superseded by TestBeforePinyin */
/*
 * Compiled out.  Passed the toned-vowel strings below, in the listed order,
 * to genericLocaleStarter() for the "zh" locale.
 */
static void TestJ784(void) {
    const static char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
794 #endif
795
796 #if 0
/* superseded by the changes to the lv locale */
/*
 * Compiled out.  Passed "I", "i", "Y", "y", in that order, to
 * genericLocaleStarter() for the "lv" locale.
 */
static void TestJ831(void) {
    const static char *data[] = {
        "I",
        "i",
        "Y",
        "y"
    };
    genericLocaleStarter("lv", data, UPRV_LENGTHOF(data));
}
807 #endif
808
TestJ815(void)809 static void TestJ815(void) {
810 const static char *data[] = {
811 "aa",
812 "Aa",
813 "ab",
814 "Ab",
815 "ad",
816 "Ad",
817 "ae",
818 "Ae",
819 "\\u00e6",
820 "\\u00c6",
821 "af",
822 "Af",
823 "b",
824 "B"
825 };
826 genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
827 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
828 }
829
830
TestCase(void)831 static void TestCase(void)
832 {
833 const static UChar gRules[MAX_TOKEN_LEN] =
834 /*" & 0 < 1,\u2461<a,A"*/
835 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
836
837 const static UChar testCase[][MAX_TOKEN_LEN] =
838 {
839 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
840 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
841 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
842 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
843 };
844
845 const static UCollationResult caseTestResults[][9] =
846 {
847 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
848 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
849 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
850 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
851 };
852
853 const static UColAttributeValue caseTestAttributes[][2] =
854 {
855 { UCOL_LOWER_FIRST, UCOL_OFF},
856 { UCOL_UPPER_FIRST, UCOL_OFF},
857 { UCOL_LOWER_FIRST, UCOL_ON},
858 { UCOL_UPPER_FIRST, UCOL_ON}
859 };
860 int32_t i,j,k;
861 UErrorCode status = U_ZERO_ERROR;
862 UCollationElements *iter;
863 UCollator *myCollation;
864 myCollation = ucol_open("en_US", &status);
865
866 if(U_FAILURE(status)){
867 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
868 return;
869 }
870 log_verbose("Testing different case settings\n");
871 ucol_setStrength(myCollation, UCOL_TERTIARY);
872
873 for(k = 0; k<4; k++) {
874 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
875 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
876 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
877 for (i = 0; i < 3 ; i++) {
878 for(j = i+1; j<4; j++) {
879 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
880 }
881 }
882 }
883 ucol_close(myCollation);
884
885 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
886 if(U_FAILURE(status)){
887 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
888 return;
889 }
890 log_verbose("Testing different case settings with custom rules\n");
891 ucol_setStrength(myCollation, UCOL_TERTIARY);
892
893 for(k = 0; k<4; k++) {
894 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
895 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
896 for (i = 0; i < 3 ; i++) {
897 for(j = i+1; j<4; j++) {
898 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
899 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
900 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
901 backAndForth(iter);
902 ucol_closeElements(iter);
903 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
904 backAndForth(iter);
905 ucol_closeElements(iter);
906 }
907 }
908 }
909 ucol_close(myCollation);
910 {
911 const static char *lowerFirst[] = {
912 "h",
913 "H",
914 "ch",
915 "Ch",
916 "CH",
917 "cha",
918 "chA",
919 "Cha",
920 "ChA",
921 "CHa",
922 "CHA",
923 "i",
924 "I"
925 };
926
927 const static char *upperFirst[] = {
928 "H",
929 "h",
930 "CH",
931 "Ch",
932 "ch",
933 "CHA",
934 "CHa",
935 "ChA",
936 "Cha",
937 "chA",
938 "cha",
939 "I",
940 "i"
941 };
942 log_verbose("mixed case test\n");
943 log_verbose("lower first, case level off\n");
944 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
945 log_verbose("upper first, case level off\n");
946 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
947 log_verbose("lower first, case level on\n");
948 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
949 log_verbose("upper first, case level on\n");
950 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
951 }
952
953 }
954
TestIncrementalNormalize(void)955 static void TestIncrementalNormalize(void) {
956
957 /*UChar baseA =0x61;*/
958 UChar baseA =0x41;
959 /* UChar baseB = 0x42;*/
960 static const UChar ccMix[] = {0x316, 0x321, 0x300};
961 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
962 /*
963 0x316 is combining grave accent below, cc=220
964 0x321 is combining palatalized hook below, cc=202
965 0x300 is combining grave accent, cc=230
966 */
967
968 #define MAXSLEN 2000
969 /*int maxSLen = 64000;*/
970 int sLen;
971 int i;
972
973 UCollator *coll;
974 UErrorCode status = U_ZERO_ERROR;
975 UCollationResult result;
976
977 int32_t myQ = getTestOption(QUICK_OPTION);
978
979 if(getTestOption(QUICK_OPTION) < 0) {
980 setTestOption(QUICK_OPTION, 1);
981 }
982
983 {
984 /* Test 1. Run very long unnormalized strings, to force overflow of*/
985 /* most buffers along the way.*/
986 UChar strA[MAXSLEN+1];
987 UChar strB[MAXSLEN+1];
988
989 coll = ucol_open("en_US", &status);
990 if(status == U_FILE_ACCESS_ERROR) {
991 log_data_err("Is your data around?\n");
992 return;
993 } else if(U_FAILURE(status)) {
994 log_err("Error opening collator\n");
995 return;
996 }
997 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
998
999 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1000 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1001 /*for (sLen = 1000; sLen<1001; sLen++) {*/
1002 for (sLen = 500; sLen<501; sLen++) {
1003 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1004 strA[0] = baseA;
1005 strB[0] = baseA;
1006 for (i=1; i<=sLen-1; i++) {
1007 strA[i] = ccMix[i % 3];
1008 strB[sLen-i] = ccMix[i % 3];
1009 }
1010 strA[sLen] = 0;
1011 strB[sLen] = 0;
1012
1013 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
1014 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
1015 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
1016 doTest(coll, strA, strB, UCOL_EQUAL);
1017 }
1018 }
1019
1020 setTestOption(QUICK_OPTION, myQ);
1021
1022
1023 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1024 /* of the string. Checks a couple of edge cases.*/
1025
1026 {
1027 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1028 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1029 ucol_setStrength(coll, UCOL_TERTIARY);
1030 doTest(coll, strA, strB, UCOL_EQUAL);
1031 }
1032
1033 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1034
1035 {
1036 /* New UCA 3.1.1.
1037 * test below used a code point from Desseret, which sorts differently
1038 * than d800 dc00
1039 */
1040 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1041 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1042 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1043 ucol_setStrength(coll, UCOL_TERTIARY);
1044 doTest(coll, strA, strB, UCOL_GREATER);
1045 }
1046
1047 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1048
1049 {
1050 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1051 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1052 char sortKeyA[50];
1053 char sortKeyAz[50];
1054 char sortKeyB[50];
1055 char sortKeyBz[50];
1056 int r;
1057
1058 /* there used to be -3 here. Hmmmm.... */
1059 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1060 result = ucol_strcoll(coll, strA, 3, strB, 3);
1061 if (result != UCOL_GREATER) {
1062 log_err("ERROR 1 in test 4\n");
1063 }
1064 result = ucol_strcoll(coll, strA, -1, strB, -1);
1065 if (result != UCOL_EQUAL) {
1066 log_err("ERROR 2 in test 4\n");
1067 }
1068
1069 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1070 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1071 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1072 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1073
1074 r = strcmp(sortKeyA, sortKeyAz);
1075 if (r <= 0) {
1076 log_err("Error 3 in test 4\n");
1077 }
1078 r = strcmp(sortKeyA, sortKeyB);
1079 if (r <= 0) {
1080 log_err("Error 4 in test 4\n");
1081 }
1082 r = strcmp(sortKeyAz, sortKeyBz);
1083 if (r != 0) {
1084 log_err("Error 5 in test 4\n");
1085 }
1086
1087 ucol_setStrength(coll, UCOL_IDENTICAL);
1088 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1089 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1090 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1091 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1092
1093 r = strcmp(sortKeyA, sortKeyAz);
1094 if (r <= 0) {
1095 log_err("Error 6 in test 4\n");
1096 }
1097 r = strcmp(sortKeyA, sortKeyB);
1098 if (r <= 0) {
1099 log_err("Error 7 in test 4\n");
1100 }
1101 r = strcmp(sortKeyAz, sortKeyBz);
1102 if (r != 0) {
1103 log_err("Error 8 in test 4\n");
1104 }
1105 ucol_setStrength(coll, UCOL_TERTIARY);
1106 }
1107
1108
1109 /* Test 5: Null characters in non-normal source strings.*/
1110
1111 {
1112 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1113 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1114 char sortKeyA[50];
1115 char sortKeyAz[50];
1116 char sortKeyB[50];
1117 char sortKeyBz[50];
1118 int r;
1119
1120 result = ucol_strcoll(coll, strA, 6, strB, 6);
1121 if (result != UCOL_GREATER) {
1122 log_err("ERROR 1 in test 5\n");
1123 }
1124 result = ucol_strcoll(coll, strA, -1, strB, -1);
1125 if (result != UCOL_EQUAL) {
1126 log_err("ERROR 2 in test 5\n");
1127 }
1128
1129 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1130 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1131 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1132 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1133
1134 r = strcmp(sortKeyA, sortKeyAz);
1135 if (r <= 0) {
1136 log_err("Error 3 in test 5\n");
1137 }
1138 r = strcmp(sortKeyA, sortKeyB);
1139 if (r <= 0) {
1140 log_err("Error 4 in test 5\n");
1141 }
1142 r = strcmp(sortKeyAz, sortKeyBz);
1143 if (r != 0) {
1144 log_err("Error 5 in test 5\n");
1145 }
1146
1147 ucol_setStrength(coll, UCOL_IDENTICAL);
1148 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1149 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1150 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1151 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1152
1153 r = strcmp(sortKeyA, sortKeyAz);
1154 if (r <= 0) {
1155 log_err("Error 6 in test 5\n");
1156 }
1157 r = strcmp(sortKeyA, sortKeyB);
1158 if (r <= 0) {
1159 log_err("Error 7 in test 5\n");
1160 }
1161 r = strcmp(sortKeyAz, sortKeyBz);
1162 if (r != 0) {
1163 log_err("Error 8 in test 5\n");
1164 }
1165 ucol_setStrength(coll, UCOL_TERTIARY);
1166 }
1167
1168
1169 /* Test 6: Null character as base of a non-normal combining sequence.*/
1170
1171 {
1172 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1173 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1174
1175 result = ucol_strcoll(coll, strA, 5, strB, 5);
1176 if (result != UCOL_LESS) {
1177 log_err("Error 1 in test 6\n");
1178 }
1179 result = ucol_strcoll(coll, strA, -1, strB, -1);
1180 if (result != UCOL_EQUAL) {
1181 log_err("Error 2 in test 6\n");
1182 }
1183 }
1184
1185 ucol_close(coll);
1186 }
1187
1188
1189
#if 0
/*
 * Disabled test (compiled out by the surrounding "#if 0").
 *
 * For every entry of caseBitData[], unescapes the string and compares the
 * value returned by ucol_uprv_getCaseBits() with the entry at the same index
 * in results[], logging an error on any mismatch.
 */
static void TestGetCaseBit(void) {
    /* Input strings in u_unescape() syntax; parallel to results[] below. */
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    /* Expected value for the string at the same index in caseBitData[]. */
    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* NOTE(review): status is never checked and UCA is never closed here. */
    for(i = 0; i<UPRV_LENGTHOF(results); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            /* Only the first code unit of the input is reported. */
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }
}
#endif
1217
TestHangulTailoring(void)1218 static void TestHangulTailoring(void) {
1219 static const char *koreanData[] = {
1220 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1221 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1222 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1223 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1224 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1225 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1226 };
1227
1228 const char *rules =
1229 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1230 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1231 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1232 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1233 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1234 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1235
1236
1237 UErrorCode status = U_ZERO_ERROR;
1238 UChar rlz[2048] = { 0 };
1239 uint32_t rlen = u_unescape(rules, rlz, 2048);
1240
1241 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1242 if(status == U_FILE_ACCESS_ERROR) {
1243 log_data_err("Is your data around?\n");
1244 return;
1245 } else if(U_FAILURE(status)) {
1246 log_err("Error opening collator\n");
1247 return;
1248 }
1249
1250 log_verbose("Using start of korean rules\n");
1251
1252 if(U_SUCCESS(status)) {
1253 genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1254 } else {
1255 log_err("Unable to open collator with rules %s\n", rules);
1256 }
1257
1258 ucol_close(coll);
1259
1260 log_verbose("Using ko__LOTUS locale\n");
1261 genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1262 }
1263
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap() compares individual sort key bytes against these
 * values, so they must be kept in sync with the implementation.
 */
enum {
    /* Middle byte for compressed common secondary weights. */
    SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
    /* Middle byte for compressed common tertiary weights. */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
};
1274
TestCompressOverlap(void)1275 static void TestCompressOverlap(void) {
1276 UChar secstr[150];
1277 UChar tertstr[150];
1278 UErrorCode status = U_ZERO_ERROR;
1279 UCollator *coll;
1280 uint8_t result[500];
1281 uint32_t resultlen;
1282 int count = 0;
1283 uint8_t *tempptr;
1284
1285 coll = ucol_open("", &status);
1286
1287 if (U_FAILURE(status)) {
1288 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1289 return;
1290 }
1291 while (count < 149) {
1292 secstr[count] = 0x0020; /* [06, 05, 05] */
1293 tertstr[count] = 0x0020;
1294 count ++;
1295 }
1296
1297 /* top down compression ----------------------------------- */
1298 secstr[count] = 0x0332; /* [, 87, 05] */
1299 tertstr[count] = 0x3000; /* [06, 05, 07] */
1300
1301 /* no compression secstr should have 150 secondary bytes, tertstr should
1302 have 150 tertiary bytes.
1303 with correct compression, secstr should have 6 secondary
1304 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1305 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1306 (void)resultlen; /* Suppress set but not used warning. */
1307 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1308 while (*(tempptr + 1) != 1) {
1309 /* the last secondary collation element is not checked since it is not
1310 part of the compression */
1311 if (*tempptr < SEC_COMMON_MIDDLE) {
1312 log_err("Secondary top down compression overlapped\n");
1313 }
1314 tempptr ++;
1315 }
1316
1317 /* tertiary top/bottom/common for en_US is similar to the secondary
1318 top/bottom/common */
1319 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1320 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1321 while (*(tempptr + 1) != 0) {
1322 /* the last secondary collation element is not checked since it is not
1323 part of the compression */
1324 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1325 log_err("Tertiary top down compression overlapped\n");
1326 }
1327 tempptr ++;
1328 }
1329
1330 /* bottom up compression ------------------------------------- */
1331 secstr[count] = 0;
1332 tertstr[count] = 0;
1333 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1334 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1335 while (*(tempptr + 1) != 1) {
1336 /* the last secondary collation element is not checked since it is not
1337 part of the compression */
1338 if (*tempptr > SEC_COMMON_MIDDLE) {
1339 log_err("Secondary bottom up compression overlapped\n");
1340 }
1341 tempptr ++;
1342 }
1343
1344 /* tertiary top/bottom/common for en_US is similar to the secondary
1345 top/bottom/common */
1346 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1347 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1348 while (*(tempptr + 1) != 0) {
1349 /* the last secondary collation element is not checked since it is not
1350 part of the compression */
1351 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1352 log_err("Tertiary bottom up compression overlapped\n");
1353 }
1354 tempptr ++;
1355 }
1356
1357 ucol_close(coll);
1358 }
1359
/*
 * Runs the three-string Cyrillic ordering check against each rule string
 * that is still enabled.
 */
static void TestCyrillicTailoring(void) {
    /* Expected order, in u_unescape() syntax. */
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Rule strings that are still exercised.
     *
     * Most of the historical variants are disabled because UCA 8.0
     * drops most of the Cyrillic contractions from the default order.
     * See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
     * Disabled variants:
     *   genericLocaleStarter("ru", ...)   - Russian overrides contractions,
     *                                       so this test is not valid anymore
     *   genericLocaleStarter("root", ...)
     *   "&\\u0410 = \\u0410"
     *   "&Z < \\u0410"
     *   "&\\u0410 = \\u0410 < \\u0410\\u0301"
     *   "&Z < \\u0410 < \\u0410\\u0301"
     */
    static const char *const enabledRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };

    const uint32_t testCount = (uint32_t)(sizeof(test) / sizeof(test[0]));
    const uint32_t ruleCount = (uint32_t)(sizeof(enabledRules) / sizeof(enabledRules[0]));
    uint32_t r;

    for (r = 0; r < ruleCount; ++r) {
        genericRulesStarter(enabledRules[r], test, testCount);
    }
}
1382
/*
 * Runs two three-string ordering checks against a tailoring that consists
 * only of a [suppressContractions] directive for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* The same rule string is used for both data sets. */
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* Checked first. */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* Checked second. */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    const uint32_t firstCount = (uint32_t)(sizeof(testNoCont) / sizeof(testNoCont[0]));
    const uint32_t secondCount = (uint32_t)(sizeof(testNoCont2) / sizeof(testNoCont2[0]));

    genericRulesStarter(suppressRule, testNoCont, firstCount);
    genericRulesStarter(suppressRule, testNoCont2, secondCount);
}
1399
TestContraction(void)1400 static void TestContraction(void) {
1401 const static char *testrules[] = {
1402 "&A = AB / B",
1403 "&A = A\\u0306/\\u0306",
1404 "&c = ch / h"
1405 };
1406 const static UChar testdata[][2] = {
1407 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1408 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1409 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1410 };
1411 const static UChar testdata2[][2] = {
1412 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1413 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1414 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1415 };
1416 #if 0
1417 /*
1418 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1419 * In fact, LDML 24 recommends an improved way of creating mappings
1420 * which always yields different mappings for such pairs. See
1421 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1422 */
1423 const static char *testrules3[] = {
1424 "&z < xyz &xyzw << B",
1425 "&z < xyz &xyz << B / w",
1426 "&z < ch &achm << B",
1427 "&z < ch &a << B / chm",
1428 "&\\ud800\\udc00w << B",
1429 "&\\ud800\\udc00 << B / w",
1430 "&a\\ud800\\udc00m << B",
1431 "&a << B / \\ud800\\udc00m",
1432 };
1433 #endif
1434
1435 UErrorCode status = U_ZERO_ERROR;
1436 UCollator *coll;
1437 UChar rule[256] = {0};
1438 uint32_t rlen = 0;
1439 int i;
1440
1441 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1442 UCollationElements *iter1;
1443 int j = 0;
1444 log_verbose("Rule %s for testing\n", testrules[i]);
1445 rlen = u_unescape(testrules[i], rule, 32);
1446 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1447 if (U_FAILURE(status)) {
1448 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1449 return;
1450 }
1451 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1452 if (U_FAILURE(status)) {
1453 log_err("Collation iterator creation failed\n");
1454 return;
1455 }
1456 while (j < 2) {
1457 UCollationElements *iter2 = ucol_openElements(coll,
1458 &(testdata[i][j]),
1459 1, &status);
1460 uint32_t ce;
1461 if (U_FAILURE(status)) {
1462 log_err("Collation iterator creation failed\n");
1463 return;
1464 }
1465 ce = ucol_next(iter2, &status);
1466 while (ce != UCOL_NULLORDER) {
1467 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1468 log_err("Collation elements in contraction split does not match\n");
1469 return;
1470 }
1471 ce = ucol_next(iter2, &status);
1472 }
1473 j ++;
1474 ucol_closeElements(iter2);
1475 }
1476 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1477 log_err("Collation elements not exhausted\n");
1478 return;
1479 }
1480 ucol_closeElements(iter1);
1481 ucol_close(coll);
1482 }
1483
1484 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1485 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1486 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1487 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1488 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1489 testdata2[1][1]);
1490 return;
1491 }
1492 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1493 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1494 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1495 testdata2[2][1]);
1496 return;
1497 }
1498 ucol_close(coll);
1499 #if 0 /* see above */
1500 for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1501 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1502 UCollator *coll1,
1503 *coll2;
1504 UCollationElements *iter1,
1505 *iter2;
1506 UChar ch = 0x0042 /* 'B' */;
1507 uint32_t ce;
1508 rlen = u_unescape(testrules3[i], rule, 32);
1509 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1510 rlen = u_unescape(testrules3[i + 1], rule, 32);
1511 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1512 if (U_FAILURE(status)) {
1513 log_err("Collator creation failed %s\n", testrules[i]);
1514 return;
1515 }
1516 iter1 = ucol_openElements(coll1, &ch, 1, &status);
1517 iter2 = ucol_openElements(coll2, &ch, 1, &status);
1518 if (U_FAILURE(status)) {
1519 log_err("Collation iterator creation failed\n");
1520 return;
1521 }
1522 ce = ucol_next(iter1, &status);
1523 if (U_FAILURE(status)) {
1524 log_err("Retrieving ces failed\n");
1525 return;
1526 }
1527 while (ce != UCOL_NULLORDER) {
1528 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1529 if (ce == ce2) {
1530 log_verbose("CEs match: %08x\n", ce);
1531 } else {
1532 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1533 return;
1534 }
1535 ce = ucol_next(iter1, &status);
1536 if (U_FAILURE(status)) {
1537 log_err("Retrieving ces failed\n");
1538 return;
1539 }
1540 }
1541 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1542 log_err("CEs not exhausted\n");
1543 return;
1544 }
1545 ucol_closeElements(iter1);
1546 ucol_closeElements(iter2);
1547 ucol_close(coll1);
1548 ucol_close(coll2);
1549 }
1550 #endif
1551 }
1552
TestExpansion(void)1553 static void TestExpansion(void) {
1554 const static char *testrules[] = {
1555 #if 0
1556 /*
1557 * This seems to have tested that M was not mapped to an expansion.
1558 * I believe the old builder just did that because it computed the extension CEs
1559 * at the very end, which was a bug.
1560 * Among other problems, it violated the core tailoring principle
1561 * by making an earlier rule depend on a later one.
1562 * And, of course, if M did not get an expansion, then it was primary different from K,
1563 * unlike what the rule &K<<M says.
1564 */
1565 "&J << K / B & K << M",
1566 #endif
1567 "&J << K / B << M"
1568 };
1569 const static UChar testdata[][3] = {
1570 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1571 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1572 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1573 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1574 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1575 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1576 };
1577
1578 UErrorCode status = U_ZERO_ERROR;
1579 UCollator *coll;
1580 UChar rule[256] = {0};
1581 uint32_t rlen = 0;
1582 int i;
1583
1584 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1585 int j = 0;
1586 log_verbose("Rule %s for testing\n", testrules[i]);
1587 rlen = u_unescape(testrules[i], rule, 32);
1588 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1589 if (U_FAILURE(status)) {
1590 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1591 return;
1592 }
1593
1594 for (j = 0; j < 5; j ++) {
1595 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1596 }
1597 ucol_close(coll);
1598 }
1599 }
1600
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Disabled test (compiled out by the surrounding "#if 0").
 * Each inner block defines one rule string plus one or more expected orderings
 * and hands them to genericRulesStarter() or genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};
        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        /* One attribute, tried once with UCOL_ON and once with UCOL_OFF. */
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* normalization: contractions spanning normalization */
    /* NOTE(review): rule and data are identical to the previous block. */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        /* att[] and the val*[] arrays are parallel: att[k] is set to val*[k]. */
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
    }

}
#endif
1692
TestBocsuCoverage(void)1693 static void TestBocsuCoverage(void) {
1694 UErrorCode status = U_ZERO_ERROR;
1695 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1696 UChar test[256] = {0};
1697 uint32_t tlen = u_unescape(testString, test, 32);
1698 uint8_t key[256] = {0};
1699 uint32_t klen = 0;
1700
1701 UCollator *coll = ucol_open("", &status);
1702 if(U_SUCCESS(status)) {
1703 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1704
1705 klen = ucol_getSortKey(coll, test, tlen, key, 256);
1706 (void)klen; /* Suppress set but not used warning. */
1707
1708 ucol_close(coll);
1709 } else {
1710 log_data_err("Couldn't open UCA\n");
1711 }
1712 }
1713
TestVariableTopSetting(void)1714 static void TestVariableTopSetting(void) {
1715 UErrorCode status = U_ZERO_ERROR;
1716 uint32_t varTopOriginal = 0, varTop1, varTop2;
1717 UCollator *coll = ucol_open("", &status);
1718 if(U_SUCCESS(status)) {
1719
1720 static const UChar nul = 0;
1721 static const UChar space = 0x20;
1722 static const UChar dot = 0x2e; /* punctuation */
1723 static const UChar degree = 0xb0; /* symbol */
1724 static const UChar dollar = 0x24; /* currency symbol */
1725 static const UChar zero = 0x30; /* digit */
1726
1727 varTopOriginal = ucol_getVariableTop(coll, &status);
1728 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1729 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1730
1731 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1732 varTop2 = ucol_getVariableTop(coll, &status);
1733 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1734 if(U_FAILURE(status) || varTop1 != varTop2 ||
1735 !ucol_equal(coll, &nul, 0, &space, 1) ||
1736 ucol_equal(coll, &nul, 0, &dot, 1) ||
1737 ucol_equal(coll, &nul, 0, °ree, 1) ||
1738 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1739 ucol_equal(coll, &nul, 0, &zero, 1) ||
1740 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1741 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1742 }
1743
1744 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1745 varTop2 = ucol_getVariableTop(coll, &status);
1746 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1747 if(U_FAILURE(status) || varTop1 != varTop2 ||
1748 !ucol_equal(coll, &nul, 0, &space, 1) ||
1749 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1750 ucol_equal(coll, &nul, 0, °ree, 1) ||
1751 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1752 ucol_equal(coll, &nul, 0, &zero, 1) ||
1753 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1754 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1755 }
1756
1757 varTop1 = ucol_setVariableTop(coll, °ree, 1, &status);
1758 varTop2 = ucol_getVariableTop(coll, &status);
1759 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1760 if(U_FAILURE(status) || varTop1 != varTop2 ||
1761 !ucol_equal(coll, &nul, 0, &space, 1) ||
1762 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1763 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1764 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1765 ucol_equal(coll, &nul, 0, &zero, 1) ||
1766 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1767 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1768 }
1769
1770 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1771 varTop2 = ucol_getVariableTop(coll, &status);
1772 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1773 if(U_FAILURE(status) || varTop1 != varTop2 ||
1774 !ucol_equal(coll, &nul, 0, &space, 1) ||
1775 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1776 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1777 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1778 ucol_equal(coll, &nul, 0, &zero, 1) ||
1779 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1780 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1781 }
1782
1783 log_verbose("Testing setting variable top to contractions\n");
1784 {
1785 UChar first[4] = { 0 };
1786 first[0] = 0x0040;
1787 first[1] = 0x0050;
1788 first[2] = 0x0000;
1789
1790 status = U_ZERO_ERROR;
1791 ucol_setVariableTop(coll, first, -1, &status);
1792
1793 if(U_SUCCESS(status)) {
1794 log_err("Invalid contraction succeded in setting variable top!\n");
1795 }
1796
1797 }
1798
1799 log_verbose("Test restoring variable top\n");
1800
1801 status = U_ZERO_ERROR;
1802 ucol_restoreVariableTop(coll, varTopOriginal, &status);
1803 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1804 log_err("Couldn't restore old variable top\n");
1805 }
1806
1807 log_verbose("Testing calling with error set\n");
1808
1809 status = U_INTERNAL_PROGRAM_ERROR;
1810 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1811 varTop2 = ucol_getVariableTop(coll, &status);
1812 ucol_restoreVariableTop(coll, varTop2, &status);
1813 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1814 varTop2 = ucol_getVariableTop(NULL, &status);
1815 ucol_restoreVariableTop(NULL, varTop2, &status);
1816 if(status != U_INTERNAL_PROGRAM_ERROR) {
1817 log_err("Bad reaction to passed error!\n");
1818 }
1819 ucol_close(coll);
1820 } else {
1821 log_data_err("Couldn't open UCA collator\n");
1822 }
1823 }
1824
TestMaxVariable()1825 static void TestMaxVariable() {
1826 UErrorCode status = U_ZERO_ERROR;
1827 UColReorderCode oldMax, max;
1828 UCollator *coll;
1829
1830 static const UChar nul = 0;
1831 static const UChar space = 0x20;
1832 static const UChar dot = 0x2e; /* punctuation */
1833 static const UChar degree = 0xb0; /* symbol */
1834 static const UChar dollar = 0x24; /* currency symbol */
1835 static const UChar zero = 0x30; /* digit */
1836
1837 coll = ucol_open("", &status);
1838 if(U_FAILURE(status)) {
1839 log_data_err("Couldn't open root collator\n");
1840 return;
1841 }
1842
1843 oldMax = ucol_getMaxVariable(coll);
1844 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1845 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1846
1847 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1848 max = ucol_getMaxVariable(coll);
1849 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1850 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1851 !ucol_equal(coll, &nul, 0, &space, 1) ||
1852 ucol_equal(coll, &nul, 0, &dot, 1) ||
1853 ucol_equal(coll, &nul, 0, °ree, 1) ||
1854 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1855 ucol_equal(coll, &nul, 0, &zero, 1) ||
1856 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1857 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1858 }
1859
1860 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1861 max = ucol_getMaxVariable(coll);
1862 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1863 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1864 !ucol_equal(coll, &nul, 0, &space, 1) ||
1865 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1866 ucol_equal(coll, &nul, 0, °ree, 1) ||
1867 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1868 ucol_equal(coll, &nul, 0, &zero, 1) ||
1869 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1870 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1871 }
1872
1873 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1874 max = ucol_getMaxVariable(coll);
1875 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1876 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1877 !ucol_equal(coll, &nul, 0, &space, 1) ||
1878 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1879 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1880 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1881 ucol_equal(coll, &nul, 0, &zero, 1) ||
1882 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1883 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1884 }
1885
1886 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1887 max = ucol_getMaxVariable(coll);
1888 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1889 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1890 !ucol_equal(coll, &nul, 0, &space, 1) ||
1891 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1892 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1893 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1894 ucol_equal(coll, &nul, 0, &zero, 1) ||
1895 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1896 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1897 }
1898
1899 log_verbose("Test restoring maxVariable\n");
1900 status = U_ZERO_ERROR;
1901 ucol_setMaxVariable(coll, oldMax, &status);
1902 if(oldMax != ucol_getMaxVariable(coll)) {
1903 log_err("Couldn't restore old maxVariable\n");
1904 }
1905
1906 log_verbose("Testing calling with error set\n");
1907 status = U_INTERNAL_PROGRAM_ERROR;
1908 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1909 max = ucol_getMaxVariable(coll);
1910 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1911 log_err("Bad reaction to passed error!\n");
1912 }
1913 ucol_close(coll);
1914 }
1915
TestNonChars(void)1916 static void TestNonChars(void) {
1917 static const char *test[] = {
1918 "\\u0000", /* ignorable */
1919 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1920 "\\uFDD0", "\\uFDEF",
1921 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1922 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
1923 "\\U0003FFFE", "\\U0003FFFF",
1924 "\\U0004FFFE", "\\U0004FFFF",
1925 "\\U0005FFFE", "\\U0005FFFF",
1926 "\\U0006FFFE", "\\U0006FFFF",
1927 "\\U0007FFFE", "\\U0007FFFF",
1928 "\\U0008FFFE", "\\U0008FFFF",
1929 "\\U0009FFFE", "\\U0009FFFF",
1930 "\\U000AFFFE", "\\U000AFFFF",
1931 "\\U000BFFFE", "\\U000BFFFF",
1932 "\\U000CFFFE", "\\U000CFFFF",
1933 "\\U000DFFFE", "\\U000DFFFF",
1934 "\\U000EFFFE", "\\U000EFFFF",
1935 "\\U000FFFFE", "\\U000FFFFF",
1936 "\\U0010FFFE", "\\U0010FFFF",
1937 "\\uFFFF" /* special character with maximum primary weight */
1938 };
1939 UErrorCode status = U_ZERO_ERROR;
1940 UCollator *coll = ucol_open("en_US", &status);
1941
1942 log_verbose("Test non characters\n");
1943
1944 if(U_SUCCESS(status)) {
1945 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1946 } else {
1947 log_err_status(status, "Unable to open collator\n");
1948 }
1949
1950 ucol_close(coll);
1951 }
1952
TestExtremeCompression(void)1953 static void TestExtremeCompression(void) {
1954 static char *test[4];
1955 int32_t j = 0, i = 0;
1956
1957 for(i = 0; i<4; i++) {
1958 test[i] = (char *)malloc(2048*sizeof(char));
1959 }
1960
1961 for(j = 20; j < 500; j++) {
1962 for(i = 0; i<4; i++) {
1963 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1964 test[i][j-1] = (char)('a'+i);
1965 test[i][j] = 0;
1966 }
1967 genericLocaleStarter("en_US", (const char **)test, 4);
1968 }
1969
1970
1971 for(i = 0; i<4; i++) {
1972 free(test[i]);
1973 }
1974 }
1975
1976 #if 0
1977 static void TestExtremeCompression(void) {
1978 static char *test[4];
1979 int32_t j = 0, i = 0;
1980 UErrorCode status = U_ZERO_ERROR;
1981 UCollator *coll = ucol_open("en_US", status);
1982 for(i = 0; i<4; i++) {
1983 test[i] = (char *)malloc(2048*sizeof(char));
1984 }
1985 for(j = 10; j < 2048; j++) {
1986 for(i = 0; i<4; i++) {
1987 uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1988 test[i][j-1] = (char)('a'+i);
1989 test[i][j] = 0;
1990 }
1991 }
1992 genericLocaleStarter("en_US", (const char **)test, 4);
1993
1994 for(j = 10; j < 2048; j++) {
1995 for(i = 0; i<1; i++) {
1996 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1997 test[i][j] = 0;
1998 }
1999 }
2000 for(i = 0; i<4; i++) {
2001 free(test[i]);
2002 }
2003 }
2004 #endif
2005
/**
 * Builds a collator from a tailoring that involves supplementary characters
 * (written as surrogate pairs) and runs the generic ordering test on the
 * strings in expectedOrder.
 */
static void TestSurrogates(void) {
    static const char *tailoring =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    static const char *expectedOrder[] = {
        "z","\\ud900\\udc25", "\\ud805\\udc50",
        "\\ud800\\udc00y", "\\ud800\\udc00r",
        "\\ud800\\udc00f", "\\ud800\\udc00",
        "\\ud800\\udc00c", "\\ud800\\udc00b",
        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c", "b"
    };

    /* all 14 strings take part in the comparison */
    genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2026
2027 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
TestPrefix(void)2028 static void TestPrefix(void) {
2029 uint32_t i;
2030
2031 static const struct {
2032 const char *rules;
2033 const char *data[50];
2034 const uint32_t len;
2035 } tests[] = {
2036 { "&z <<< z|a",
2037 {"zz", "za"}, 2 },
2038
2039 { "&z <<< z| a",
2040 {"zz", "za"}, 2 },
2041 { "[strength I]"
2042 "&a=\\ud900\\udc25"
2043 "&z<<<\\ud900\\udc25|a",
2044 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2045 };
2046
2047
2048 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2049 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2050 }
2051 }
2052
2053 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2054 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)2055 static void TestNewJapanese(void) {
2056
2057 static const char * const test1[] = {
2058 "\\u30b7\\u30e3\\u30fc\\u30ec",
2059 "\\u30b7\\u30e3\\u30a4",
2060 "\\u30b7\\u30e4\\u30a3",
2061 "\\u30b7\\u30e3\\u30ec",
2062 "\\u3061\\u3087\\u3053",
2063 "\\u3061\\u3088\\u3053",
2064 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2065 "\\u3066\\u30fc\\u305f",
2066 "\\u30c6\\u30fc\\u30bf",
2067 "\\u30c6\\u30a7\\u30bf",
2068 "\\u3066\\u3048\\u305f",
2069 "\\u3067\\u30fc\\u305f",
2070 "\\u30c7\\u30fc\\u30bf",
2071 "\\u30c7\\u30a7\\u30bf",
2072 "\\u3067\\u3048\\u305f",
2073 "\\u3066\\u30fc\\u305f\\u30fc",
2074 "\\u30c6\\u30fc\\u30bf\\u30a1",
2075 "\\u30c6\\u30a7\\u30bf\\u30fc",
2076 "\\u3066\\u3047\\u305f\\u3041",
2077 "\\u3066\\u3048\\u305f\\u30fc",
2078 "\\u3067\\u30fc\\u305f\\u30fc",
2079 "\\u30c7\\u30fc\\u30bf\\u30a1",
2080 "\\u3067\\u30a7\\u305f\\u30a1",
2081 "\\u30c7\\u3047\\u30bf\\u3041",
2082 "\\u30c7\\u30a8\\u30bf\\u30a2",
2083 "\\u3072\\u3086",
2084 "\\u3073\\u3085\\u3042",
2085 "\\u3074\\u3085\\u3042",
2086 "\\u3073\\u3085\\u3042\\u30fc",
2087 "\\u30d3\\u30e5\\u30a2\\u30fc",
2088 "\\u3074\\u3085\\u3042\\u30fc",
2089 "\\u30d4\\u30e5\\u30a2\\u30fc",
2090 "\\u30d2\\u30e5\\u30a6",
2091 "\\u30d2\\u30e6\\u30a6",
2092 "\\u30d4\\u30e5\\u30a6\\u30a2",
2093 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2094 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2095 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2096 "\\u3072\\u3085\\u3093",
2097 "\\u3074\\u3085\\u3093",
2098 "\\u3075\\u30fc\\u308a",
2099 "\\u30d5\\u30fc\\u30ea",
2100 "\\u3075\\u3045\\u308a",
2101 "\\u3075\\u30a5\\u308a",
2102 "\\u3075\\u30a5\\u30ea",
2103 "\\u30d5\\u30a6\\u30ea",
2104 "\\u3076\\u30fc\\u308a",
2105 "\\u30d6\\u30fc\\u30ea",
2106 "\\u3076\\u3045\\u308a",
2107 "\\u30d6\\u30a5\\u308a",
2108 "\\u3077\\u3046\\u308a",
2109 "\\u30d7\\u30a6\\u30ea",
2110 "\\u3075\\u30fc\\u308a\\u30fc",
2111 "\\u30d5\\u30a5\\u30ea\\u30fc",
2112 "\\u3075\\u30a5\\u308a\\u30a3",
2113 "\\u30d5\\u3045\\u308a\\u3043",
2114 "\\u30d5\\u30a6\\u30ea\\u30fc",
2115 "\\u3075\\u3046\\u308a\\u3043",
2116 "\\u30d6\\u30a6\\u30ea\\u30a4",
2117 "\\u3077\\u30fc\\u308a\\u30fc",
2118 "\\u3077\\u30a5\\u308a\\u30a4",
2119 "\\u3077\\u3046\\u308a\\u30fc",
2120 "\\u30d7\\u30a6\\u30ea\\u30a4",
2121 "\\u30d5\\u30fd",
2122 "\\u3075\\u309e",
2123 "\\u3076\\u309d",
2124 "\\u3076\\u3075",
2125 "\\u3076\\u30d5",
2126 "\\u30d6\\u3075",
2127 "\\u30d6\\u30d5",
2128 "\\u3076\\u309e",
2129 "\\u3076\\u3077",
2130 "\\u30d6\\u3077",
2131 "\\u3077\\u309d",
2132 "\\u30d7\\u30fd",
2133 "\\u3077\\u3075",
2134 };
2135
2136 static const char *test2[] = {
2137 "\\u306f\\u309d", /* H\\u309d */
2138 "\\u30cf\\u30fd", /* K\\u30fd */
2139 "\\u306f\\u306f", /* HH */
2140 "\\u306f\\u30cf", /* HK */
2141 "\\u30cf\\u30cf", /* KK */
2142 "\\u306f\\u309e", /* H\\u309e */
2143 "\\u30cf\\u30fe", /* K\\u30fe */
2144 "\\u306f\\u3070", /* HH\\u309b */
2145 "\\u30cf\\u30d0", /* KK\\u309b */
2146 "\\u306f\\u3071", /* HH\\u309c */
2147 "\\u30cf\\u3071", /* KH\\u309c */
2148 "\\u30cf\\u30d1", /* KK\\u309c */
2149 "\\u3070\\u309d", /* H\\u309b\\u309d */
2150 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2151 "\\u3070\\u306f", /* H\\u309bH */
2152 "\\u30d0\\u30cf", /* K\\u309bK */
2153 "\\u3070\\u309e", /* H\\u309b\\u309e */
2154 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2155 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2156 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2157 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2158 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2159 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2160 "\\u3071\\u309d", /* H\\u309c\\u309d */
2161 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2162 "\\u3071\\u306f", /* H\\u309cH */
2163 "\\u30d1\\u30cf", /* K\\u309cK */
2164 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2165 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2166 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2167 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2168 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2169 };
2170 /*
2171 static const char *test3[] = {
2172 "\\u221er\\u221e",
2173 "\\u221eR#",
2174 "\\u221et\\u221e",
2175 "#r\\u221e",
2176 "#R#",
2177 "#t%",
2178 "#T%",
2179 "8t\\u221e",
2180 "8T\\u221e",
2181 "8t#",
2182 "8T#",
2183 "8t%",
2184 "8T%",
2185 "8t8",
2186 "8T8",
2187 "\\u03c9r\\u221e",
2188 "\\u03a9R%",
2189 "rr\\u221e",
2190 "rR\\u221e",
2191 "Rr\\u221e",
2192 "RR\\u221e",
2193 "RT%",
2194 "rt8",
2195 "tr\\u221e",
2196 "tr8",
2197 "TR8",
2198 "tt8",
2199 "\\u30b7\\u30e3\\u30fc\\u30ec",
2200 };
2201 */
2202 static const UColAttribute att[] = { UCOL_STRENGTH };
2203 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2204
2205 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2206 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2207
2208 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2209 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2210 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2211 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2212 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2213 }
2214
TestStrCollIdenticalPrefix(void)2215 static void TestStrCollIdenticalPrefix(void) {
2216 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2217 const char* test[] = {
2218 "ab\\ud9b0\\udc70",
2219 "ab\\ud9b0\\udc71"
2220 };
2221 genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2222 }
2223 /* Contractions should have all their canonically equivalent */
2224 /* strings included */
TestContractionClosure(void)2225 static void TestContractionClosure(void) {
2226 static const struct {
2227 const char *rules;
2228 const char *data[10];
2229 const uint32_t len;
2230 } tests[] = {
2231 { "&b=\\u00e4\\u00e4",
2232 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2233 { "&b=\\u00C5",
2234 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2235 };
2236 uint32_t i;
2237
2238
2239 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2240 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2241 }
2242 }
2243
2244 /* This tests also fails*/
TestBeforePrefixFailure(void)2245 static void TestBeforePrefixFailure(void) {
2246 static const struct {
2247 const char *rules;
2248 const char *data[10];
2249 const uint32_t len;
2250 } tests[] = {
2251 { "&g <<< a"
2252 "&[before 3]\\uff41 <<< x",
2253 {"x", "\\uff41"}, 2 },
2254 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2255 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2256 "&[before 3]\\u30a7<<<\\u30a9",
2257 {"\\u30a9", "\\u30a7"}, 2 },
2258 { "&[before 3]\\u30a7<<<\\u30a9"
2259 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2260 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2261 {"\\u30a9", "\\u30a7"}, 2 },
2262 };
2263 uint32_t i;
2264
2265
2266 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2267 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2268 }
2269
2270 #if 0
2271 const char* rule1 =
2272 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2273 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2274 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2275 const char* rule2 =
2276 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2277 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2278 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2279 const char* test[] = {
2280 "\\u30c6\\u30fc\\u30bf",
2281 "\\u30c6\\u30a7\\u30bf",
2282 };
2283 genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2284 genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
2285 /* this piece of code should be in some sort of verbose mode */
2286 /* it gets the collation elements for elements and prints them */
2287 /* This is useful when trying to see whether the problem is */
2288 {
2289 UErrorCode status = U_ZERO_ERROR;
2290 uint32_t i = 0;
2291 UCollationElements *it = NULL;
2292 uint32_t CE;
2293 UChar string[256];
2294 uint32_t uStringLen;
2295 UCollator *coll = NULL;
2296
2297 uStringLen = u_unescape(rule1, string, 256);
2298
2299 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2300
2301 /*coll = ucol_open("ja_JP_JIS", &status);*/
2302 it = ucol_openElements(coll, string, 0, &status);
2303
2304 for(i = 0; i < UPRV_LENGTHOF(test); i++) {
2305 log_verbose("%s\n", test[i]);
2306 uStringLen = u_unescape(test[i], string, 256);
2307 ucol_setText(it, string, uStringLen, &status);
2308
2309 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2310 log_verbose("%08X\n", CE);
2311 }
2312 log_verbose("\n");
2313
2314 }
2315
2316 ucol_closeElements(it);
2317 ucol_close(coll);
2318 }
2319 #endif
2320 }
2321
TestPrefixCompose(void)2322 static void TestPrefixCompose(void) {
2323 const char* rule1 =
2324 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2325 /*
2326 const char* test[] = {
2327 "\\u30c6\\u30fc\\u30bf",
2328 "\\u30c6\\u30a7\\u30bf",
2329 };
2330 */
2331 {
2332 UErrorCode status = U_ZERO_ERROR;
2333 /*uint32_t i = 0;*/
2334 /*UCollationElements *it = NULL;*/
2335 /* uint32_t CE;*/
2336 UChar string[256];
2337 uint32_t uStringLen;
2338 UCollator *coll = NULL;
2339
2340 uStringLen = u_unescape(rule1, string, 256);
2341
2342 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2343 ucol_close(coll);
2344 }
2345
2346
2347 }
2348
2349 /*
2350 [last variable] last variable value
2351 [last primary ignorable] largest CE for primary ignorable
2352 [last secondary ignorable] largest CE for secondary ignorable
2353 [last tertiary ignorable] largest CE for tertiary ignorable
2354 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2355 */
2356
TestRuleOptions(void)2357 static void TestRuleOptions(void) {
2358 /* values here are hardcoded and are correct for the current UCA
2359 * when the UCA changes, one might be forced to change these
2360 * values.
2361 */
2362
2363 /*
2364 * These strings contain the last character before [variable top]
2365 * and the first and second characters (by primary weights) after it.
2366 * See FractionalUCA.txt. For example:
2367 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2368 [variable top = 0C FE]
2369 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2370 and
2371 00B4; [0D 0C, 05, 05]
2372 *
2373 * Note: Starting with UCA 6.0, the [variable top] collation element
2374 * is not the weight of any character or string,
2375 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2376 */
2377 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2378 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2379 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2380
2381 /*
2382 * This string has to match the character that has the [last regular] weight
2383 * which changes with each UCA version.
2384 * See the bottom of FractionalUCA.txt which says something like
2385 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2386 *
2387 * Note: Starting with UCA 6.0, the [last regular] collation element
2388 * is not the weight of any character or string,
2389 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2390 */
2391 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2392
2393 static const struct {
2394 const char *rules;
2395 const char *data[10];
2396 const uint32_t len;
2397 } tests[] = {
2398 #if 0
2399 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2400 /* - all befores here amount to zero */
2401 { "&[before 3][first tertiary ignorable]<<<a",
2402 { "\\u0000", "a"}, 2
2403 }, /* you cannot go before first tertiary ignorable */
2404
2405 { "&[before 3][last tertiary ignorable]<<<a",
2406 { "\\u0000", "a"}, 2
2407 }, /* you cannot go before last tertiary ignorable */
2408 #endif
2409 /*
2410 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2411 * and it *is* possible to "go before" that.
2412 */
2413 { "&[before 3][first secondary ignorable]<<<a",
2414 { "\\u0000", "a"}, 2
2415 },
2416
2417 { "&[before 3][last secondary ignorable]<<<a",
2418 { "\\u0000", "a"}, 2
2419 },
2420
2421 /* 'normal' befores */
2422
2423 /*
2424 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2425 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2426 * because there is no tailoring space before that boundary.
2427 * Made the tests work by tailoring to a space instead.
2428 */
2429 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
2430 { "c", "b", "\\u0332", "a" }, 4
2431 },
2432
2433 /* we don't have a code point that corresponds to
2434 * the last primary ignorable
2435 */
2436 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
2437 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2438 },
2439
2440 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2441 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
2442 },
2443
2444 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2445 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2446 },
2447
2448 { "&[first regular]<a"
2449 "&[before 1][first regular]<b",
2450 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2451 },
2452
2453 { "&[before 1][last regular]<b"
2454 "&[last regular]<a",
2455 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2456 },
2457
2458 { "&[before 1][first implicit]<b"
2459 "&[first implicit]<a",
2460 { "b", "\\u4e00", "a", "\\u4e01"}, 4
2461 },
2462 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2463 { "&[before 1][last implicit]<b"
2464 "&[last implicit]<a",
2465 { "b", "\\U0010FFFD", "a" }, 3
2466 },
2467 #endif
2468 { "&[last variable]<z"
2469 "&' '<x" /* was &[last primary ignorable]<x, see above */
2470 "&[last secondary ignorable]<<y"
2471 "&[last tertiary ignorable]<<<w"
2472 "&[top]<u",
2473 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2474 }
2475
2476 };
2477 uint32_t i;
2478
2479 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2480 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2481 }
2482 }
2483
2484
TestOptimize(void)2485 static void TestOptimize(void) {
2486 /* this is not really a test - just trying out
2487 * whether copying of UCA contents will fail
2488 * Cannot really test, since the functionality
2489 * remains the same.
2490 */
2491 static const struct {
2492 const char *rules;
2493 const char *data[10];
2494 const uint32_t len;
2495 } tests[] = {
2496 /* - all befores here amount to zero */
2497 { "[optimize [\\uAC00-\\uD7FF]]",
2498 { "a", "b"}, 2}
2499 };
2500 uint32_t i;
2501
2502 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2503 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2504 }
2505 }
2506
2507 /*
2508 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2509 weiv ucol_strcollIter?
2510 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2511 weiv these are the input strings?
2512 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2513 weiv will check - could be a problem with utf-8 iterator
2514 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2515 weiv hmmm
2516 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2517 weiv that doesn't sound right
2518 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2519 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2520 cycheng@ca.ibm.c... yes
2521 weiv and then do the comparison
2522 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2523 weiv utf-16 strings look like a little endian ones in the example you sent me
2524 weiv It could be a bug - let me try to test it out
2525 cycheng@ca.ibm.c... ok
2526 cycheng@ca.ibm.c... we can wait till the conf. call
2527 cycheng@ca.ibm.c... next weke
2528 weiv that would be great
2529 weiv hmmm
2530 weiv I might be wrong
2531 weiv let me play with it some more
2532 cycheng@ca.ibm.c... ok
2533 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2534 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2535 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2536 weiv ok
2537 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2538 weiv thanks
2539 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2540 */
2541 #if 0
2542 static void Alexis(void) {
2543 UErrorCode status = U_ZERO_ERROR;
2544 UCollator *coll = ucol_open("", &status);
2545
2546
2547 const char utf16be[2][4] = {
2548 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2549 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2550 };
2551
2552 const char utf8[2][4] = {
2553 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2554 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2555 };
2556
2557 UCharIterator iterU161, iterU162;
2558 UCharIterator iterU81, iterU82;
2559
2560 UCollationResult resU16, resU8;
2561
2562 uiter_setUTF16BE(&iterU161, utf16be[0], 4);
2563 uiter_setUTF16BE(&iterU162, utf16be[1], 4);
2564
2565 uiter_setUTF8(&iterU81, utf8[0], 4);
2566 uiter_setUTF8(&iterU82, utf8[1], 4);
2567
2568 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2569
2570 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
2571 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
2572
2573
2574 if(resU16 != resU8) {
2575 log_err("different results\n");
2576 }
2577
2578 ucol_close(coll);
2579 }
2580 #endif
2581
2582 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2583 static void Alexis2(void) {
2584 UErrorCode status = U_ZERO_ERROR;
2585 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2589
2590 UConverter *conv = NULL;
2591
2592 UCharIterator U16BEItS, U16BEItT;
2593 UCharIterator U8ItS, U8ItT;
2594
2595 UCollationResult resU16, resU16BE, resU8;
2596
2597 static const char* const pairs[][2] = {
2598 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2599 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2600 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2601 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2602 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2603 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2604 { "\\u0020", "\\u0020\\u0000"}
2605 /*
2606 5F20 (my result here)
2607 5F204E008E3F
2608 5F20 (your result here)
2609 */
2610 };
2611
2612 int32_t i = 0;
2613
2614 UCollator *coll = ucol_open("", &status);
2615 if(status == U_FILE_ACCESS_ERROR) {
2616 log_data_err("Is your data around?\n");
2617 return;
2618 } else if(U_FAILURE(status)) {
2619 log_err("Error opening collator\n");
2620 return;
2621 }
2622 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2623 conv = ucnv_open("UTF16BE", &status);
2624 for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2625 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627
2628 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2629
2630 log_verbose("Result of strcoll is %i\n", resU16);
2631
2632 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2633 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2634 (void)U16BELenS; /* Suppress set but not used warnings. */
2635 (void)U16BELenT;
2636
2637 /* use the original sizes, as the result from converter is in bytes */
2638 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2639 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2640
2641 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2642
2643 log_verbose("Result of U16BE is %i\n", resU16BE);
2644
2645 if(resU16 != resU16BE) {
2646 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2647 }
2648
2649 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2650 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2651
2652 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2653 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2654
2655 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2656
2657 if(resU16 != resU8) {
2658 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2659 }
2660
2661 }
2662
2663 ucol_close(coll);
2664 ucnv_close(conv);
2665 }
2666
TestHebrewUCA(void)2667 static void TestHebrewUCA(void) {
2668 UErrorCode status = U_ZERO_ERROR;
2669 static const char *first[] = {
2670 "d790d6b8d79cd795d6bcd7a9",
2671 "d790d79cd79ed7a7d799d799d7a1",
2672 "d790d6b4d79ed795d6bcd7a9",
2673 };
2674
2675 char utf8String[3][256];
2676 UChar utf16String[3][256];
2677
2678 int32_t i = 0, j = 0;
2679 int32_t sizeUTF8[3];
2680 int32_t sizeUTF16[3];
2681
2682 UCollator *coll = ucol_open("", &status);
2683 if (U_FAILURE(status)) {
2684 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2685 return;
2686 }
2687 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2688
2689 for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2690 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2691 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2692 log_verbose("%i: ");
2693 for(j = 0; j < sizeUTF16[i]; j++) {
2694 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2695 log_verbose("%04X", utf16String[i][j]);
2696 }
2697 log_verbose("\n");
2698 }
2699 for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2700 for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2701 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2702 }
2703 }
2704
2705 ucol_close(coll);
2706
2707 }
2708
TestPartialSortKeyTermination(void)2709 static void TestPartialSortKeyTermination(void) {
2710 static const char* cases[] = {
2711 "\\u1234\\u1234\\udc00",
2712 "\\udc00\\ud800\\ud800"
2713 };
2714
2715 int32_t i;
2716
2717 UErrorCode status = U_ZERO_ERROR;
2718
2719 UCollator *coll = ucol_open("", &status);
2720
2721 UCharIterator iter;
2722
2723 UChar currCase[256];
2724 int32_t length = 0;
2725 int32_t pKeyLen = 0;
2726
2727 uint8_t key[256];
2728
2729 for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2730 uint32_t state[2] = {0, 0};
2731 length = u_unescape(cases[i], currCase, 256);
2732 uiter_setString(&iter, currCase, length);
2733 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2734 (void)pKeyLen; /* Suppress set but not used warning. */
2735
2736 log_verbose("Done\n");
2737
2738 }
2739 ucol_close(coll);
2740 }
2741
TestSettings(void)2742 static void TestSettings(void) {
2743 static const char* cases[] = {
2744 "apple",
2745 "Apple"
2746 };
2747
2748 static const char* locales[] = {
2749 "",
2750 "en"
2751 };
2752
2753 UErrorCode status = U_ZERO_ERROR;
2754
2755 int32_t i = 0, j = 0;
2756
2757 UChar source[256], target[256];
2758 int32_t sLen = 0, tLen = 0;
2759
2760 UCollator *collateObject = NULL;
2761 for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2762 collateObject = ucol_open(locales[i], &status);
2763 ucol_setStrength(collateObject, UCOL_PRIMARY);
2764 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2765 for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2766 sLen = u_unescape(cases[j-1], source, 256);
2767 source[sLen] = 0;
2768 tLen = u_unescape(cases[j], target, 256);
2769 source[tLen] = 0;
2770 doTest(collateObject, source, target, UCOL_EQUAL);
2771 }
2772 ucol_close(collateObject);
2773 }
2774 }
2775
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)2776 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2777 UErrorCode status = U_ZERO_ERROR;
2778 int32_t errorNo = 0;
2779 const UChar *sourceRules = NULL;
2780 int32_t sourceRulesLen = 0;
2781 UParseError parseError;
2782 UColAttributeValue french = UCOL_OFF;
2783
2784 if(!ucol_equals(source, target)) {
2785 log_err("Same collators, different address not equal\n");
2786 errorNo++;
2787 }
2788 ucol_close(target);
2789 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2790 target = ucol_safeClone(source, NULL, NULL, &status);
2791 if(U_FAILURE(status)) {
2792 log_err("Error creating clone\n");
2793 errorNo++;
2794 return errorNo;
2795 }
2796 if(!ucol_equals(source, target)) {
2797 log_err("Collator different from it's clone\n");
2798 errorNo++;
2799 }
2800 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2801 if(french == UCOL_ON) {
2802 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2803 } else {
2804 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2805 }
2806 if(U_FAILURE(status)) {
2807 log_err("Error setting attributes\n");
2808 errorNo++;
2809 return errorNo;
2810 }
2811 if(ucol_equals(source, target)) {
2812 log_err("Collators same even when options changed\n");
2813 errorNo++;
2814 }
2815 ucol_close(target);
2816
2817 sourceRules = ucol_getRules(source, &sourceRulesLen);
2818 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2819 if(U_FAILURE(status)) {
2820 log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2821 errorNo++;
2822 return errorNo;
2823 }
2824 /* Note: The tailoring rule string is an optional data item. */
2825 if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2826 log_err("Collator different from collator that was created from the same rules\n");
2827 errorNo++;
2828 }
2829 ucol_close(target);
2830 }
2831 return errorNo;
2832 }
2833
2834
TestEquals(void)2835 static void TestEquals(void) {
2836 /* ucol_equals is not currently a public API. There is a chance that it will become
2837 * something like this.
2838 */
2839 /* test whether the two collators instantiated from the same locale are equal */
2840 UErrorCode status = U_ZERO_ERROR;
2841 UParseError parseError;
2842 int32_t noOfLoc = uloc_countAvailable();
2843 const char *locName = NULL;
2844 UCollator *source = NULL, *target = NULL;
2845 int32_t i = 0;
2846
2847 const char* rules[] = {
2848 "&l < lj <<< Lj <<< LJ",
2849 "&n < nj <<< Nj <<< NJ",
2850 "&ae <<< \\u00e4",
2851 "&AE <<< \\u00c4"
2852 };
2853 /*
2854 const char* badRules[] = {
2855 "&l <<< Lj",
2856 "&n < nj <<< nJ <<< NJ",
2857 "&a <<< \\u00e4",
2858 "&AE <<< \\u00c4 <<< x"
2859 };
2860 */
2861
2862 UChar sourceRules[1024], targetRules[1024];
2863 int32_t sourceRulesSize = 0, targetRulesSize = 0;
2864 int32_t rulesSize = UPRV_LENGTHOF(rules);
2865
2866 for(i = 0; i < rulesSize; i++) {
2867 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2868 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2869 }
2870
2871 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2872 if(status == U_FILE_ACCESS_ERROR) {
2873 log_data_err("Is your data around?\n");
2874 return;
2875 } else if(U_FAILURE(status)) {
2876 log_err("Error opening collator\n");
2877 return;
2878 }
2879 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2880 if(!ucol_equals(source, target)) {
2881 log_err("Equivalent collators not equal!\n");
2882 }
2883 ucol_close(source);
2884 ucol_close(target);
2885
2886 source = ucol_open("root", &status);
2887 target = ucol_open("root", &status);
2888 log_verbose("Testing root\n");
2889 if(!ucol_equals(source, source)) {
2890 log_err("Same collator not equal\n");
2891 }
2892 if(TestEqualsForCollator("root", source, target)) {
2893 log_err("Errors for root\n");
2894 }
2895 ucol_close(source);
2896
2897 for(i = 0; i<noOfLoc; i++) {
2898 status = U_ZERO_ERROR;
2899 locName = uloc_getAvailable(i);
2900 /*if(hasCollationElements(locName)) {*/
2901 log_verbose("Testing equality for locale %s\n", locName);
2902 source = ucol_open(locName, &status);
2903 target = ucol_open(locName, &status);
2904 if (U_FAILURE(status)) {
2905 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
2906 continue;
2907 }
2908 if(TestEqualsForCollator(locName, source, target)) {
2909 log_err("Errors for locale %s\n", locName);
2910 }
2911 ucol_close(source);
2912 /*}*/
2913 }
2914 }
2915
TestJ2726(void)2916 static void TestJ2726(void) {
2917 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2918 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2919 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2920 UErrorCode status = U_ZERO_ERROR;
2921 UCollator *coll = ucol_open("en", &status);
2922 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2923 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2924 doTest(coll, a, aSpace, UCOL_EQUAL);
2925 doTest(coll, aSpace, a, UCOL_EQUAL);
2926 doTest(coll, a, spaceA, UCOL_EQUAL);
2927 doTest(coll, spaceA, a, UCOL_EQUAL);
2928 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2929 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2930 ucol_close(coll);
2931 }
2932
NullRule(void)2933 static void NullRule(void) {
2934 UChar r[3] = {0};
2935 UErrorCode status = U_ZERO_ERROR;
2936 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2937 if(U_SUCCESS(status)) {
2938 log_err("This should have been an error!\n");
2939 ucol_close(coll);
2940 } else {
2941 status = U_ZERO_ERROR;
2942 }
2943 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2944 if(U_FAILURE(status)) {
2945 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2946 } else {
2947 ucol_close(coll);
2948 }
2949 }
2950
2951 /**
2952 * Test for CollationElementIterator previous and next for the whole set of
2953 * unicode characters with normalization on.
2954 */
TestNumericCollation(void)2955 static void TestNumericCollation(void)
2956 {
2957 UErrorCode status = U_ZERO_ERROR;
2958
2959 const static char *basicTestStrings[]={
2960 "hello1",
2961 "hello2",
2962 "hello2002",
2963 "hello2003",
2964 "hello123456",
2965 "hello1234567",
2966 "hello10000000",
2967 "hello100000000",
2968 "hello1000000000",
2969 "hello10000000000",
2970 };
2971
2972 const static char *preZeroTestStrings[]={
2973 "avery10000",
2974 "avery010000",
2975 "avery0010000",
2976 "avery00010000",
2977 "avery000010000",
2978 "avery0000010000",
2979 "avery00000010000",
2980 "avery000000010000",
2981 };
2982
2983 const static char *thirtyTwoBitNumericStrings[]={
2984 "avery42949672960",
2985 "avery42949672961",
2986 "avery42949672962",
2987 "avery429496729610"
2988 };
2989
2990 const static char *longNumericStrings[]={
2991 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2992 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2993 are treated as multiple collation elements. */
2994 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2995 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2996 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2997 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2998 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2999 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3000 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3001 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3002 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3003 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3004 };
3005
3006 const static char *supplementaryDigits[] = {
3007 "\\uD835\\uDFCE", /* 0 */
3008 "\\uD835\\uDFCF", /* 1 */
3009 "\\uD835\\uDFD0", /* 2 */
3010 "\\uD835\\uDFD1", /* 3 */
3011 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3012 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3013 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3014 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3015 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3016 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3017 };
3018
3019 const static char *foreignDigits[] = {
3020 "\\u0661",
3021 "\\u0662",
3022 "\\u0663",
3023 "\\u0661\\u0660",
3024 "\\u0661\\u0662",
3025 "\\u0661\\u0663",
3026 "\\u0662\\u0660",
3027 "\\u0662\\u0662",
3028 "\\u0662\\u0663",
3029 "\\u0663\\u0660",
3030 "\\u0663\\u0662",
3031 "\\u0663\\u0663"
3032 };
3033
3034 const static char *evenZeroes[] = {
3035 "2000",
3036 "2001",
3037 "2002",
3038 "2003"
3039 };
3040
3041 UColAttribute att = UCOL_NUMERIC_COLLATION;
3042 UColAttributeValue val = UCOL_ON;
3043
3044 /* Open our collator. */
3045 UCollator* coll = ucol_open("root", &status);
3046 if (U_FAILURE(status)){
3047 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3048 myErrorName(status));
3049 return;
3050 }
3051 genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3052 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3053 genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3054 genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3055 genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3056 genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3057
3058 /* Setting up our collator to do digits. */
3059 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3060 if (U_FAILURE(status)){
3061 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3062 myErrorName(status));
3063 return;
3064 }
3065
3066 /*
3067 Testing that prepended zeroes still yield the correct collation behavior.
3068 We expect that every element in our strings array will be equal.
3069 */
3070 genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3071
3072 ucol_close(coll);
3073 }
3074
TestTibetanConformance(void)3075 static void TestTibetanConformance(void)
3076 {
3077 const char* test[] = {
3078 "\\u0FB2\\u0591\\u0F71\\u0061",
3079 "\\u0FB2\\u0F71\\u0061"
3080 };
3081
3082 UErrorCode status = U_ZERO_ERROR;
3083 UCollator *coll = ucol_open("", &status);
3084 UChar source[100];
3085 UChar target[100];
3086 int result;
3087 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3088 if (U_SUCCESS(status)) {
3089 u_unescape(test[0], source, 100);
3090 u_unescape(test[1], target, 100);
3091 doTest(coll, source, target, UCOL_EQUAL);
3092 result = ucol_strcoll(coll, source, -1, target, -1);
3093 log_verbose("result %d\n", result);
3094 if (UCOL_EQUAL != result) {
3095 log_err("Tibetan comparison error\n");
3096 }
3097 }
3098 ucol_close(coll);
3099
3100 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3101 }
3102
/*
 * Hands two escaped Han strings that share their first character (U+4E56)
 * to genericLocaleStarter() for the "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };
    const int32_t testCount = UPRV_LENGTHOF(test);

    genericLocaleStarter("zh__PINYIN", test, testCount);
}
3107
3108 /**
3109 * Iterate through the given iterator, checking to see that all the strings
3110 * in the expected array are present.
3111 * @param expected array of strings we expect to see, or NULL
3112 * @param expectedCount number of elements of expected, or 0
3113 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3114 static int32_t checkUEnumeration(const char* msg,
3115 UEnumeration* iter,
3116 const char** expected,
3117 int32_t expectedCount) {
3118 UErrorCode ec = U_ZERO_ERROR;
3119 int32_t i = 0, n, j, bit;
3120 int32_t seenMask = 0;
3121
3122 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3123 n = uenum_count(iter, &ec);
3124 if (!assertSuccess("count", &ec)) return -1;
3125 log_verbose("%s = [", msg);
3126 for (;; ++i) {
3127 const char* s = uenum_next(iter, NULL, &ec);
3128 if (!assertSuccess("snext", &ec) || s == NULL) break;
3129 if (i != 0) log_verbose(",");
3130 log_verbose("%s", s);
3131 /* check expected list */
3132 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3133 if ((seenMask&bit) == 0 &&
3134 uprv_strcmp(s, expected[j]) == 0) {
3135 seenMask |= bit;
3136 break;
3137 }
3138 }
3139 }
3140 log_verbose("] (%d)\n", i);
3141 assertTrue("count verified", i==n);
3142 /* did we see all expected strings? */
3143 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3144 if ((seenMask&bit)!=0) {
3145 log_verbose("Ok: \"%s\" seen\n", expected[j]);
3146 } else {
3147 log_err("FAIL: \"%s\" not seen\n", expected[j]);
3148 }
3149 }
3150 return n;
3151 }
3152
3153 /**
3154 * Test new API added for separate collation tree.
3155 */
TestSeparateTrees(void)3156 static void TestSeparateTrees(void) {
3157 UErrorCode ec = U_ZERO_ERROR;
3158 UEnumeration *e = NULL;
3159 int32_t n = -1;
3160 UBool isAvailable;
3161 char loc[256];
3162
3163 static const char* AVAIL[] = { "en", "de" };
3164
3165 static const char* KW[] = { "collation" };
3166
3167 static const char* KWVAL[] = { "phonebook", "stroke" };
3168
3169 #if !UCONFIG_NO_SERVICE
3170 e = ucol_openAvailableLocales(&ec);
3171 if (e != NULL) {
3172 assertSuccess("ucol_openAvailableLocales", &ec);
3173 assertTrue("ucol_openAvailableLocales!=0", e!=0);
3174 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3175 (void)n; /* Suppress set but not used warnings. */
3176 /* Don't need to check n because we check list */
3177 uenum_close(e);
3178 } else {
3179 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3180 }
3181 #endif
3182
3183 e = ucol_getKeywords(&ec);
3184 if (e != NULL) {
3185 assertSuccess("ucol_getKeywords", &ec);
3186 assertTrue("ucol_getKeywords!=0", e!=0);
3187 n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3188 /* Don't need to check n because we check list */
3189 uenum_close(e);
3190 } else {
3191 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3192 }
3193
3194 e = ucol_getKeywordValues(KW[0], &ec);
3195 if (e != NULL) {
3196 assertSuccess("ucol_getKeywordValues", &ec);
3197 assertTrue("ucol_getKeywordValues!=0", e!=0);
3198 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3199 /* Don't need to check n because we check list */
3200 uenum_close(e);
3201 } else {
3202 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3203 }
3204
3205 /* Try setting a warning before calling ucol_getKeywordValues */
3206 ec = U_USING_FALLBACK_WARNING;
3207 e = ucol_getKeywordValues(KW[0], &ec);
3208 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3209 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3210 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3211 /* Don't need to check n because we check list */
3212 uenum_close(e);
3213 }
3214
3215 /*
3216 U_DRAFT int32_t U_EXPORT2
3217 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3218 const char* locale, UBool* isAvailable,
3219 UErrorCode* status);
3220 }
3221 */
3222 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3223 &isAvailable, &ec);
3224 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3225 assertEquals("getFunctionalEquivalent(de)", "root", loc);
3226 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3227 isAvailable == TRUE);
3228 }
3229
3230 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3231 &isAvailable, &ec);
3232 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3233 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3234 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3235 isAvailable == FALSE);
3236 }
3237 }
3238
/*
 * Supersedes TestJ784.
 *
 * Runs two string lists through genericRulesStarter() with a set of
 * "[before 2]" tailorings for accented vowels, and through
 * genericLocaleStarter() for the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a macron on the vowel. */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Case and accent variants of "a" after x/X, alone and followed by x. */
    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    const int32_t testCount = UPRV_LENGTHOF(test);
    const int32_t test2Count = UPRV_LENGTHOF(test2);

    genericRulesStarter(rules, test, testCount);
    genericLocaleStarter("zh", test, testCount);
    genericRulesStarter(rules, test2, test2Count);
    genericLocaleStarter("zh", test2, test2Count);
}
3299
TestBeforeTightening(void)3300 static void TestBeforeTightening(void) {
3301 static const struct {
3302 const char *rules;
3303 UErrorCode expectedStatus;
3304 } tests[] = {
3305 { "&[before 1]a<x", U_ZERO_ERROR },
3306 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3307 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3308 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3309 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3310 { "&[before 2]a<<x",U_ZERO_ERROR },
3311 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3312 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3313 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
3314 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
3315 { "&[before 3]a<<<x",U_ZERO_ERROR },
3316 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
3317 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3318 };
3319
3320 int32_t i = 0;
3321
3322 UErrorCode status = U_ZERO_ERROR;
3323 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3324 uint32_t rlen = 0;
3325
3326 UCollator *coll = NULL;
3327
3328
3329 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3330 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3331 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3332 if(status != tests[i].expectedStatus) {
3333 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3334 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3335 }
3336 ucol_close(coll);
3337 status = U_ZERO_ERROR;
3338 }
3339
3340 }
3341
3342 /*
3343 &m < a
3344 &[before 1] a < x <<< X << q <<< Q < z
3345 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3346
3347 &m < a
3348 &[before 2] a << x <<< X << q <<< Q < z
3349 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3350
3351 &m < a
3352 &[before 3] a <<< x <<< X << q <<< Q < z
3353 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3354
3355
3356 &m << a
3357 &[before 1] a < x <<< X << q <<< Q < z
3358 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3359
3360 &m << a
3361 &[before 2] a << x <<< X << q <<< Q < z
3362 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3363
3364 &m << a
3365 &[before 3] a <<< x <<< X << q <<< Q < z
3366 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3367
3368
3369 &m <<< a
3370 &[before 1] a < x <<< X << q <<< Q < z
3371 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3372
3373 &m <<< a
3374 &[before 2] a << x <<< X << q <<< Q < z
3375 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3376
3377 &m <<< a
3378 &[before 3] a <<< x <<< X << q <<< Q < z
3379 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3380
3381
3382 &[before 1] s < x <<< X << q <<< Q < z
3383 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3384
3385 &[before 2] s << x <<< X << q <<< Q < z
3386 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3387
3388 &[before 3] s <<< x <<< X << q <<< Q < z
3389 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3390
3391
3392 &[before 1] \u24DC < x <<< X << q <<< Q < z
3393 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3394
3395 &[before 2] \u24DC << x <<< X << q <<< Q < z
3396 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3397
3398 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3399 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
3400 */
3401
3402
#if 0
/* requires features not yet supported */
/*
 * Disabled: for each entry, passes the tailoring rules and the listed
 * strings to genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t idx = 0;

    while (idx < UPRV_LENGTHOF(tests)) {
        genericRulesStarter(tests[idx].rules, tests[idx].order, tests[idx].size);
        ++idx;
    }
}
#endif
3450
TestTailorNULL(void)3451 static void TestTailorNULL( void ) {
3452 const static char* rule = "&a <<< '\\u0000'";
3453 UErrorCode status = U_ZERO_ERROR;
3454 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3455 uint32_t rlen = 0;
3456 UChar a = 1, null = 0;
3457 UCollationResult res = UCOL_EQUAL;
3458
3459 UCollator *coll = NULL;
3460
3461
3462 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3463 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3464
3465 if(U_FAILURE(status)) {
3466 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3467 } else {
3468 res = ucol_strcoll(coll, &a, 1, &null, 1);
3469
3470 if(res != UCOL_LESS) {
3471 log_err("NULL was not tailored properly!\n");
3472 }
3473 }
3474
3475 ucol_close(coll);
3476 }
3477
3478 static void
TestUpperFirstQuaternary(void)3479 TestUpperFirstQuaternary(void)
3480 {
3481 const char* tests[] = { "B", "b", "Bb", "bB" };
3482 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3483 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3484 genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3485 }
3486
3487 static void
TestJ4960(void)3488 TestJ4960(void)
3489 {
3490 const char* tests[] = { "\\u00e2T", "aT" };
3491 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3492 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3493 const char* tests2[] = { "a", "A" };
3494 const char* rule = "&[first tertiary ignorable]=A=a";
3495 UColAttribute att2[] = { UCOL_CASE_LEVEL };
3496 UColAttributeValue attVals2[] = { UCOL_ON };
3497 /* Test whether we correctly ignore primary ignorables on case level when */
3498 /* we have only primary & case level */
3499 genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3500 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3501 /* and case level */
3502 genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3503 /* Test whether completely ignorable letters have case level info (they shouldn't) */
3504 genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3505 }
3506
3507 static void
TestJ5223(void)3508 TestJ5223(void)
3509 {
3510 static const char *test = "this is a test string";
3511 UChar ustr[256];
3512 int32_t ustr_length = u_unescape(test, ustr, 256);
3513 unsigned char sortkey[256];
3514 int32_t sortkey_length;
3515 UErrorCode status = U_ZERO_ERROR;
3516 static UCollator *coll = NULL;
3517 coll = ucol_open("root", &status);
3518 if(U_FAILURE(status)) {
3519 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3520 return;
3521 }
3522 ucol_setStrength(coll, UCOL_PRIMARY);
3523 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3524 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3525 if (U_FAILURE(status)) {
3526 log_err("Failed setting atributes\n");
3527 return;
3528 }
3529 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3530 if (sortkey_length > 256) return;
3531
3532 /* we mark the position where the null byte should be written in advance */
3533 sortkey[sortkey_length-1] = 0xAA;
3534
3535 /* we set the buffer size one byte higher than needed */
3536 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3537 sortkey_length+1);
3538
3539 /* no error occurs (for me) */
3540 if (sortkey[sortkey_length-1] == 0xAA) {
3541 log_err("Hit bug at first try\n");
3542 }
3543
3544 /* we mark the position where the null byte should be written again */
3545 sortkey[sortkey_length-1] = 0xAA;
3546
3547 /* this time we set the buffer size to the exact amount needed */
3548 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3549 sortkey_length);
3550
3551 /* now the trailing null byte is not written */
3552 if (sortkey[sortkey_length-1] == 0xAA) {
3553 log_err("Hit bug at second try\n");
3554 }
3555
3556 ucol_close(coll);
3557 }
3558
/*
 * Regression test for Thai partial sort key problem.
 * The two strings differ only in their second-to-last code unit
 * (U+0E47 vs U+0E48) and are handed to genericLocaleStarter() for "th".
 */
static void
TestJ5232(void)
{
    static const char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };
    const int32_t testCount = UPRV_LENGTHOF(test);

    genericLocaleStarter("th", test, testCount);
}
3570
/*
 * Hands "a" and "y" to genericRulesStarter() together with a tailoring that
 * places "a" relative to [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *test[] = {
        "a",
        "y"
    };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const int32_t testCount = UPRV_LENGTHOF(test);

    genericRulesStarter(tailoring, test, testCount);
}
3578
3579 static void
TestVI5913(void)3580 TestVI5913(void)
3581 {
3582 UErrorCode status = U_ZERO_ERROR;
3583 int32_t i, j;
3584 UCollator *coll =NULL;
3585 uint8_t resColl[100], expColl[100];
3586 int32_t rLen, tLen, ruleLen, sLen, kLen;
3587 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
3588 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
3589 /*
3590 * Note: Just tailoring &z<ae^ does not work as expected:
3591 * The UCA spec requires for discontiguous contractions that they
3592 * extend an *existing match* by one combining mark at a time.
3593 * Therefore, ae must be a contraction so that the builder finds
3594 * discontiguous contractions for ae^, for example with an intervening underdot.
3595 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3596 */
3597 UChar rule3[256]={
3598 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3599 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3600 0};
3601 static const UChar tData[][20]={
3602 {0x1EAC, 0},
3603 {0x0041, 0x0323, 0x0302, 0},
3604 {0x1EA0, 0x0302, 0},
3605 {0x00C2, 0x0323, 0},
3606 {0x1ED8, 0}, /* O with dot and circumflex */
3607 {0x1ECC, 0x0302, 0},
3608 {0x1EB7, 0},
3609 {0x1EA1, 0x0306, 0},
3610 };
3611 static const UChar tailorData[][20]={
3612 {0x1FA2, 0}, /* Omega with 3 combining marks */
3613 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3614 {0x1FF3, 0x0313, 0x0300, 0},
3615 {0x1F60, 0x0300, 0x0345, 0},
3616 {0x1F62, 0x0345, 0},
3617 {0x1FA0, 0x0300, 0},
3618 };
3619 static const UChar tailorData2[][20]={
3620 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3621 {0x0073, 0x0323, 0x030C, 0},
3622 {0x0073, 0x030C, 0x0323, 0},
3623 };
3624 static const UChar tailorData3[][20]={
3625 {0x007a, 0}, /* z */
3626 {0x0061, 0x0065, 0}, /* a + e */
3627 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3628 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3629 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3630 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3631 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3632 {0x00EA, 0}, /* e with circumflex */
3633 };
3634
3635 /* Test Vietnamese sort. */
3636 coll = ucol_open("vi", &status);
3637 if(U_FAILURE(status)) {
3638 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3639 return;
3640 }
3641 log_verbose("\n\nVI collation:");
3642 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3643 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3644 }
3645 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3646 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3647 }
3648 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3649 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3650 }
3651 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3652 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3653 }
3654
3655 for (j=0; j<8; j++) {
3656 tLen = u_strlen(tData[j]);
3657 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3658 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3659 for(i = 0; i<rLen; i++) {
3660 log_verbose(" %02X", resColl[i]);
3661 }
3662 }
3663
3664 ucol_close(coll);
3665
3666 /* Test Romanian sort. */
3667 coll = ucol_open("ro", &status);
3668 log_verbose("\n\nRO collation:");
3669 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3670 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3671 }
3672 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3673 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3674 }
3675 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3676 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3677 }
3678
3679 for (j=4; j<8; j++) {
3680 tLen = u_strlen(tData[j]);
3681 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3682 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3683 for(i = 0; i<rLen; i++) {
3684 log_verbose(" %02X", resColl[i]);
3685 }
3686 }
3687 ucol_close(coll);
3688
3689 /* Test the precomposed Greek character with 3 combining marks. */
3690 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3691 ruleLen = u_strlen(rule);
3692 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3693 if (U_FAILURE(status)) {
3694 log_err("ucol_openRules failed with %s\n", u_errorName(status));
3695 return;
3696 }
3697 sLen = u_strlen(tailorData[0]);
3698 for (j=1; j<6; j++) {
3699 tLen = u_strlen(tailorData[j]);
3700 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
3701 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3702 }
3703 }
3704 /* Test getSortKey. */
3705 tLen = u_strlen(tailorData[0]);
3706 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3707 for (j=0; j<6; j++) {
3708 tLen = u_strlen(tailorData[j]);
3709 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3710 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3711 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3712 for(i = 0; i<rLen; i++) {
3713 log_err(" %02X", resColl[i]);
3714 }
3715 }
3716 }
3717 ucol_close(coll);
3718
3719 log_verbose("\n\nTailoring test for s with caron:");
3720 ruleLen = u_strlen(rule2);
3721 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3722 tLen = u_strlen(tailorData2[0]);
3723 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3724 for (j=1; j<3; j++) {
3725 tLen = u_strlen(tailorData2[j]);
3726 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3727 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3728 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3729 for(i = 0; i<rLen; i++) {
3730 log_err(" %02X", resColl[i]);
3731 }
3732 }
3733 }
3734 ucol_close(coll);
3735
3736 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3737 ruleLen = u_strlen(rule3);
3738 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3739 tLen = u_strlen(tailorData3[3]);
3740 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3741 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3742 for(i = 0; i<kLen; i++) {
3743 log_verbose(" %02X", expColl[i]);
3744 }
3745 for (j=4; j<6; j++) {
3746 tLen = u_strlen(tailorData3[j]);
3747 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3748
3749 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3750 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3751 for(i = 0; i<rLen; i++) {
3752 log_err(" %02X", resColl[i]);
3753 }
3754 }
3755
3756 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3757 for(i = 0; i<rLen; i++) {
3758 log_verbose(" %02X", resColl[i]);
3759 }
3760 }
3761 ucol_close(coll);
3762 }
3763
3764 static void
TestTailor6179(void)3765 TestTailor6179(void)
3766 {
3767 UErrorCode status = U_ZERO_ERROR;
3768 int32_t i;
3769 UCollator *coll =NULL;
3770 uint8_t resColl[100];
3771 int32_t rLen, tLen, ruleLen;
3772 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
3773 static const UChar rule1[]={
3774 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3775 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3776 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3777 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3778 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3779 static const UChar rule2[]={
3780 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3781 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3782 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3783 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3784 0x3C,0x3C,0x20,0x62,0};
3785
3786 static const UChar tData1[][4]={
3787 {0x61, 0},
3788 {0x62, 0},
3789 { 0xFDD0,0x009E, 0}
3790 };
3791 static const UChar tData2[][4]={
3792 {0x61, 0},
3793 {0x62, 0},
3794 { 0xFDD0,0x009E, 0}
3795 };
3796
3797 /*
3798 * These values from FractionalUCA.txt will change,
3799 * and need to be updated here.
3800 * TODO: Make this not check for particular sort keys.
3801 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3802 */
3803 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3804 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3805 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3806 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3807
3808 UParseError parseError;
3809
3810 /* Test [Last Primary ignorable] */
3811
3812 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
3813 ruleLen = u_strlen(rule1);
3814 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3815 if (U_FAILURE(status)) {
3816 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3817 return;
3818 }
3819 tLen = u_strlen(tData1[0]);
3820 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3821 if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3822 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
3823 for(i = 0; i<rLen; i++) {
3824 log_err(" %02X", resColl[i]);
3825 }
3826 log_err("\n");
3827 }
3828 tLen = u_strlen(tData1[1]);
3829 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3830 if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3831 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
3832 for(i = 0; i<rLen; i++) {
3833 log_err(" %02X", resColl[i]);
3834 }
3835 log_err("\n");
3836 }
3837 ucol_close(coll);
3838
3839
3840 /* Test [Last Secondary ignorable] */
3841 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
3842 ruleLen = u_strlen(rule2);
3843 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3844 if (U_FAILURE(status)) {
3845 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3846 log_info(" offset=%d \"%s\" | \"%s\"\n",
3847 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3848 return;
3849 }
3850 tLen = u_strlen(tData2[0]);
3851 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3852 if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3853 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
3854 for(i = 0; i<rLen; i++) {
3855 log_err(" %02X", resColl[i]);
3856 }
3857 log_err("\n");
3858 }
3859 tLen = u_strlen(tData2[1]);
3860 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3861 if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3862 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
3863 for(i = 0; i<rLen; i++) {
3864 log_err(" %02X", resColl[i]);
3865 }
3866 log_err("\n");
3867 }
3868 ucol_close(coll);
3869 }
3870
3871 static void
TestUCAPrecontext(void)3872 TestUCAPrecontext(void)
3873 {
3874 UErrorCode status = U_ZERO_ERROR;
3875 int32_t i, j;
3876 UCollator *coll =NULL;
3877 uint8_t resColl[100], prevColl[100];
3878 int32_t rLen, tLen, ruleLen;
3879 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3880 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3881 /* & l middle-dot << a a is an expansion. */
3882
3883 UChar tData1[][20]={
3884 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3885 { 0x387, 0}, /* standalone middle dot(0x387) */
3886 { 0x61, 0}, /* a */
3887 { 0x6C, 0}, /* l */
3888 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
3889 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3890 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3891 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3892 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3893 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3894 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3895 };
3896
3897 log_verbose("\n\nEN collation:");
3898 coll = ucol_open("en", &status);
3899 if (U_FAILURE(status)) {
3900 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3901 return;
3902 }
3903 for (j=0; j<11; j++) {
3904 tLen = u_strlen(tData1[j]);
3905 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3906 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3907 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3908 j, tData1[j]);
3909 }
3910 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3911 for(i = 0; i<rLen; i++) {
3912 log_verbose(" %02X", resColl[i]);
3913 }
3914 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3915 }
3916 ucol_close(coll);
3917
3918
3919 log_verbose("\n\nJA collation:");
3920 coll = ucol_open("ja", &status);
3921 if (U_FAILURE(status)) {
3922 log_err("Tailoring test: &z <<a|- failed!");
3923 return;
3924 }
3925 for (j=0; j<11; j++) {
3926 tLen = u_strlen(tData1[j]);
3927 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3928 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3929 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3930 j, tData1[j]);
3931 }
3932 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3933 for(i = 0; i<rLen; i++) {
3934 log_verbose(" %02X", resColl[i]);
3935 }
3936 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3937 }
3938 ucol_close(coll);
3939
3940
3941 log_verbose("\n\nTailoring test: & middle dot < a ");
3942 ruleLen = u_strlen(rule1);
3943 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3944 if (U_FAILURE(status)) {
3945 log_err("Tailoring test: & middle dot < a failed!");
3946 return;
3947 }
3948 for (j=0; j<11; j++) {
3949 tLen = u_strlen(tData1[j]);
3950 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3951 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3952 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3953 j, tData1[j]);
3954 }
3955 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3956 for(i = 0; i<rLen; i++) {
3957 log_verbose(" %02X", resColl[i]);
3958 }
3959 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3960 }
3961 ucol_close(coll);
3962
3963
3964 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3965 ruleLen = u_strlen(rule2);
3966 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3967 if (U_FAILURE(status)) {
3968 log_err("Tailoring test: & l middle-dot << a failed!");
3969 return;
3970 }
3971 for (j=0; j<11; j++) {
3972 tLen = u_strlen(tData1[j]);
3973 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3974 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3975 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3976 j, tData1[j]);
3977 }
3978 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3979 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3980 j, tData1[j]);
3981 }
3982 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3983 for(i = 0; i<rLen; i++) {
3984 log_verbose(" %02X", resColl[i]);
3985 }
3986 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3987 }
3988 ucol_close(coll);
3989 }
3990
3991 static void
TestOutOfBuffer5468(void)3992 TestOutOfBuffer5468(void)
3993 {
3994 static const char *test = "\\u4e00";
3995 UChar ustr[256];
3996 int32_t ustr_length = u_unescape(test, ustr, 256);
3997 unsigned char shortKeyBuf[1];
3998 int32_t sortkey_length;
3999 UErrorCode status = U_ZERO_ERROR;
4000 static UCollator *coll = NULL;
4001
4002 coll = ucol_open("root", &status);
4003 if(U_FAILURE(status)) {
4004 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4005 return;
4006 }
4007 ucol_setStrength(coll, UCOL_PRIMARY);
4008 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4009 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4010 if (U_FAILURE(status)) {
4011 log_err("Failed setting atributes\n");
4012 return;
4013 }
4014
4015 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4016 if (sortkey_length != 4) {
4017 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
4018 }
4019 log_verbose("length of sortKey is %d", sortkey_length);
4020 ucol_close(coll);
4021 }
4022
4023 #define TSKC_DATA_SIZE 5
4024 #define TSKC_BUF_SIZE 50
4025 static void
TestSortKeyConsistency(void)4026 TestSortKeyConsistency(void)
4027 {
4028 UErrorCode icuRC = U_ZERO_ERROR;
4029 UCollator* ucol;
4030 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4031
4032 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034 int32_t i, j, i2;
4035
4036 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4037 if (U_FAILURE(icuRC))
4038 {
4039 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4040 return;
4041 }
4042
4043 for (i = 0; i < TSKC_DATA_SIZE; i++)
4044 {
4045 UCharIterator uiter;
4046 uint32_t state[2] = { 0, 0 };
4047 int32_t dataLen = i+1;
4048 for (j=0; j<TSKC_BUF_SIZE; j++)
4049 bufFull[i][j] = bufPart[i][j] = 0;
4050
4051 /* Full sort key */
4052 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4053
4054 /* Partial sort key */
4055 uiter_setString(&uiter, data, dataLen);
4056 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4057 if (U_FAILURE(icuRC))
4058 {
4059 log_err("ucol_nextSortKeyPart failed\n");
4060 ucol_close(ucol);
4061 return;
4062 }
4063
4064 for (i2=0; i2<i; i2++)
4065 {
4066 UBool fullMatch = TRUE;
4067 UBool partMatch = TRUE;
4068 for (j=0; j<TSKC_BUF_SIZE; j++)
4069 {
4070 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4071 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4072 }
4073 if (fullMatch != partMatch) {
4074 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4075 : "partial key was consistent, but full key changed\n");
4076 ucol_close(ucol);
4077 return;
4078 }
4079 }
4080 }
4081
4082 /*=============================================*/
4083 ucol_close(ucol);
4084 }
4085
4086 /* ticket: 6101 */
TestCroatianSortKey(void)4087 static void TestCroatianSortKey(void) {
4088 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4089 UErrorCode status = U_ZERO_ERROR;
4090 UCollator *ucol;
4091 UCharIterator iter;
4092
4093 static const UChar text[] = { 0x0044, 0xD81A };
4094
4095 size_t length = UPRV_LENGTHOF(text);
4096
4097 uint8_t textSortKey[32];
4098 size_t lenSortKey = 32;
4099 size_t actualSortKeyLen;
4100 uint32_t uStateInfo[2] = { 0, 0 };
4101
4102 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4103 if (U_FAILURE(status)) {
4104 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4105 return;
4106 }
4107
4108 uiter_setString(&iter, text, length);
4109
4110 actualSortKeyLen = ucol_nextSortKeyPart(
4111 ucol, &iter, (uint32_t*)uStateInfo,
4112 textSortKey, lenSortKey, &status
4113 );
4114
4115 if (actualSortKeyLen == lenSortKey) {
4116 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4117 }
4118
4119 ucol_close(ucol);
4120 }
4121
4122 /* ticket: 6140 */
4123 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4124 * they are both Hiragana and Katakana
4125 */
4126 #define SORTKEYLEN 50
TestHiragana(void)4127 static void TestHiragana(void) {
4128 UErrorCode status = U_ZERO_ERROR;
4129 UCollator* ucol;
4130 UCollationResult strcollresult;
4131 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4132 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4133 int32_t data1Len = UPRV_LENGTHOF(data1);
4134 int32_t data2Len = UPRV_LENGTHOF(data2);
4135 int32_t i, j;
4136 uint8_t sortKey1[SORTKEYLEN];
4137 uint8_t sortKey2[SORTKEYLEN];
4138
4139 UCharIterator uiter1;
4140 UCharIterator uiter2;
4141 uint32_t state1[2] = { 0, 0 };
4142 uint32_t state2[2] = { 0, 0 };
4143 int32_t keySize1;
4144 int32_t keySize2;
4145
4146 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4147 &status);
4148 if (U_FAILURE(status)) {
4149 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4150 return;
4151 }
4152
4153 /* Start of full sort keys */
4154 /* Full sort key1 */
4155 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4156 /* Full sort key2 */
4157 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4158 if (keySize1 == keySize2) {
4159 for (i = 0; i < keySize1; i++) {
4160 if (sortKey1[i] != sortKey2[i]) {
4161 log_err("Full sort keys are different. Should be equal.");
4162 }
4163 }
4164 } else {
4165 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4166 }
4167 /* End of full sort keys */
4168
4169 /* Start of partial sort keys */
4170 /* Partial sort key1 */
4171 uiter_setString(&uiter1, data1, data1Len);
4172 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4173 /* Partial sort key2 */
4174 uiter_setString(&uiter2, data2, data2Len);
4175 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4176 if (U_SUCCESS(status) && keySize1 == keySize2) {
4177 for (j = 0; j < keySize1; j++) {
4178 if (sortKey1[j] != sortKey2[j]) {
4179 log_err("Partial sort keys are different. Should be equal");
4180 }
4181 }
4182 } else {
4183 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4184 }
4185 /* End of partial sort keys */
4186
4187 /* Start of strcoll */
4188 /* Use ucol_strcoll() to determine ordering */
4189 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4190 if (strcollresult != UCOL_EQUAL) {
4191 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4192 }
4193
4194 ucol_close(ucol);
4195 }
4196
4197 /* Convenient struct for running collation tests */
4198 typedef struct {
4199 const UChar source[MAX_TOKEN_LEN]; /* String on left */
4200 const UChar target[MAX_TOKEN_LEN]; /* String on right */
4201 UCollationResult result; /* -1, 0 or +1, depending on collation */
4202 } OneTestCase;
4203
4204 /*
4205 * Utility function to test one collation test case.
4206 * @param testcases Array of test cases.
4207 * @param n_testcases Size of the array testcases.
4208 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4209 * @param n_rules Size of the array str_rules.
4210 */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4211 static void doTestOneTestCase(const OneTestCase testcases[],
4212 int n_testcases,
4213 const char* str_rules[],
4214 int n_rules)
4215 {
4216 int rule_no, testcase_no;
4217 UChar rule[500];
4218 int32_t length = 0;
4219 UErrorCode status = U_ZERO_ERROR;
4220 UParseError parse_error;
4221 UCollator *myCollation;
4222
4223 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4224
4225 length = u_unescape(str_rules[rule_no], rule, 500);
4226 if (length == 0) {
4227 log_err("ERROR: The rule cannot be unescaped: %s\n");
4228 return;
4229 }
4230 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4231 if(U_FAILURE(status)){
4232 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4233 log_info(" offset=%d \"%s\" | \"%s\"\n",
4234 parse_error.offset,
4235 aescstrdup(parse_error.preContext, -1),
4236 aescstrdup(parse_error.postContext, -1));
4237 return;
4238 }
4239 log_verbose("Testing the <<* syntax\n");
4240 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4241 ucol_setStrength(myCollation, UCOL_TERTIARY);
4242 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4243 doTest(myCollation,
4244 testcases[testcase_no].source,
4245 testcases[testcase_no].target,
4246 testcases[testcase_no].result
4247 );
4248 }
4249 ucol_close(myCollation);
4250 }
4251 }
4252
4253 const static OneTestCase rangeTestcases[] = {
4254 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
4255 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
4256 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */
4257
4258 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
4259 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
4260 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
4261 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
4262 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */
4263
4264 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */
4265 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
4266 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */
4267 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */
4268
4269 { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
4270 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
4271 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
4272 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
4273 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */
4274 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
4275 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
4276 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */
4277 };
4278
4279 static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4280
4281 const static OneTestCase rangeTestcasesSupplemental[] = {
4282 { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
4283 { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
4284 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
4285 { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
4286 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
4287 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */
4288 { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10001 */
4289 };
4290
4291 static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4292
4293 const static OneTestCase rangeTestcasesQwerty[] = {
4294 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
4295 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */
4296
4297 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */
4298 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */
4299
4300 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */
4301 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */
4302
4303 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
4304 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */
4305
4306 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4307 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
4308 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4309 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
4310 };
4311
4312 static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4313
TestSameStrengthList(void)4314 static void TestSameStrengthList(void)
4315 {
4316 const char* strRules[] = {
4317 /* Normal */
4318 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4319
4320 /* Lists */
4321 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4322 };
4323 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4324 }
4325
TestSameStrengthListQuoted(void)4326 static void TestSameStrengthListQuoted(void)
4327 {
4328 const char* strRules[] = {
4329 /* Lists with quoted characters */
4330 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4331 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4332
4333 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4334 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4335
4336 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4337 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4338 };
4339 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4340 }
4341
TestSameStrengthListSupplemental(void)4342 static void TestSameStrengthListSupplemental(void)
4343 {
4344 const char* strRules[] = {
4345 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4346 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4347 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4348 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4349 };
4350 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4351 }
4352
TestSameStrengthListQwerty(void)4353 static void TestSameStrengthListQwerty(void)
4354 {
4355 const char* strRules[] = {
4356 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4357 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4358 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4359 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4360 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4361
4362 /* Quoted characters also will work if two quoted characters are not consecutive. */
4363 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4364
4365 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4366 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4367
4368 };
4369 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4370 }
4371
TestSameStrengthListQuotedQwerty(void)4372 static void TestSameStrengthListQuotedQwerty(void)
4373 {
4374 const char* strRules[] = {
4375 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4376 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4377 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4378
4379 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4380 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4381 };
4382 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4383 }
4384
TestSameStrengthListRanges(void)4385 static void TestSameStrengthListRanges(void)
4386 {
4387 const char* strRules[] = {
4388 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4389 };
4390 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4391 }
4392
TestSameStrengthListSupplementalRanges(void)4393 static void TestSameStrengthListSupplementalRanges(void)
4394 {
4395 const char* strRules[] = {
4396 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4397 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4398 };
4399 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4400 }
4401
TestSpecialCharacters(void)4402 static void TestSpecialCharacters(void)
4403 {
4404 const char* strRules[] = {
4405 /* Normal */
4406 "&';'<'+'<','<'-'<'&'<'*'",
4407
4408 /* List */
4409 "&';'<*'+,-&*'",
4410
4411 /* Range */
4412 "&';'<*'+'-'-&*'",
4413 };
4414
4415 const static OneTestCase specialCharacterStrings[] = {
4416 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
4417 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
4418 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
4419 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
4420 };
4421 doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4422 }
4423
TestPrivateUseCharacters(void)4424 static void TestPrivateUseCharacters(void)
4425 {
4426 const char* strRules[] = {
4427 /* Normal */
4428 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4429 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4430 };
4431
4432 const static OneTestCase privateUseCharacterStrings[] = {
4433 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4434 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4435 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4436 { {0xe2da}, {0xe2db}, UCOL_LESS },
4437 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4438 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4439 };
4440 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4441 }
4442
TestPrivateUseCharactersInList(void)4443 static void TestPrivateUseCharactersInList(void)
4444 {
4445 const char* strRules[] = {
4446 /* List */
4447 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4448 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4449 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4450 };
4451
4452 const static OneTestCase privateUseCharacterStrings[] = {
4453 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4454 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4455 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4456 { {0xe2da}, {0xe2db}, UCOL_LESS },
4457 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4458 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4459 };
4460 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4461 }
4462
TestPrivateUseCharactersInRange(void)4463 static void TestPrivateUseCharactersInRange(void)
4464 {
4465 const char* strRules[] = {
4466 /* Range */
4467 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4468 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4469 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4470 };
4471
4472 const static OneTestCase privateUseCharacterStrings[] = {
4473 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4474 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4475 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4476 { {0xe2da}, {0xe2db}, UCOL_LESS },
4477 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4478 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4479 };
4480 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4481 }
4482
TestInvalidListsAndRanges(void)4483 static void TestInvalidListsAndRanges(void)
4484 {
4485 const char* invalidRules[] = {
4486 /* Range not in starred expression */
4487 "&\\ufffe<\\uffff-\\U00010002",
4488
4489 /* Range without start */
4490 "&a<*-c",
4491
4492 /* Range without end */
4493 "&a<*b-",
4494
4495 /* More than one hyphen */
4496 "&a<*b-g-l",
4497
4498 /* Range in the wrong order */
4499 "&a<*k-b",
4500
4501 };
4502
4503 UChar rule[500];
4504 UErrorCode status = U_ZERO_ERROR;
4505 UParseError parse_error;
4506 int n_rules = UPRV_LENGTHOF(invalidRules);
4507 int rule_no;
4508 int length;
4509 UCollator *myCollation;
4510
4511 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4512
4513 length = u_unescape(invalidRules[rule_no], rule, 500);
4514 if (length == 0) {
4515 log_err("ERROR: The rule cannot be unescaped: %s\n");
4516 return;
4517 }
4518 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4519 (void)myCollation; /* Suppress set but not used warning. */
4520 if(!U_FAILURE(status)){
4521 log_err("ERROR: Could not cause a failure as expected: \n");
4522 }
4523 status = U_ZERO_ERROR;
4524 }
4525 }
4526
4527 /*
4528 * This test ensures that characters placed before a character in a different script have the same lead byte
4529 * in their collation key before and after script reordering.
4530 */
TestBeforeRuleWithScriptReordering(void)4531 static void TestBeforeRuleWithScriptReordering(void)
4532 {
4533 UParseError error;
4534 UErrorCode status = U_ZERO_ERROR;
4535 UCollator *myCollation;
4536 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4537 UChar rules[500];
4538 uint32_t rulesLength = 0;
4539 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4540 UCollationResult collResult;
4541
4542 uint8_t baseKey[256];
4543 uint32_t baseKeyLength;
4544 uint8_t beforeKey[256];
4545 uint32_t beforeKeyLength;
4546
4547 UChar base[] = { 0x03b1 }; /* base */
4548 int32_t baseLen = UPRV_LENGTHOF(base);
4549
4550 UChar before[] = { 0x0e01 }; /* ko kai */
4551 int32_t beforeLen = UPRV_LENGTHOF(before);
4552
4553 /*UChar *data[] = { before, base };
4554 genericRulesStarter(srules, data, 2);*/
4555
4556 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4557
4558 (void)beforeKeyLength; /* Suppress set but not used warnings. */
4559 (void)baseKeyLength;
4560
4561 /* build collator */
4562 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4563
4564 rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4565 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4566 if(U_FAILURE(status)) {
4567 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4568 return;
4569 }
4570
4571 /* check collation results - before rule applied but not script reordering */
4572 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4573 if (collResult != UCOL_GREATER) {
4574 log_err("Collation result not correct before script reordering = %d\n", collResult);
4575 }
4576
4577 /* check the lead byte of the collation keys before script reordering */
4578 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4579 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4580 if (baseKey[0] != beforeKey[0]) {
4581 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4582 }
4583
4584 /* reorder the scripts */
4585 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4586 if(U_FAILURE(status)) {
4587 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4588 return;
4589 }
4590
4591 /* check collation results - before rule applied and after script reordering */
4592 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4593 if (collResult != UCOL_GREATER) {
4594 log_err("Collation result not correct after script reordering = %d\n", collResult);
4595 }
4596
4597 /* check the lead byte of the collation keys after script reordering */
4598 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4599 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4600 if (baseKey[0] != beforeKey[0]) {
4601 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4602 }
4603
4604 ucol_close(myCollation);
4605 }
4606
4607 /*
4608 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4609 */
TestNonLeadBytesDuringCollationReordering(void)4610 static void TestNonLeadBytesDuringCollationReordering(void)
4611 {
4612 UErrorCode status = U_ZERO_ERROR;
4613 UCollator *myCollation;
4614 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4615
4616 uint8_t baseKey[256];
4617 uint32_t baseKeyLength;
4618 uint8_t reorderKey[256];
4619 uint32_t reorderKeyLength;
4620
4621 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4622
4623 uint32_t i;
4624
4625
4626 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4627
4628 /* build collator tertiary */
4629 myCollation = ucol_open("", &status);
4630 ucol_setStrength(myCollation, UCOL_TERTIARY);
4631 if(U_FAILURE(status)) {
4632 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4633 return;
4634 }
4635 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4636
4637 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4638 if(U_FAILURE(status)) {
4639 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4640 return;
4641 }
4642 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4643
4644 if (baseKeyLength != reorderKeyLength) {
4645 log_err("Key lengths not the same during reordering.\n");
4646 return;
4647 }
4648
4649 for (i = 1; i < baseKeyLength; i++) {
4650 if (baseKey[i] != reorderKey[i]) {
4651 log_err("Collation key bytes not the same at position %d.\n", i);
4652 return;
4653 }
4654 }
4655 ucol_close(myCollation);
4656
4657 /* build collator quaternary */
4658 myCollation = ucol_open("", &status);
4659 ucol_setStrength(myCollation, UCOL_QUATERNARY);
4660 if(U_FAILURE(status)) {
4661 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4662 return;
4663 }
4664 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4665
4666 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4667 if(U_FAILURE(status)) {
4668 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4669 return;
4670 }
4671 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4672
4673 if (baseKeyLength != reorderKeyLength) {
4674 log_err("Key lengths not the same during reordering.\n");
4675 return;
4676 }
4677
4678 for (i = 1; i < baseKeyLength; i++) {
4679 if (baseKey[i] != reorderKey[i]) {
4680 log_err("Collation key bytes not the same at position %d.\n", i);
4681 return;
4682 }
4683 }
4684 ucol_close(myCollation);
4685 }
4686
4687 /*
4688 * Test reordering API.
4689 */
TestReorderingAPI(void)4690 static void TestReorderingAPI(void)
4691 {
4692 UErrorCode status = U_ZERO_ERROR;
4693 UCollator *myCollation;
4694 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4695 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4696 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4697 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4698 UCollationResult collResult;
4699 int32_t retrievedReorderCodesLength;
4700 int32_t retrievedReorderCodes[10];
4701 UChar greekString[] = { 0x03b1 };
4702 UChar punctuationString[] = { 0x203e };
4703 int loopIndex;
4704
4705 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4706
4707 /* build collator tertiary */
4708 myCollation = ucol_open("", &status);
4709 ucol_setStrength(myCollation, UCOL_TERTIARY);
4710 if(U_FAILURE(status)) {
4711 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4712 return;
4713 }
4714
4715 /* set the reorderding */
4716 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4717 if (U_FAILURE(status)) {
4718 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4719 return;
4720 }
4721
4722 /* get the reordering */
4723 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4724 if (status != U_BUFFER_OVERFLOW_ERROR) {
4725 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4726 return;
4727 }
4728 status = U_ZERO_ERROR;
4729 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4730 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4731 return;
4732 }
4733 /* now let's really get it */
4734 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4735 if (U_FAILURE(status)) {
4736 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4737 return;
4738 }
4739 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4740 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4741 return;
4742 }
4743 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4744 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4745 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4746 return;
4747 }
4748 }
4749 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4750 if (collResult != UCOL_LESS) {
4751 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4752 return;
4753 }
4754
4755 /* clear the reordering */
4756 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4757 if (U_FAILURE(status)) {
4758 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4759 return;
4760 }
4761
4762 /* get the reordering again */
4763 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4764 if (retrievedReorderCodesLength != 0) {
4765 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4766 return;
4767 }
4768
4769 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4770 if (collResult != UCOL_GREATER) {
4771 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4772 return;
4773 }
4774
4775 /* clear the reordering using [NONE] */
4776 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4777 if (U_FAILURE(status)) {
4778 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4779 return;
4780 }
4781
4782 /* get the reordering again */
4783 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4784 if (retrievedReorderCodesLength != 0) {
4785 log_err_status(status,
4786 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4787 retrievedReorderCodesLength);
4788 return;
4789 }
4790
4791 /* test for error condition on duplicate reorder codes */
4792 ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4793 if (!U_FAILURE(status)) {
4794 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4795 return;
4796 }
4797
4798 status = U_ZERO_ERROR;
4799 /* test for reorder codes after a reset code */
4800 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4801 if (!U_FAILURE(status)) {
4802 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4803 return;
4804 }
4805
4806 ucol_close(myCollation);
4807 }
4808
4809 /*
4810 * Test reordering API.
4811 */
TestReorderingAPIWithRuleCreatedCollator(void)4812 static void TestReorderingAPIWithRuleCreatedCollator(void)
4813 {
4814 UErrorCode status = U_ZERO_ERROR;
4815 UCollator *myCollation;
4816 UChar rules[90];
4817 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4818 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4819 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4820 UCollationResult collResult;
4821 int32_t retrievedReorderCodesLength;
4822 int32_t retrievedReorderCodes[10];
4823 static const UChar greekString[] = { 0x03b1 };
4824 static const UChar punctuationString[] = { 0x203e };
4825 static const UChar hanString[] = { 0x65E5, 0x672C };
4826 int loopIndex;
4827
4828 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4829
4830 /* build collator from rules */
4831 u_uastrcpy(rules, "[reorder Hani Grek]");
4832 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4833 if(U_FAILURE(status)) {
4834 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4835 return;
4836 }
4837
4838 /* get the reordering */
4839 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4840 if (U_FAILURE(status)) {
4841 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4842 return;
4843 }
4844 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4845 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4846 return;
4847 }
4848 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4849 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4850 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4851 return;
4852 }
4853 }
4854 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4855 if (collResult != UCOL_GREATER) {
4856 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4857 return;
4858 }
4859
4860 /* set the reordering */
4861 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4862 if (U_FAILURE(status)) {
4863 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4864 return;
4865 }
4866
4867 /* get the reordering */
4868 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4869 if (status != U_BUFFER_OVERFLOW_ERROR) {
4870 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4871 return;
4872 }
4873 status = U_ZERO_ERROR;
4874 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4875 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4876 return;
4877 }
4878 /* now let's really get it */
4879 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4880 if (U_FAILURE(status)) {
4881 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4882 return;
4883 }
4884 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4885 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4886 return;
4887 }
4888 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4889 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4890 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4891 return;
4892 }
4893 }
4894 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4895 if (collResult != UCOL_LESS) {
4896 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4897 return;
4898 }
4899
4900 /* clear the reordering */
4901 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4902 if (U_FAILURE(status)) {
4903 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4904 return;
4905 }
4906
4907 /* get the reordering again */
4908 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4909 if (retrievedReorderCodesLength != 0) {
4910 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4911 return;
4912 }
4913
4914 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4915 if (collResult != UCOL_GREATER) {
4916 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4917 return;
4918 }
4919
4920 /* reset the reordering */
4921 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4922 if (U_FAILURE(status)) {
4923 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4924 return;
4925 }
4926 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4927 if (U_FAILURE(status)) {
4928 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4929 return;
4930 }
4931 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4932 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4933 return;
4934 }
4935 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4936 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4937 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4938 return;
4939 }
4940 }
4941
4942 ucol_close(myCollation);
4943 }
4944
/*
 * Linear search: reports whether expectedScript occurs among the first
 * `length` entries of scripts[].
 * Returns TRUE on the first match, FALSE when there is none (which includes
 * the case where length is zero or negative, since no entry is examined).
 */
static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
    int32_t pos = 0;

    while (pos < length) {
        if (scripts[pos] == expectedScript) {
            return TRUE;
        }
        ++pos;
    }
    return FALSE;
}
4952
TestEquivalentReorderingScripts(void)4953 static void TestEquivalentReorderingScripts(void) {
4954 // Beginning with ICU 55, collation reordering moves single scripts
4955 // rather than groups of scripts,
4956 // except where scripts share a range and sort primary-equal.
4957 UErrorCode status = U_ZERO_ERROR;
4958 int32_t equivalentScripts[100];
4959 int32_t length;
4960 int i;
4961 int32_t prevScript;
4962 /* These scripts are expected to be equivalent. */
4963 static const int32_t expectedScripts[] = {
4964 USCRIPT_HIRAGANA,
4965 USCRIPT_KATAKANA,
4966 USCRIPT_KATAKANA_OR_HIRAGANA
4967 };
4968
4969 equivalentScripts[0] = 0;
4970 length = ucol_getEquivalentReorderCodes(
4971 USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4972 if (U_FAILURE(status)) {
4973 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4974 return;
4975 }
4976 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4977 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4978 "length expected 1, was = %d; expected [%d] was [%d]\n",
4979 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4980 }
4981
4982 length = ucol_getEquivalentReorderCodes(
4983 USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4984 if (U_FAILURE(status)) {
4985 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4986 return;
4987 }
4988 if (length != UPRV_LENGTHOF(expectedScripts)) {
4989 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4990 "expected %d, was = %d\n",
4991 UPRV_LENGTHOF(expectedScripts), length);
4992 }
4993 prevScript = -1;
4994 for (i = 0; i < length; ++i) {
4995 int32_t script = equivalentScripts[i];
4996 if (script <= prevScript) {
4997 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4998 }
4999 prevScript = script;
5000 }
5001 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5002 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5003 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5004 expectedScripts[i]);
5005 }
5006 }
5007
5008 length = ucol_getEquivalentReorderCodes(
5009 USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5010 if (U_FAILURE(status)) {
5011 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5012 return;
5013 }
5014 if (length != UPRV_LENGTHOF(expectedScripts)) {
5015 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5016 "expected %d, was = %d\n",
5017 UPRV_LENGTHOF(expectedScripts), length);
5018 }
5019 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5020 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5021 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5022 expectedScripts[i]);
5023 }
5024 }
5025
5026 length = ucol_getEquivalentReorderCodes(
5027 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5028 if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5029 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5030 "expected %d, was = %d\n",
5031 UPRV_LENGTHOF(expectedScripts), length);
5032 }
5033
5034 length = ucol_getEquivalentReorderCodes(
5035 USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5036 if (U_FAILURE(status) || length != 3) {
5037 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5038 "expected 3, was = %d\n", length);
5039 }
5040 length = ucol_getEquivalentReorderCodes(
5041 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5042 if (U_FAILURE(status) || length != 3) {
5043 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5044 "expected 3, was = %d\n", length);
5045 }
5046 length = ucol_getEquivalentReorderCodes(
5047 USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5048 if (U_FAILURE(status) || length != 3) {
5049 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5050 "expected 3, was = %d\n", length);
5051 }
5052
5053 length = ucol_getEquivalentReorderCodes(
5054 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5055 if (U_FAILURE(status) || length != 2) {
5056 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5057 "expected 2, was = %d\n", length);
5058 }
5059 length = ucol_getEquivalentReorderCodes(
5060 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5061 if (U_FAILURE(status) || length != 2) {
5062 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5063 "expected 2, was = %d\n", length);
5064 }
5065 }
5066
TestReorderingAcrossCloning(void)5067 static void TestReorderingAcrossCloning(void)
5068 {
5069 UErrorCode status = U_ZERO_ERROR;
5070 UCollator *myCollation;
5071 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5072 UCollator *clonedCollation;
5073 int32_t retrievedReorderCodesLength;
5074 int32_t retrievedReorderCodes[10];
5075 int loopIndex;
5076
5077 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5078
5079 /* build collator tertiary */
5080 myCollation = ucol_open("", &status);
5081 ucol_setStrength(myCollation, UCOL_TERTIARY);
5082 if(U_FAILURE(status)) {
5083 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5084 return;
5085 }
5086
5087 /* set the reorderding */
5088 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5089 if (U_FAILURE(status)) {
5090 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5091 return;
5092 }
5093
5094 /* clone the collator */
5095 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5096 if (U_FAILURE(status)) {
5097 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5098 return;
5099 }
5100
5101 /* get the reordering */
5102 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5103 if (U_FAILURE(status)) {
5104 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5105 return;
5106 }
5107 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5108 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5109 return;
5110 }
5111 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5112 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5113 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5114 return;
5115 }
5116 }
5117
5118 /*uprv_free(buffer);*/
5119 ucol_close(myCollation);
5120 ucol_close(clonedCollation);
5121 }
5122
5123 /*
5124 * Utility function to test one collation reordering test case set.
5125 * @param testcases Array of test cases.
5126 * @param n_testcases Size of the array testcases.
5127 * @param reorderTokens Array of reordering codes.
5128 * @param reorderTokensLen Size of the array reorderTokens.
5129 */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5130 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5131 {
5132 uint32_t testCaseNum;
5133 UErrorCode status = U_ZERO_ERROR;
5134 UCollator *myCollation;
5135
5136 myCollation = ucol_open("", &status);
5137 if (U_FAILURE(status)) {
5138 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5139 return;
5140 }
5141 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5142 if(U_FAILURE(status)) {
5143 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5144 return;
5145 }
5146
5147 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5148 doTest(myCollation,
5149 testCases[testCaseNum].source,
5150 testCases[testCaseNum].target,
5151 testCases[testCaseNum].result
5152 );
5153 }
5154 ucol_close(myCollation);
5155 }
5156
TestGreekFirstReorder(void)5157 static void TestGreekFirstReorder(void)
5158 {
5159 const char* strRules[] = {
5160 "[reorder Grek]"
5161 };
5162
5163 const int32_t apiRules[] = {
5164 USCRIPT_GREEK
5165 };
5166
5167 const static OneTestCase privateUseCharacterStrings[] = {
5168 { {0x0391}, {0x0391}, UCOL_EQUAL },
5169 { {0x0041}, {0x0391}, UCOL_GREATER },
5170 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5171 { {0x0060}, {0x0391}, UCOL_LESS },
5172 { {0x0391}, {0xe2dc}, UCOL_LESS },
5173 { {0x0391}, {0x0060}, UCOL_GREATER },
5174 };
5175
5176 /* Test rules creation */
5177 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5178
5179 /* Test collation reordering API */
5180 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5181 }
5182
TestGreekLastReorder(void)5183 static void TestGreekLastReorder(void)
5184 {
5185 const char* strRules[] = {
5186 "[reorder Zzzz Grek]"
5187 };
5188
5189 const int32_t apiRules[] = {
5190 USCRIPT_UNKNOWN, USCRIPT_GREEK
5191 };
5192
5193 const static OneTestCase privateUseCharacterStrings[] = {
5194 { {0x0391}, {0x0391}, UCOL_EQUAL },
5195 { {0x0041}, {0x0391}, UCOL_LESS },
5196 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5197 { {0x0060}, {0x0391}, UCOL_LESS },
5198 { {0x0391}, {0xe2dc}, UCOL_GREATER },
5199 };
5200
5201 /* Test rules creation */
5202 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5203
5204 /* Test collation reordering API */
5205 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5206 }
5207
TestNonScriptReorder(void)5208 static void TestNonScriptReorder(void)
5209 {
5210 const char* strRules[] = {
5211 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5212 };
5213
5214 const int32_t apiRules[] = {
5215 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5216 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5217 UCOL_REORDER_CODE_CURRENCY
5218 };
5219
5220 const static OneTestCase privateUseCharacterStrings[] = {
5221 { {0x0391}, {0x0041}, UCOL_LESS },
5222 { {0x0041}, {0x0391}, UCOL_GREATER },
5223 { {0x0060}, {0x0041}, UCOL_LESS },
5224 { {0x0060}, {0x0391}, UCOL_GREATER },
5225 { {0x0024}, {0x0041}, UCOL_GREATER },
5226 };
5227
5228 /* Test rules creation */
5229 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5230
5231 /* Test collation reordering API */
5232 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5233 }
5234
TestHaniReorder(void)5235 static void TestHaniReorder(void)
5236 {
5237 const char* strRules[] = {
5238 "[reorder Hani]"
5239 };
5240 const int32_t apiRules[] = {
5241 USCRIPT_HAN
5242 };
5243
5244 const static OneTestCase privateUseCharacterStrings[] = {
5245 { {0x4e00}, {0x0041}, UCOL_LESS },
5246 { {0x4e00}, {0x0060}, UCOL_GREATER },
5247 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5248 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5249 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5250 { {0xfa27}, {0x0041}, UCOL_LESS },
5251 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5252 };
5253
5254 /* Test rules creation */
5255 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5256
5257 /* Test collation reordering API */
5258 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5259 }
5260
TestHaniReorderWithOtherRules(void)5261 static void TestHaniReorderWithOtherRules(void)
5262 {
5263 const char* strRules[] = {
5264 "[reorder Hani] &b<a"
5265 };
5266 /*const int32_t apiRules[] = {
5267 USCRIPT_HAN
5268 };*/
5269
5270 const static OneTestCase privateUseCharacterStrings[] = {
5271 { {0x4e00}, {0x0041}, UCOL_LESS },
5272 { {0x4e00}, {0x0060}, UCOL_GREATER },
5273 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5274 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5275 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5276 { {0xfa27}, {0x0041}, UCOL_LESS },
5277 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5278 { {0x0062}, {0x0061}, UCOL_LESS },
5279 };
5280
5281 /* Test rules creation */
5282 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5283 }
5284
TestMultipleReorder(void)5285 static void TestMultipleReorder(void)
5286 {
5287 const char* strRules[] = {
5288 "[reorder Grek Zzzz DIGIT Latn Hani]"
5289 };
5290
5291 const int32_t apiRules[] = {
5292 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5293 };
5294
5295 const static OneTestCase collationTestCases[] = {
5296 { {0x0391}, {0x0041}, UCOL_LESS},
5297 { {0x0031}, {0x0041}, UCOL_LESS},
5298 { {0x0041}, {0x4e00}, UCOL_LESS},
5299 };
5300
5301 /* Test rules creation */
5302 doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5303
5304 /* Test collation reordering API */
5305 doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5306 }
5307
5308 /*
5309 * Test that covers issue reported in ticket 8814
5310 */
TestReorderWithNumericCollation(void)5311 static void TestReorderWithNumericCollation(void)
5312 {
5313 UErrorCode status = U_ZERO_ERROR;
5314 UCollator *myCollation;
5315 UCollator *myReorderCollation;
5316 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5317 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5318 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5319 UChar fortyS[] = { 0x0053 };
5320 UChar fortyThreeP[] = { 0x0050 };
5321 uint8_t fortyS_sortKey[128];
5322 int32_t fortyS_sortKey_Length;
5323 uint8_t fortyThreeP_sortKey[128];
5324 int32_t fortyThreeP_sortKey_Length;
5325 uint8_t fortyS_sortKey_reorder[128];
5326 int32_t fortyS_sortKey_reorder_Length;
5327 uint8_t fortyThreeP_sortKey_reorder[128];
5328 int32_t fortyThreeP_sortKey_reorder_Length;
5329 UCollationResult collResult;
5330 UCollationResult collResultReorder;
5331
5332 log_verbose("Testing reordering with and without numeric collation\n");
5333
5334 /* build collator tertiary with numeric */
5335 myCollation = ucol_open("", &status);
5336 /*
5337 ucol_setStrength(myCollation, UCOL_TERTIARY);
5338 */
5339 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5340 if(U_FAILURE(status)) {
5341 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5342 return;
5343 }
5344
5345 /* build collator tertiary with numeric and reordering */
5346 myReorderCollation = ucol_open("", &status);
5347 /*
5348 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5349 */
5350 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5351 ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5352 if(U_FAILURE(status)) {
5353 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5354 return;
5355 }
5356
5357 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5358 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5359 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5360 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5361
5362 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5363 log_err_status(status, "ERROR: couldn't generate sort keys\n");
5364 return;
5365 }
5366 collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5368 /*
5369 fprintf(stderr, "\tcollResult = %x\n", collResult);
5370 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5371 fprintf(stderr, "\nfortyS\n");
5372 for (i = 0; i < fortyS_sortKey_Length; i++) {
5373 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5374 }
5375 fprintf(stderr, "\nfortyThreeP\n");
5376 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5377 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5378 }
5379 */
5380 if (collResult != collResultReorder) {
5381 log_err_status(status, "ERROR: collation results should have been the same.\n");
5382 return;
5383 }
5384
5385 ucol_close(myCollation);
5386 ucol_close(myReorderCollation);
5387 }
5388
/*
 * Compares two zero-terminated byte sequences.
 *
 * Returns 0 when both sequences are equal up to and including the
 * terminating 0 byte, -1 when the first differing byte of a is smaller than
 * that of b, and 1 otherwise.  Bytes are compared as unsigned values.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences without a difference. */
            return 0;
        }
        ++a;
        ++b;
    }
    if (*a < *b) {
        return -1;
    }
    return 1;
}
5398
TestImportRulesDeWithPhonebook(void)5399 static void TestImportRulesDeWithPhonebook(void)
5400 {
5401 const char* normalRules[] = {
5402 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5403 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5404 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5405 };
5406 const OneTestCase normalTests[] = {
5407 { {0x00e6}, {0x00c6}, UCOL_LESS},
5408 { {0x00fc}, {0x00dc}, UCOL_GREATER},
5409 };
5410
5411 const char* importRules[] = {
5412 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5413 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415 };
5416 const OneTestCase importTests[] = {
5417 { {0x00e6}, {0x00c6}, UCOL_LESS},
5418 { {0x00fc}, {0x00dc}, UCOL_LESS},
5419 };
5420
5421 doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5422 doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5423 }
5424
#if 0
/*
 * Disabled test: compares a dummy tailoring, the "eor" import, the Finnish
 * standard import, and the combination of the two imports.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetCases[] = {
        { {0x0110}, {0x00F0},         UCOL_LESS },
        { {0x00a3}, {0x00a5},         UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* European Ordering rules: ignore currency characters. */
    const char* eorOnlyRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase eorOnlyCases[] = {
        { {0x0110}, {0x00F0},         UCOL_LESS },
        { {0x00a3}, {0x00a5},         UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    /* Finnish standard rules. */
    const char* fiStandardRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase fiStandardCases[] = {
        { {0x0110}, {0x00F0},         UCOL_GREATER },
        { {0x00a3}, {0x00a5},         UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* eorPlusFiStandardRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    const char* fiEorImportRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expectations shared by the combined rules and the fi-u-co-eor import. */
    const OneTestCase fiEorCases[] = {
        { {0x0110}, {0x00F0},         UCOL_GREATER },
        { {0x00a3}, {0x00a5},         UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases),
                      ducetRules, UPRV_LENGTHOF(ducetRules));
    doTestOneTestCase(eorOnlyCases, UPRV_LENGTHOF(eorOnlyCases),
                      eorOnlyRules, UPRV_LENGTHOF(eorOnlyRules));
    doTestOneTestCase(fiStandardCases, UPRV_LENGTHOF(fiStandardCases),
                      fiStandardRules, UPRV_LENGTHOF(fiStandardRules));
    doTestOneTestCase(fiEorCases, UPRV_LENGTHOF(fiEorCases),
                      eorPlusFiStandardRules, UPRV_LENGTHOF(eorPlusFiStandardRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
            eor{
                Sequence{
                    "[import root-u-co-eor][import fi-u-co-standard]"
                }
                Version{"21.0"}
            }
    */
    /* doTestOneTestCase(fiEorCases, UPRV_LENGTHOF(fiEorCases), fiEorImportRules, UPRV_LENGTHOF(fiEorImportRules)); */
}
#endif
5494
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET, reached through a dummy rule. */
    const char* ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER },
    };

    /* Rules that import the ko unihan collation; the expected order is reversed. */
    const char* unihanImportRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS },
    };

    doTestOneTestCase(ducetCases, UPRV_LENGTHOF(ducetCases),
                      ducetRules, UPRV_LENGTHOF(ducetRules));
    doTestOneTestCase(unihanCases, UPRV_LENGTHOF(unihanCases),
                      unihanImportRules, UPRV_LENGTHOF(unihanImportRules));
}
#endif
5526
TestImport(void)5527 static void TestImport(void)
5528 {
5529 UCollator* vicoll;
5530 UCollator* escoll;
5531 UCollator* viescoll;
5532 UCollator* importviescoll;
5533 UParseError error;
5534 UErrorCode status = U_ZERO_ERROR;
5535 UChar* virules;
5536 int32_t viruleslength;
5537 UChar* esrules;
5538 int32_t esruleslength;
5539 UChar* viesrules;
5540 int32_t viesruleslength;
5541 char srules[500] = "[import vi][import es]";
5542 UChar rules[500];
5543 uint32_t length = 0;
5544 int32_t itemCount;
5545 int32_t i, k;
5546 UChar32 start;
5547 UChar32 end;
5548 UChar str[500];
5549 int32_t strLength;
5550
5551 uint8_t sk1[500];
5552 uint8_t sk2[500];
5553
5554 UBool b;
5555 USet* tailoredSet;
5556 USet* importTailoredSet;
5557
5558
5559 vicoll = ucol_open("vi", &status);
5560 if(U_FAILURE(status)){
5561 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5562 return;
5563 }
5564
5565 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5566 if(viruleslength == 0) {
5567 log_data_err("missing vi tailoring rule string\n");
5568 ucol_close(vicoll);
5569 return;
5570 }
5571 escoll = ucol_open("es", &status);
5572 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5573 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5574 viesrules[0] = 0;
5575 u_strcat(viesrules, virules);
5576 u_strcat(viesrules, esrules);
5577 viesruleslength = viruleslength + esruleslength;
5578 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5579
5580 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5581 length = u_unescape(srules, rules, 500);
5582 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5583 if(U_FAILURE(status)){
5584 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585 return;
5586 }
5587
5588 tailoredSet = ucol_getTailoredSet(viescoll, &status);
5589 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5590
5591 if(!uset_equals(tailoredSet, importTailoredSet)){
5592 log_err("Tailored sets not equal");
5593 }
5594
5595 uset_close(importTailoredSet);
5596
5597 itemCount = uset_getItemCount(tailoredSet);
5598
5599 for( i = 0; i < itemCount; i++){
5600 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5601 if(strLength < 2){
5602 for (; start <= end; start++){
5603 k = 0;
5604 U16_APPEND(str, k, 500, start, b);
5605 (void)b; /* Suppress set but not used warning. */
5606 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5607 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5608 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5609 log_err("Sort key for %s not equal\n", str);
5610 break;
5611 }
5612 }
5613 }else{
5614 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5615 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5616 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5617 log_err("ZZSort key for %s not equal\n", str);
5618 break;
5619 }
5620
5621 }
5622 }
5623
5624 uset_close(tailoredSet);
5625
5626 uprv_free(viesrules);
5627
5628 ucol_close(vicoll);
5629 ucol_close(escoll);
5630 ucol_close(viescoll);
5631 ucol_close(importviescoll);
5632 }
5633
TestImportWithType(void)5634 static void TestImportWithType(void)
5635 {
5636 UCollator* vicoll;
5637 UCollator* decoll;
5638 UCollator* videcoll;
5639 UCollator* importvidecoll;
5640 UParseError error;
5641 UErrorCode status = U_ZERO_ERROR;
5642 const UChar* virules;
5643 int32_t viruleslength;
5644 const UChar* derules;
5645 int32_t deruleslength;
5646 UChar* viderules;
5647 int32_t videruleslength;
5648 const char srules[500] = "[import vi][import de-u-co-phonebk]";
5649 UChar rules[500];
5650 uint32_t length = 0;
5651 int32_t itemCount;
5652 int32_t i, k;
5653 UChar32 start;
5654 UChar32 end;
5655 UChar str[500];
5656 int32_t strLength;
5657
5658 uint8_t sk1[500];
5659 uint8_t sk2[500];
5660
5661 USet* tailoredSet;
5662 USet* importTailoredSet;
5663
5664 vicoll = ucol_open("vi", &status);
5665 if(U_FAILURE(status)){
5666 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5667 return;
5668 }
5669 virules = ucol_getRules(vicoll, &viruleslength);
5670 if(viruleslength == 0) {
5671 log_data_err("missing vi tailoring rule string\n");
5672 ucol_close(vicoll);
5673 return;
5674 }
5675 /* decoll = ucol_open("de@collation=phonebook", &status); */
5676 decoll = ucol_open("de-u-co-phonebk", &status);
5677 if(U_FAILURE(status)){
5678 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5679 return;
5680 }
5681
5682
5683 derules = ucol_getRules(decoll, &deruleslength);
5684 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5685 viderules[0] = 0;
5686 u_strcat(viderules, virules);
5687 u_strcat(viderules, derules);
5688 videruleslength = viruleslength + deruleslength;
5689 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5690
5691 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5692 length = u_unescape(srules, rules, 500);
5693 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5694 if(U_FAILURE(status)){
5695 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5696 return;
5697 }
5698
5699 tailoredSet = ucol_getTailoredSet(videcoll, &status);
5700 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5701
5702 if(!uset_equals(tailoredSet, importTailoredSet)){
5703 log_err("Tailored sets not equal");
5704 }
5705
5706 uset_close(importTailoredSet);
5707
5708 itemCount = uset_getItemCount(tailoredSet);
5709
5710 for( i = 0; i < itemCount; i++){
5711 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5712 if(strLength < 2){
5713 for (; start <= end; start++){
5714 k = 0;
5715 U16_APPEND_UNSAFE(str, k, start);
5716 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5717 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5718 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5719 log_err("Sort key for %s not equal\n", str);
5720 break;
5721 }
5722 }
5723 }else{
5724 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5725 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5726 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5727 log_err("Sort key for %s not equal\n", str);
5728 break;
5729 }
5730
5731 }
5732 }
5733
5734 uset_close(tailoredSet);
5735
5736 uprv_free(viderules);
5737
5738 ucol_close(videcoll);
5739 ucol_close(importvidecoll);
5740 ucol_close(vicoll);
5741 ucol_close(decoll);
5742 }
5743
5744 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5745 static const UChar longUpperStr1[]= { /* 155 chars */
5746 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5747 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5748 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5749 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5750 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5751 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5752 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5753 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5754 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5755 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5756 };
5757
5758 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5759 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5760 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5762 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5763 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5764 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5765 };
5766
5767 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5768 static const UChar longUpperStr3[]= { /* 324 chars */
5769 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5781 };
5782
5783 typedef struct {
5784 const UChar * longUpperStrPtr;
5785 int32_t longUpperStrLen;
5786 } LongUpperStrItem;
5787
5788 /* String pointers must be in reverse collation order of the corresponding strings */
5789 static const LongUpperStrItem longUpperStrItems[] = {
5790 { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5791 { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5792 { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
5793 { NULL, 0 }
5794 };
5795
5796 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5797
5798 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5799 static void TestCaseLevelBufferOverflow(void)
5800 {
5801 UErrorCode status = U_ZERO_ERROR;
5802 UCollator * ucol = ucol_open("root", &status);
5803 if ( U_SUCCESS(status) ) {
5804 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5805 if ( U_SUCCESS(status) ) {
5806 const LongUpperStrItem * itemPtr;
5807 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5808 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5809 int32_t sortKeyLen;
5810 if (itemPtr > longUpperStrItems) {
5811 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5812 }
5813 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5814 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5815 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5816 break;
5817 }
5818 if ( itemPtr > longUpperStrItems ) {
5819 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5820 if (compareResult >= 0) {
5821 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5822 }
5823 }
5824 }
5825 } else {
5826 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5827 }
5828 ucol_close(ucol);
5829 } else {
5830 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5831 }
5832 }
5833
5834 /* Test for #10595 */
5835 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5836 #define KEY_PART_SIZE 16
5837
TestNextSortKeyPartJaIdentical(void)5838 static void TestNextSortKeyPartJaIdentical(void)
5839 {
5840 UErrorCode status = U_ZERO_ERROR;
5841 UCollator *coll;
5842 uint8_t keyPart[KEY_PART_SIZE];
5843 UCharIterator iter;
5844 uint32_t state[2] = {0, 0};
5845 int32_t keyPartLen;
5846
5847 coll = ucol_open("ja", &status);
5848 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5849 if (U_FAILURE(status)) {
5850 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5851 return;
5852 }
5853
5854 uiter_setString(&iter, testJapaneseName, 5);
5855 keyPartLen = KEY_PART_SIZE;
5856 while (keyPartLen == KEY_PART_SIZE) {
5857 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5858 if (U_FAILURE(status)) {
5859 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5860 break;
5861 }
5862 }
5863
5864 ucol_close(coll);
5865 }
5866
5867 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5868
addMiscCollTest(TestNode ** root)5869 void addMiscCollTest(TestNode** root)
5870 {
5871 TEST(TestRuleOptions);
5872 TEST(TestBeforePrefixFailure);
5873 TEST(TestContractionClosure);
5874 TEST(TestPrefixCompose);
5875 TEST(TestStrCollIdenticalPrefix);
5876 TEST(TestPrefix);
5877 TEST(TestNewJapanese);
5878 /*TEST(TestLimitations);*/
5879 TEST(TestNonChars);
5880 TEST(TestExtremeCompression);
5881 TEST(TestSurrogates);
5882 TEST(TestVariableTopSetting);
5883 TEST(TestMaxVariable);
5884 TEST(TestBocsuCoverage);
5885 TEST(TestCyrillicTailoring);
5886 TEST(TestCase);
5887 TEST(IncompleteCntTest);
5888 TEST(BlackBirdTest);
5889 TEST(FunkyATest);
5890 TEST(BillFairmanTest);
5891 TEST(TestChMove);
5892 TEST(TestImplicitTailoring);
5893 TEST(TestFCDProblem);
5894 TEST(TestEmptyRule);
5895 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5896 TEST(TestJ815);
5897 /*TEST(TestJ831);*/ /* we changed lv locale */
5898 TEST(TestBefore);
5899 TEST(TestHangulTailoring);
5900 TEST(TestUCARules);
5901 TEST(TestIncrementalNormalize);
5902 TEST(TestComposeDecompose);
5903 TEST(TestCompressOverlap);
5904 TEST(TestContraction);
5905 TEST(TestExpansion);
5906 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5907 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5908 TEST(TestOptimize);
5909 TEST(TestSuppressContractions);
5910 TEST(Alexis2);
5911 TEST(TestHebrewUCA);
5912 TEST(TestPartialSortKeyTermination);
5913 TEST(TestSettings);
5914 TEST(TestEquals);
5915 TEST(TestJ2726);
5916 TEST(NullRule);
5917 TEST(TestNumericCollation);
5918 TEST(TestTibetanConformance);
5919 TEST(TestPinyinProblem);
5920 TEST(TestSeparateTrees);
5921 TEST(TestBeforePinyin);
5922 TEST(TestBeforeTightening);
5923 /*TEST(TestMoreBefore);*/
5924 TEST(TestTailorNULL);
5925 TEST(TestUpperFirstQuaternary);
5926 TEST(TestJ4960);
5927 TEST(TestJ5223);
5928 TEST(TestJ5232);
5929 TEST(TestJ5367);
5930 TEST(TestHiragana);
5931 TEST(TestSortKeyConsistency);
5932 TEST(TestVI5913); /* VI, RO tailored rules */
5933 TEST(TestCroatianSortKey);
5934 TEST(TestTailor6179);
5935 TEST(TestUCAPrecontext);
5936 TEST(TestOutOfBuffer5468);
5937 TEST(TestSameStrengthList);
5938
5939 TEST(TestSameStrengthListQuoted);
5940 TEST(TestSameStrengthListSupplemental);
5941 TEST(TestSameStrengthListQwerty);
5942 TEST(TestSameStrengthListQuotedQwerty);
5943 TEST(TestSameStrengthListRanges);
5944 TEST(TestSameStrengthListSupplementalRanges);
5945 TEST(TestSpecialCharacters);
5946 TEST(TestPrivateUseCharacters);
5947 TEST(TestPrivateUseCharactersInList);
5948 TEST(TestPrivateUseCharactersInRange);
5949 TEST(TestInvalidListsAndRanges);
5950 TEST(TestImportRulesDeWithPhonebook);
5951 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5952 /* TEST(TestImportRulesCJKWithUnihan); */
5953 TEST(TestImport);
5954 TEST(TestImportWithType);
5955
5956 TEST(TestBeforeRuleWithScriptReordering);
5957 TEST(TestNonLeadBytesDuringCollationReordering);
5958 TEST(TestReorderingAPI);
5959 TEST(TestReorderingAPIWithRuleCreatedCollator);
5960 TEST(TestEquivalentReorderingScripts);
5961 TEST(TestGreekFirstReorder);
5962 TEST(TestGreekLastReorder);
5963 TEST(TestNonScriptReorder);
5964 TEST(TestHaniReorder);
5965 TEST(TestHaniReorderWithOtherRules);
5966 TEST(TestMultipleReorder);
5967 TEST(TestReorderingAcrossCloning);
5968 TEST(TestReorderWithNumericCollation);
5969
5970 TEST(TestCaseLevelBufferOverflow);
5971 TEST(TestNextSortKeyPartJaIdentical);
5972 }
5973
5974 #endif /* #if !UCONFIG_NO_COLLATION */
5975