1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File cmsccoll.C
11 *
12 *******************************************************************************/
13 /**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18 #include <stdio.h>
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "cintltst.h"
28 #include "ccolltst.h"
29 #include "callcoll.h"
30 #include "unicode/ustring.h"
31 #include "string.h"
32 #include "ucol_imp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
45 #define MAX_TOKEN_LEN 16
46
47 typedef UCollationResult tst_strcoll(void *collator, const int object,
48 const UChar *source, const int sLen,
49 const UChar *target, const int tLen);
50
51
52
/*
 * Strings for the first half of IncompleteCntTest, used with the rule
 * " & Z < ABC < Q < B". They are listed in the order the test expects:
 * every entry must compare less than every entry that follows it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};
67
/*
 * Strings for the second half of IncompleteCntTest, used with the rule
 * " & Z < DAVIS < MARK <DAV". Listed in expected ascending order: every
 * entry must compare less than every entry that follows it.
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ",
    "MAR",
    "Z", "DAVIS", "MARK", "DAV",
    "DAVI"
};
79
IncompleteCntTest(void)80 static void IncompleteCntTest(void)
81 {
82 UErrorCode status = U_ZERO_ERROR;
83 UChar temp[90];
84 UChar t1[90];
85 UChar t2[90];
86
87 UCollator *coll = NULL;
88 uint32_t i = 0, j = 0;
89 uint32_t size = 0;
90
91 u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95 if(U_SUCCESS(status)) {
96 size = UPRV_LENGTHOF(cnt1);
97 for(i = 0; i < size-1; i++) {
98 for(j = i+1; j < size; j++) {
99 UCollationElements *iter;
100 u_uastrcpy(t1, cnt1[i]);
101 u_uastrcpy(t2, cnt1[j]);
102 doTest(coll, t1, t2, UCOL_LESS);
103 /* synwee : added collation element iterator test */
104 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105 if (U_FAILURE(status)) {
106 log_err("Creation of iterator failed\n");
107 break;
108 }
109 backAndForth(iter);
110 ucol_closeElements(iter);
111 }
112 }
113 }
114
115 ucol_close(coll);
116
117
118 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121 if(U_SUCCESS(status)) {
122 size = UPRV_LENGTHOF(cnt2);
123 for(i = 0; i < size-1; i++) {
124 for(j = i+1; j < size; j++) {
125 UCollationElements *iter;
126 u_uastrcpy(t1, cnt2[i]);
127 u_uastrcpy(t2, cnt2[j]);
128 doTest(coll, t1, t2, UCOL_LESS);
129
130 /* synwee : added collation element iterator test */
131 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132 if (U_FAILURE(status)) {
133 log_err("Creation of iterator failed\n");
134 break;
135 }
136 backAndForth(iter);
137 ucol_closeElements(iter);
138 }
139 }
140 }
141
142 ucol_close(coll);
143
144
145 }
146
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_SHIFTED and strength is UCOL_QUATERNARY (each entry less
 * than every later entry). The same table is reused for the tertiary pass,
 * where neighbours are compared against shiftedTert.
 */
static const char shifted[][20] = {
    "black bird",  "black-bird",  "blackbird",
    "black Bird",  "black-Bird",  "blackBird",
    "black birds", "black-birds", "blackbirds"
};
158
159 const static UCollationResult shiftedTert[] = {
160 UCOL_EQUAL,
161 UCOL_EQUAL,
162 UCOL_EQUAL,
163 UCOL_LESS,
164 UCOL_EQUAL,
165 UCOL_EQUAL,
166 UCOL_LESS,
167 UCOL_EQUAL,
168 UCOL_EQUAL
169 };
170
/*
 * "black bird" variants in the order BlackBirdTest expects when alternate
 * handling is UCOL_NON_IGNORABLE (each entry less than every later entry).
 */
static const char nonignorable[][20] = {
    "black bird", "black Bird", "black birds",
    "black-bird", "black-Bird", "black-birds",
    "blackbird",  "blackBird",  "blackbirds"
};
182
BlackBirdTest(void)183 static void BlackBirdTest(void) {
184 UErrorCode status = U_ZERO_ERROR;
185 UChar t1[90];
186 UChar t2[90];
187
188 uint32_t i = 0, j = 0;
189 uint32_t size = 0;
190 UCollator *coll = ucol_open("en_US", &status);
191
192 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195 if(U_SUCCESS(status)) {
196 size = UPRV_LENGTHOF(nonignorable);
197 for(i = 0; i < size-1; i++) {
198 for(j = i+1; j < size; j++) {
199 u_uastrcpy(t1, nonignorable[i]);
200 u_uastrcpy(t2, nonignorable[j]);
201 doTest(coll, t1, t2, UCOL_LESS);
202 }
203 }
204 }
205
206 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209 if(U_SUCCESS(status)) {
210 size = UPRV_LENGTHOF(shifted);
211 for(i = 0; i < size-1; i++) {
212 for(j = i+1; j < size; j++) {
213 u_uastrcpy(t1, shifted[i]);
214 u_uastrcpy(t2, shifted[j]);
215 doTest(coll, t1, t2, UCOL_LESS);
216 }
217 }
218 }
219
220 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221 if(U_SUCCESS(status)) {
222 size = UPRV_LENGTHOF(shifted);
223 for(i = 1; i < size; i++) {
224 u_uastrcpy(t1, shifted[i-1]);
225 u_uastrcpy(t2, shifted[i]);
226 doTest(coll, t1, t2, shiftedTert[i]);
227 }
228 }
229
230 ucol_close(coll);
231 }
232
233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0000},
237 {0x00C0, 0x0301, 0x0000},
238 /* this would work with forced normalization */
239 {0x00C0, 0x0316, 0x0000}
240 };
241
242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245 {0x00C0, 0},
246 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247 /* this would work with forced normalization */
248 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249 };
250
251 const static UCollationResult results[] = {
252 UCOL_GREATER,
253 UCOL_EQUAL,
254 UCOL_EQUAL,
255 UCOL_GREATER,
256 UCOL_EQUAL
257 };
258
FunkyATest(void)259 static void FunkyATest(void)
260 {
261
262 int32_t i;
263 UErrorCode status = U_ZERO_ERROR;
264 UCollator *myCollation;
265 myCollation = ucol_open("en_US", &status);
266 if(U_FAILURE(status)){
267 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268 return;
269 }
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272 ucol_setStrength(myCollation, UCOL_TERTIARY);
273 for (i = 0; i < 4 ; i++)
274 {
275 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276 }
277 ucol_close(myCollation);
278 }
279
280 UColAttributeValue caseFirst[] = {
281 UCOL_OFF,
282 UCOL_LOWER_FIRST,
283 UCOL_UPPER_FIRST
284 };
285
286
287 UColAttributeValue alternateHandling[] = {
288 UCOL_NON_IGNORABLE,
289 UCOL_SHIFTED
290 };
291
292 UColAttributeValue caseLevel[] = {
293 UCOL_OFF,
294 UCOL_ON
295 };
296
297 UColAttributeValue strengths[] = {
298 UCOL_PRIMARY,
299 UCOL_SECONDARY,
300 UCOL_TERTIARY,
301 UCOL_QUATERNARY,
302 UCOL_IDENTICAL
303 };
304
305 #if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST"
};

/* Printable names, index-aligned with the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE", "UCOL_SHIFTED"
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF", "UCOL_ON"
};
330
331 /* not used currently - does not test only prints */
332 static void PrintMarkDavis(void)
333 {
334 UErrorCode status = U_ZERO_ERROR;
335 UChar m[256];
336 uint8_t sortkey[256];
337 UCollator *coll = ucol_open("en_US", &status);
338 uint32_t h,i,j,k, sortkeysize;
339 uint32_t sizem = 0;
340 char buffer[512];
341 uint32_t len = 512;
342
343 log_verbose("PrintMarkDavis");
344
345 u_uastrcpy(m, "Mark Davis");
346 sizem = u_strlen(m);
347
348
349 m[1] = 0xe4;
350
351 for(i = 0; i<sizem; i++) {
352 fprintf(stderr, "\\u%04X ", m[i]);
353 }
354 fprintf(stderr, "\n");
355
356 for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
357 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
358 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
359
360 for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
361 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
362 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
363
364 for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
365 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
366 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
367
368 for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
369 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
370 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
371 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
372 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
373 }
374
375 }
376
377 }
378
379 }
380 }
381 #endif
382
BillFairmanTest(void)383 static void BillFairmanTest(void) {
384 /*
385 ** check for actual locale via ICU resource bundles
386 **
387 ** lp points to the original locale ("fr_FR_....")
388 */
389
390 UResourceBundle *lr,*cr;
391 UErrorCode lec = U_ZERO_ERROR;
392 const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394 log_verbose("BillFairmanTest\n");
395
396 lr = ures_open(NULL,lp,&lec);
397 if (lr) {
398 cr = ures_getByKey(lr,"collations",0,&lec);
399 if (cr) {
400 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401 if (lp) {
402 if (U_SUCCESS(lec)) {
403 if(strcmp(lp, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405 }
406 }
407 }
408 ures_close(cr);
409 }
410 ures_close(lr);
411 }
412 }
413
/*
 * Strings for TestChMove, in the ascending order expected from the "cs"
 * collator. Entries containing "\\u030C" are escape sequences that
 * TestChMove expands with u_unescape before comparing.
 */
static const char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
433
TestChMove(void)434 static void TestChMove(void) {
435 UChar t1[256] = {0};
436 UChar t2[256] = {0};
437
438 uint32_t i = 0, j = 0;
439 uint32_t size = 0;
440 UErrorCode status = U_ZERO_ERROR;
441
442 UCollator *coll = ucol_open("cs", &status);
443
444 if(U_SUCCESS(status)) {
445 size = UPRV_LENGTHOF(chTest);
446 for(i = 0; i < size-1; i++) {
447 for(j = i+1; j < size; j++) {
448 u_unescape(chTest[i], t1, 256);
449 u_unescape(chTest[j], t2, 256);
450 doTest(coll, t1, t2, UCOL_LESS);
451 }
452 }
453 }
454 else {
455 log_data_err("Can't open collator");
456 }
457 ucol_close(coll);
458 }
459
460
461
462
463 /*
464 const static char impTest[][20] = {
465 "\\u4e00",
466 "a",
467 "A",
468 "b",
469 "B",
470 "\\u4e01"
471 };
472 */
473
474
TestImplicitTailoring(void)475 static void TestImplicitTailoring(void) {
476 static const struct {
477 const char *rules;
478 const char *data[10];
479 const uint32_t len;
480 } tests[] = {
481 {
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490 };
491
492 int32_t i = 0;
493
494 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496 }
497
498 /*
499 UChar t1[256] = {0};
500 UChar t2[256] = {0};
501
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504 uint32_t i = 0, j = 0;
505 uint32_t size = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
510
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513 if(U_SUCCESS(status)) {
514 size = UPRV_LENGTHOF(impTest);
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
520 }
521 }
522 }
523 else {
524 log_err("Can't open collator");
525 }
526 ucol_close(coll);
527 */
528 }
529
TestFCDProblem(void)530 static void TestFCDProblem(void) {
531 UChar t1[256] = {0};
532 UChar t2[256] = {0};
533
534 const char *s1 = "\\u0430\\u0306\\u0325";
535 const char *s2 = "\\u04D1\\u0325";
536
537 UErrorCode status = U_ZERO_ERROR;
538 UCollator *coll = ucol_open("", &status);
539 u_unescape(s1, t1, 256);
540 u_unescape(s2, t2, 256);
541
542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543 doTest(coll, t1, t2, UCOL_EQUAL);
544
545 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546 doTest(coll, t1, t2, UCOL_EQUAL);
547
548 ucol_close(coll);
549 }
550
551 /*
552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553 We're only using NFC/NFD in this test.
554 */
555 #define NORM_BUFFER_TEST_LEN 18
556 typedef struct {
557 UChar32 u;
558 UChar NFC[NORM_BUFFER_TEST_LEN];
559 UChar NFD[NORM_BUFFER_TEST_LEN];
560 } tester;
561
TestComposeDecompose(void)562 static void TestComposeDecompose(void) {
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 };
569 int32_t noOfLoc;
570 int32_t i = 0, j = 0;
571
572 UErrorCode status = U_ZERO_ERROR;
573 const char *locName = NULL;
574 uint32_t nfcSize;
575 uint32_t nfdSize;
576 tester **t;
577 uint32_t noCases = 0;
578 UCollator *coll = NULL;
579 UChar32 u = 0;
580 UChar comp[NORM_BUFFER_TEST_LEN];
581 uint32_t len = 0;
582 UCollationElements *iter;
583 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584 int32_t charsToTestSize;
585
586 noOfLoc = uloc_countAvailable();
587
588 coll = ucol_open("", &status);
589 if (U_FAILURE(status)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591 uset_close(charsToTest);
592 return;
593 }
594 charsToTestSize = uset_size(charsToTest);
595 if (charsToTestSize <= 0) {
596 log_err("Set was zero. Missing data?\n");
597 uset_close(charsToTest);
598 return;
599 }
600 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
601 t[0] = (tester *)malloc(sizeof(tester));
602 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
603
604 for(u = 0; u < charsToTestSize; u++) {
605 UChar32 ch = uset_charAt(charsToTest, u);
606 len = 0;
607 U16_APPEND_UNSAFE(comp, len, ch);
608 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
609 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
610
611 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
612 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
613 t[noCases]->u = ch;
614 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
615 u_strncpy(t[noCases]->NFC, comp, len);
616 t[noCases]->NFC[len] = 0;
617 }
618 noCases++;
619 t[noCases] = (tester *)malloc(sizeof(tester));
620 uprv_memset(t[noCases], 0, sizeof(tester));
621 }
622 }
623 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
624 uset_close(charsToTest);
625 charsToTest = NULL;
626
627 for(u=0; u<(UChar32)noCases; u++) {
628 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
629 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
630 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
631 }
632 }
633 /*
634 for(u = 0; u < charsToTestSize; u++) {
635 if(!(u&0xFFFF)) {
636 log_verbose("%08X ", u);
637 }
638 uprv_memset(t[noCases], 0, sizeof(tester));
639 t[noCases]->u = u;
640 len = 0;
641 U16_APPEND_UNSAFE(comp, len, u);
642 comp[len] = 0;
643 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
644 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
645 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
646 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
647 }
648 */
649
650 ucol_close(coll);
651
652 log_verbose("Testing locales, number of cases = %i\n", noCases);
653 for(i = 0; i<noOfLoc; i++) {
654 status = U_ZERO_ERROR;
655 locName = uloc_getAvailable(i);
656 if(hasCollationElements(locName)) {
657 char cName[256];
658 UChar name[256];
659 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
660
661 for(j = 0; j<nameSize; j++) {
662 cName[j] = (char)name[j];
663 }
664 cName[nameSize] = 0;
665 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
666
667 coll = ucol_open(locName, &status);
668 ucol_setStrength(coll, UCOL_IDENTICAL);
669 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
670
671 for(u=0; u<(UChar32)noCases; u++) {
672 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
673 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
674 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
675 log_verbose("Testing NFC\n");
676 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
677 backAndForth(iter);
678 log_verbose("Testing NFD\n");
679 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
680 backAndForth(iter);
681 }
682 }
683 ucol_closeElements(iter);
684 ucol_close(coll);
685 }
686 }
687 for(u = 0; u <= (UChar32)noCases; u++) {
688 free(t[u]);
689 }
690 free(t);
691 }
692
TestEmptyRule(void)693 static void TestEmptyRule(void) {
694 UErrorCode status = U_ZERO_ERROR;
695 UChar rulez[] = { 0 };
696 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
697
698 ucol_close(coll);
699 }
700
TestUCARules(void)701 static void TestUCARules(void) {
702 UErrorCode status = U_ZERO_ERROR;
703 UChar b[256];
704 UChar *rules = b;
705 uint32_t ruleLen = 0;
706 UCollator *UCAfromRules = NULL;
707 UCollator *coll = ucol_open("", &status);
708 if(status == U_FILE_ACCESS_ERROR) {
709 log_data_err("Is your data around?\n");
710 return;
711 } else if(U_FAILURE(status)) {
712 log_err("Error opening collator\n");
713 return;
714 }
715 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
716
717 log_verbose("TestUCARules\n");
718 if(ruleLen > 256) {
719 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
720 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
721 }
722 log_verbose("Rules length is %d\n", ruleLen);
723 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
724 if(U_SUCCESS(status)) {
725 ucol_close(UCAfromRules);
726 } else {
727 log_verbose("Unable to create a collator from UCARules!\n");
728 }
729 /*
730 u_unescape(blah, b, 256);
731 ucol_getSortKey(coll, b, 1, res, 256);
732 */
733 ucol_close(coll);
734 if(rules != b) {
735 free(rules);
736 }
737 }
738
739
740 /* Pinyin tonal order */
741 /*
742 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
743 (w/macron)< (w/acute)< (w/caron)< (w/grave)
744 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
745 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
746 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
747 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
748 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
749 .. (\u00fc)
750
751 However, in testing we got the following order:
752 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
753 (w/acute)< (w/grave)< (w/caron)< (w/macron)
754 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
755 .. (\u0113)
756 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
757 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
758 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
759 .. (\u01d8)
760 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
761 */
762
/**
 * Pinyin tonal order test: tailors the toned vowels before their base letter
 * with [before 1] rules (and the U+01D6..U+00FC group after u), then hands
 * the rules and the expected ordering to genericRulesStarter.
 */
static void TestBefore(void) {
    /* Expected order: each row lists four toned forms followed by the base letter. */
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    static const char *const pinyinRules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(pinyinRules, data, UPRV_LENGTHOF(data));
}
781
782 #if 0
/* superseded by TestBeforePinyin */
/* Expected "zh" ordering: each base vowel followed by its toned forms. */
static void TestJ784(void) {
    static const char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
796 #endif
797
/**
 * Hands the "da" locale and the expected order I, i, Y, y (uppercase before
 * lowercase) to genericLocaleStarter.
 */
static void TestUpperCaseFirst(void) {
    static const char *data[] = { "I", "i", "Y", "y" };

    genericLocaleStarter("da", data, UPRV_LENGTHOF(data));
}
807
/**
 * Checks one expected ordering of a/ae/U+00E6 strings twice: once against
 * the "fr" locale and once against a custom tailoring that uses
 * [backwards 2] and places U+00E6 / U+00C6 relative to A with expansions.
 */
static void TestJ815(void) {
    static const char *data[] = {
        "aa", "Aa",
        "ab", "Ab",
        "ad", "Ad",
        "ae", "Ae",
        "\\u00e6", "\\u00c6",
        "af", "Af",
        "b", "B"
    };

    genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
}
828
829
TestCase(void)830 static void TestCase(void)
831 {
832 const static UChar gRules[MAX_TOKEN_LEN] =
833 /*" & 0 < 1,\u2461<a,A"*/
834 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
835
836 const static UChar testCase[][MAX_TOKEN_LEN] =
837 {
838 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
839 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
840 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
841 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
842 };
843
844 const static UCollationResult caseTestResults[][9] =
845 {
846 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
847 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
848 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
850 };
851
852 const static UColAttributeValue caseTestAttributes[][2] =
853 {
854 { UCOL_LOWER_FIRST, UCOL_OFF},
855 { UCOL_UPPER_FIRST, UCOL_OFF},
856 { UCOL_LOWER_FIRST, UCOL_ON},
857 { UCOL_UPPER_FIRST, UCOL_ON}
858 };
859 int32_t i,j,k;
860 UErrorCode status = U_ZERO_ERROR;
861 UCollationElements *iter;
862 UCollator *myCollation;
863 myCollation = ucol_open("en_US", &status);
864
865 if(U_FAILURE(status)){
866 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
867 return;
868 }
869 log_verbose("Testing different case settings\n");
870 ucol_setStrength(myCollation, UCOL_TERTIARY);
871
872 for(k = 0; k<4; k++) {
873 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
874 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
875 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
876 for (i = 0; i < 3 ; i++) {
877 for(j = i+1; j<4; j++) {
878 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
879 }
880 }
881 }
882 ucol_close(myCollation);
883
884 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
885 if(U_FAILURE(status)){
886 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
887 return;
888 }
889 log_verbose("Testing different case settings with custom rules\n");
890 ucol_setStrength(myCollation, UCOL_TERTIARY);
891
892 for(k = 0; k<4; k++) {
893 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
894 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
895 for (i = 0; i < 3 ; i++) {
896 for(j = i+1; j<4; j++) {
897 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
898 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
899 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
900 backAndForth(iter);
901 ucol_closeElements(iter);
902 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
903 backAndForth(iter);
904 ucol_closeElements(iter);
905 }
906 }
907 }
908 ucol_close(myCollation);
909 {
910 const static char *lowerFirst[] = {
911 "h",
912 "H",
913 "ch",
914 "Ch",
915 "CH",
916 "cha",
917 "chA",
918 "Cha",
919 "ChA",
920 "CHa",
921 "CHA",
922 "i",
923 "I"
924 };
925
926 const static char *upperFirst[] = {
927 "H",
928 "h",
929 "CH",
930 "Ch",
931 "ch",
932 "CHA",
933 "CHa",
934 "ChA",
935 "Cha",
936 "chA",
937 "cha",
938 "I",
939 "i"
940 };
941 log_verbose("mixed case test\n");
942 log_verbose("lower first, case level off\n");
943 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
944 log_verbose("upper first, case level off\n");
945 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
946 log_verbose("lower first, case level on\n");
947 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
948 log_verbose("upper first, case level on\n");
949 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
950 }
951
952 }
953
TestIncrementalNormalize(void)954 static void TestIncrementalNormalize(void) {
955
956 /*UChar baseA =0x61;*/
957 UChar baseA =0x41;
958 /* UChar baseB = 0x42;*/
959 static const UChar ccMix[] = {0x316, 0x321, 0x300};
960 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
961 /*
962 0x316 is combining grave accent below, cc=220
963 0x321 is combining palatalized hook below, cc=202
964 0x300 is combining grave accent, cc=230
965 */
966
967 #define MAXSLEN 2000
968 /*int maxSLen = 64000;*/
969 int sLen;
970 int i;
971
972 UCollator *coll;
973 UErrorCode status = U_ZERO_ERROR;
974 UCollationResult result;
975
976 int32_t myQ = getTestOption(QUICK_OPTION);
977
978 if(getTestOption(QUICK_OPTION) < 0) {
979 setTestOption(QUICK_OPTION, 1);
980 }
981
982 {
983 /* Test 1. Run very long unnormalized strings, to force overflow of*/
984 /* most buffers along the way.*/
985 UChar strA[MAXSLEN+1];
986 UChar strB[MAXSLEN+1];
987
988 coll = ucol_open("en_US", &status);
989 if(status == U_FILE_ACCESS_ERROR) {
990 log_data_err("Is your data around?\n");
991 return;
992 } else if(U_FAILURE(status)) {
993 log_err("Error opening collator\n");
994 return;
995 }
996 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
997
998 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
999 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1000 /*for (sLen = 1000; sLen<1001; sLen++) {*/
1001 for (sLen = 500; sLen<501; sLen++) {
1002 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1003 strA[0] = baseA;
1004 strB[0] = baseA;
1005 for (i=1; i<=sLen-1; i++) {
1006 strA[i] = ccMix[i % 3];
1007 strB[sLen-i] = ccMix[i % 3];
1008 }
1009 strA[sLen] = 0;
1010 strB[sLen] = 0;
1011
1012 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
1013 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
1014 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
1015 doTest(coll, strA, strB, UCOL_EQUAL);
1016 }
1017 }
1018
1019 setTestOption(QUICK_OPTION, myQ);
1020
1021
1022 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1023 /* of the string. Checks a couple of edge cases.*/
1024
1025 {
1026 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1027 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1028 ucol_setStrength(coll, UCOL_TERTIARY);
1029 doTest(coll, strA, strB, UCOL_EQUAL);
1030 }
1031
1032 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1033
1034 {
1035 /* New UCA 3.1.1.
1036 * test below used a code point from Desseret, which sorts differently
1037 * than d800 dc00
1038 */
1039 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1040 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1041 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1042 ucol_setStrength(coll, UCOL_TERTIARY);
1043 doTest(coll, strA, strB, UCOL_GREATER);
1044 }
1045
1046 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1047
1048 {
1049 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1050 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1051 char sortKeyA[50];
1052 char sortKeyAz[50];
1053 char sortKeyB[50];
1054 char sortKeyBz[50];
1055 int r;
1056
1057 /* there used to be -3 here. Hmmmm.... */
1058 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1059 result = ucol_strcoll(coll, strA, 3, strB, 3);
1060 if (result != UCOL_GREATER) {
1061 log_err("ERROR 1 in test 4\n");
1062 }
1063 result = ucol_strcoll(coll, strA, -1, strB, -1);
1064 if (result != UCOL_EQUAL) {
1065 log_err("ERROR 2 in test 4\n");
1066 }
1067
1068 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1069 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1070 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1071 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1072
1073 r = strcmp(sortKeyA, sortKeyAz);
1074 if (r <= 0) {
1075 log_err("Error 3 in test 4\n");
1076 }
1077 r = strcmp(sortKeyA, sortKeyB);
1078 if (r <= 0) {
1079 log_err("Error 4 in test 4\n");
1080 }
1081 r = strcmp(sortKeyAz, sortKeyBz);
1082 if (r != 0) {
1083 log_err("Error 5 in test 4\n");
1084 }
1085
1086 ucol_setStrength(coll, UCOL_IDENTICAL);
1087 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1088 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1089 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1090 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1091
1092 r = strcmp(sortKeyA, sortKeyAz);
1093 if (r <= 0) {
1094 log_err("Error 6 in test 4\n");
1095 }
1096 r = strcmp(sortKeyA, sortKeyB);
1097 if (r <= 0) {
1098 log_err("Error 7 in test 4\n");
1099 }
1100 r = strcmp(sortKeyAz, sortKeyBz);
1101 if (r != 0) {
1102 log_err("Error 8 in test 4\n");
1103 }
1104 ucol_setStrength(coll, UCOL_TERTIARY);
1105 }
1106
1107
1108 /* Test 5: Null characters in non-normal source strings.*/
1109
1110 {
1111 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1112 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1113 char sortKeyA[50];
1114 char sortKeyAz[50];
1115 char sortKeyB[50];
1116 char sortKeyBz[50];
1117 int r;
1118
1119 result = ucol_strcoll(coll, strA, 6, strB, 6);
1120 if (result != UCOL_GREATER) {
1121 log_err("ERROR 1 in test 5\n");
1122 }
1123 result = ucol_strcoll(coll, strA, -1, strB, -1);
1124 if (result != UCOL_EQUAL) {
1125 log_err("ERROR 2 in test 5\n");
1126 }
1127
1128 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1129 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1130 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1131 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1132
1133 r = strcmp(sortKeyA, sortKeyAz);
1134 if (r <= 0) {
1135 log_err("Error 3 in test 5\n");
1136 }
1137 r = strcmp(sortKeyA, sortKeyB);
1138 if (r <= 0) {
1139 log_err("Error 4 in test 5\n");
1140 }
1141 r = strcmp(sortKeyAz, sortKeyBz);
1142 if (r != 0) {
1143 log_err("Error 5 in test 5\n");
1144 }
1145
1146 ucol_setStrength(coll, UCOL_IDENTICAL);
1147 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1148 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1149 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1150 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1151
1152 r = strcmp(sortKeyA, sortKeyAz);
1153 if (r <= 0) {
1154 log_err("Error 6 in test 5\n");
1155 }
1156 r = strcmp(sortKeyA, sortKeyB);
1157 if (r <= 0) {
1158 log_err("Error 7 in test 5\n");
1159 }
1160 r = strcmp(sortKeyAz, sortKeyBz);
1161 if (r != 0) {
1162 log_err("Error 8 in test 5\n");
1163 }
1164 ucol_setStrength(coll, UCOL_TERTIARY);
1165 }
1166
1167
1168 /* Test 6: Null character as base of a non-normal combining sequence.*/
1169
1170 {
1171 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1172 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1173
1174 result = ucol_strcoll(coll, strA, 5, strB, 5);
1175 if (result != UCOL_LESS) {
1176 log_err("Error 1 in test 6\n");
1177 }
1178 result = ucol_strcoll(coll, strA, -1, strB, -1);
1179 if (result != UCOL_EQUAL) {
1180 log_err("Error 2 in test 6\n");
1181 }
1182 }
1183
1184 ucol_close(coll);
1185 }
1186
1187
1188
#if 0
/*
 * Disabled test: unescapes each entry of caseBitData and checks that
 * ucol_uprv_getCaseBits() returns the value stored at the same index of
 * results[] when called on the root collator.
 */
static void TestGetCaseBit(void) {
    /* Escaped input strings; results[i] is the value expected for caseBitData[i]. */
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    for(i = 0; i<UPRV_LENGTHOF(results); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            /* Only the first code unit of the input is reported. */
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }
    /* NOTE(review): status is never checked and UCA is never closed in this disabled code. */
}
#endif
1216
TestHangulTailoring(void)1217 static void TestHangulTailoring(void) {
1218 static const char *koreanData[] = {
1219 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1220 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1221 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1222 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1223 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1224 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1225 };
1226
1227 const char *rules =
1228 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1229 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1230 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1231 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1232 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1233 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1234
1235
1236 UErrorCode status = U_ZERO_ERROR;
1237 UChar rlz[2048] = { 0 };
1238 uint32_t rlen = u_unescape(rules, rlz, 2048);
1239
1240 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1241 if(status == U_FILE_ACCESS_ERROR) {
1242 log_data_err("Is your data around?\n");
1243 return;
1244 } else if(U_FAILURE(status)) {
1245 log_err("Error opening collator\n");
1246 return;
1247 }
1248
1249 log_verbose("Using start of korean rules\n");
1250
1251 if(U_SUCCESS(status)) {
1252 genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1253 } else {
1254 log_err("Unable to open collator with rules %s\n", rules);
1255 }
1256
1257 ucol_close(coll);
1258
1259 log_verbose("Using ko__LOTUS locale\n");
1260 genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1261 }
1262
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap() compares the bytes of a sort key level against these
 * values to detect compressed runs that ended up on the wrong side of the middle.
 */
enum {
    SEC_COMMON_MIDDLE = 0x25,      /* range 05..45 */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
};
1273
TestCompressOverlap(void)1274 static void TestCompressOverlap(void) {
1275 UChar secstr[150];
1276 UChar tertstr[150];
1277 UErrorCode status = U_ZERO_ERROR;
1278 UCollator *coll;
1279 uint8_t result[500];
1280 uint32_t resultlen;
1281 int count = 0;
1282 uint8_t *tempptr;
1283
1284 coll = ucol_open("", &status);
1285
1286 if (U_FAILURE(status)) {
1287 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1288 return;
1289 }
1290 while (count < 149) {
1291 secstr[count] = 0x0020; /* [06, 05, 05] */
1292 tertstr[count] = 0x0020;
1293 count ++;
1294 }
1295
1296 /* top down compression ----------------------------------- */
1297 secstr[count] = 0x0332; /* [, 87, 05] */
1298 tertstr[count] = 0x3000; /* [06, 05, 07] */
1299
1300 /* no compression secstr should have 150 secondary bytes, tertstr should
1301 have 150 tertiary bytes.
1302 with correct compression, secstr should have 6 secondary
1303 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1304 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1305 (void)resultlen; /* Suppress set but not used warning. */
1306 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1307 while (*(tempptr + 1) != 1) {
1308 /* the last secondary collation element is not checked since it is not
1309 part of the compression */
1310 if (*tempptr < SEC_COMMON_MIDDLE) {
1311 log_err("Secondary top down compression overlapped\n");
1312 }
1313 tempptr ++;
1314 }
1315
1316 /* tertiary top/bottom/common for en_US is similar to the secondary
1317 top/bottom/common */
1318 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1319 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1320 while (*(tempptr + 1) != 0) {
1321 /* the last secondary collation element is not checked since it is not
1322 part of the compression */
1323 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1324 log_err("Tertiary top down compression overlapped\n");
1325 }
1326 tempptr ++;
1327 }
1328
1329 /* bottom up compression ------------------------------------- */
1330 secstr[count] = 0;
1331 tertstr[count] = 0;
1332 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1333 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1334 while (*(tempptr + 1) != 1) {
1335 /* the last secondary collation element is not checked since it is not
1336 part of the compression */
1337 if (*tempptr > SEC_COMMON_MIDDLE) {
1338 log_err("Secondary bottom up compression overlapped\n");
1339 }
1340 tempptr ++;
1341 }
1342
1343 /* tertiary top/bottom/common for en_US is similar to the secondary
1344 top/bottom/common */
1345 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1346 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1347 while (*(tempptr + 1) != 0) {
1348 /* the last secondary collation element is not checked since it is not
1349 part of the compression */
1350 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1351 log_err("Tertiary bottom up compression overlapped\n");
1352 }
1353 tempptr ++;
1354 }
1355
1356 ucol_close(coll);
1357 }
1358
/**
 * Runs genericRulesStarter() on three Cyrillic test strings for each tailoring
 * that is still meaningful with the current root collation.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Variants that are intentionally not run:
     *
     *   genericLocaleStarter("ru", test, 3);
     *     Russian overrides contractions, so this test is not valid anymore.
     *
     *   genericLocaleStarter("root", test, 3);
     *   genericRulesStarter("&\\u0410 = \\u0410", test, 3);
     *   genericRulesStarter("&Z < \\u0410", test, 3);
     *   genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
     *   genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
     *     Commented out because UCA 8.0 drops most of the Cyrillic
     *     contractions from the default order.
     *     See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
     */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t r;

    for (r = 0; r < UPRV_LENGTHOF(activeRules); ++r) {
        genericRulesStarter(activeRules[r], test, UPRV_LENGTHOF(test));
    }
}
1381
/**
 * Runs genericRulesStarter() on two sets of strings with a tailoring that
 * suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* The same rule string is used for both data sets. */
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1398
TestContraction(void)1399 static void TestContraction(void) {
1400 const static char *testrules[] = {
1401 "&A = AB / B",
1402 "&A = A\\u0306/\\u0306",
1403 "&c = ch / h"
1404 };
1405 const static UChar testdata[][2] = {
1406 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1407 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1408 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1409 };
1410 const static UChar testdata2[][2] = {
1411 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1412 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1413 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1414 };
1415 #if 0
1416 /*
1417 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1418 * In fact, LDML 24 recommends an improved way of creating mappings
1419 * which always yields different mappings for such pairs. See
1420 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1421 */
1422 const static char *testrules3[] = {
1423 "&z < xyz &xyzw << B",
1424 "&z < xyz &xyz << B / w",
1425 "&z < ch &achm << B",
1426 "&z < ch &a << B / chm",
1427 "&\\ud800\\udc00w << B",
1428 "&\\ud800\\udc00 << B / w",
1429 "&a\\ud800\\udc00m << B",
1430 "&a << B / \\ud800\\udc00m",
1431 };
1432 #endif
1433
1434 UErrorCode status = U_ZERO_ERROR;
1435 UCollator *coll;
1436 UChar rule[256] = {0};
1437 uint32_t rlen = 0;
1438 int i;
1439
1440 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1441 UCollationElements *iter1;
1442 int j = 0;
1443 log_verbose("Rule %s for testing\n", testrules[i]);
1444 rlen = u_unescape(testrules[i], rule, 32);
1445 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1446 if (U_FAILURE(status)) {
1447 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1448 return;
1449 }
1450 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1451 if (U_FAILURE(status)) {
1452 log_err("Collation iterator creation failed\n");
1453 return;
1454 }
1455 while (j < 2) {
1456 UCollationElements *iter2 = ucol_openElements(coll,
1457 &(testdata[i][j]),
1458 1, &status);
1459 int32_t ce;
1460 if (U_FAILURE(status)) {
1461 log_err("Collation iterator creation failed\n");
1462 return;
1463 }
1464 ce = ucol_next(iter2, &status);
1465 while (ce != UCOL_NULLORDER) {
1466 if (ucol_next(iter1, &status) != ce) {
1467 log_err("Collation elements in contraction split does not match\n");
1468 return;
1469 }
1470 ce = ucol_next(iter2, &status);
1471 }
1472 j ++;
1473 ucol_closeElements(iter2);
1474 }
1475 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1476 log_err("Collation elements not exhausted\n");
1477 return;
1478 }
1479 ucol_closeElements(iter1);
1480 ucol_close(coll);
1481 }
1482
1483 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1484 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1485 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1486 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1487 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1488 testdata2[1][1]);
1489 return;
1490 }
1491 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1492 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1493 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1494 testdata2[2][1]);
1495 return;
1496 }
1497 ucol_close(coll);
1498 #if 0 /* see above */
1499 for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1500 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1501 UCollator *coll1,
1502 *coll2;
1503 UCollationElements *iter1,
1504 *iter2;
1505 UChar ch = 0x0042 /* 'B' */;
1506 uint32_t ce;
1507 rlen = u_unescape(testrules3[i], rule, 32);
1508 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509 rlen = u_unescape(testrules3[i + 1], rule, 32);
1510 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1511 if (U_FAILURE(status)) {
1512 log_err("Collator creation failed %s\n", testrules[i]);
1513 return;
1514 }
1515 iter1 = ucol_openElements(coll1, &ch, 1, &status);
1516 iter2 = ucol_openElements(coll2, &ch, 1, &status);
1517 if (U_FAILURE(status)) {
1518 log_err("Collation iterator creation failed\n");
1519 return;
1520 }
1521 ce = ucol_next(iter1, &status);
1522 if (U_FAILURE(status)) {
1523 log_err("Retrieving ces failed\n");
1524 return;
1525 }
1526 while (ce != UCOL_NULLORDER) {
1527 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1528 if (ce == ce2) {
1529 log_verbose("CEs match: %08x\n", ce);
1530 } else {
1531 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1532 return;
1533 }
1534 ce = ucol_next(iter1, &status);
1535 if (U_FAILURE(status)) {
1536 log_err("Retrieving ces failed\n");
1537 return;
1538 }
1539 }
1540 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1541 log_err("CEs not exhausted\n");
1542 return;
1543 }
1544 ucol_closeElements(iter1);
1545 ucol_closeElements(iter2);
1546 ucol_close(coll1);
1547 ucol_close(coll2);
1548 }
1549 #endif
1550 }
1551
TestExpansion(void)1552 static void TestExpansion(void) {
1553 const static char *testrules[] = {
1554 #if 0
1555 /*
1556 * This seems to have tested that M was not mapped to an expansion.
1557 * I believe the old builder just did that because it computed the extension CEs
1558 * at the very end, which was a bug.
1559 * Among other problems, it violated the core tailoring principle
1560 * by making an earlier rule depend on a later one.
1561 * And, of course, if M did not get an expansion, then it was primary different from K,
1562 * unlike what the rule &K<<M says.
1563 */
1564 "&J << K / B & K << M",
1565 #endif
1566 "&J << K / B << M"
1567 };
1568 const static UChar testdata[][3] = {
1569 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1570 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1571 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1572 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1573 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1574 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1575 };
1576
1577 UErrorCode status = U_ZERO_ERROR;
1578 UCollator *coll;
1579 UChar rule[256] = {0};
1580 uint32_t rlen = 0;
1581 int i;
1582
1583 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1584 int j = 0;
1585 log_verbose("Rule %s for testing\n", testrules[i]);
1586 rlen = u_unescape(testrules[i], rule, 32);
1587 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1588 if (U_FAILURE(status)) {
1589 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1590 return;
1591 }
1592
1593 for (j = 0; j < 5; j ++) {
1594 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1595 }
1596 ucol_close(coll);
1597 }
1598 }
1599
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Each nested block below pairs one rule string with several orderings and
 * hands them to genericRulesStarter() or genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};
        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        /* both orderings are tried with decomposition mode on and off */
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* normalization: contractions spanning normalization */
    {
        /* NOTE(review): rule and data are identical to the preceding block; only the log message differs. */
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* variable top: */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        /* att[i] is set to valOn[i] or valOff[i]: shifted/quaternary versus non-ignorable/tertiary */
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
    }

}
#endif
1691
TestBocsuCoverage(void)1692 static void TestBocsuCoverage(void) {
1693 UErrorCode status = U_ZERO_ERROR;
1694 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1695 UChar test[256] = {0};
1696 uint32_t tlen = u_unescape(testString, test, 32);
1697 uint8_t key[256] = {0};
1698 uint32_t klen = 0;
1699
1700 UCollator *coll = ucol_open("", &status);
1701 if(U_SUCCESS(status)) {
1702 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1703
1704 klen = ucol_getSortKey(coll, test, tlen, key, 256);
1705 (void)klen; /* Suppress set but not used warning. */
1706
1707 ucol_close(coll);
1708 } else {
1709 log_data_err("Couldn't open UCA\n");
1710 }
1711 }
1712
TestVariableTopSetting(void)1713 static void TestVariableTopSetting(void) {
1714 UErrorCode status = U_ZERO_ERROR;
1715 uint32_t varTopOriginal = 0, varTop1, varTop2;
1716 UCollator *coll = ucol_open("", &status);
1717 if(U_SUCCESS(status)) {
1718
1719 static const UChar nul = 0;
1720 static const UChar space = 0x20;
1721 static const UChar dot = 0x2e; /* punctuation */
1722 static const UChar degree = 0xb0; /* symbol */
1723 static const UChar dollar = 0x24; /* currency symbol */
1724 static const UChar zero = 0x30; /* digit */
1725
1726 varTopOriginal = ucol_getVariableTop(coll, &status);
1727 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1728 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1729
1730 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1731 varTop2 = ucol_getVariableTop(coll, &status);
1732 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1733 if(U_FAILURE(status) || varTop1 != varTop2 ||
1734 !ucol_equal(coll, &nul, 0, &space, 1) ||
1735 ucol_equal(coll, &nul, 0, &dot, 1) ||
1736 ucol_equal(coll, &nul, 0, °ree, 1) ||
1737 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1738 ucol_equal(coll, &nul, 0, &zero, 1) ||
1739 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1740 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1741 }
1742
1743 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1744 varTop2 = ucol_getVariableTop(coll, &status);
1745 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1746 if(U_FAILURE(status) || varTop1 != varTop2 ||
1747 !ucol_equal(coll, &nul, 0, &space, 1) ||
1748 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1749 ucol_equal(coll, &nul, 0, °ree, 1) ||
1750 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1751 ucol_equal(coll, &nul, 0, &zero, 1) ||
1752 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1753 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1754 }
1755
1756 varTop1 = ucol_setVariableTop(coll, °ree, 1, &status);
1757 varTop2 = ucol_getVariableTop(coll, &status);
1758 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1759 if(U_FAILURE(status) || varTop1 != varTop2 ||
1760 !ucol_equal(coll, &nul, 0, &space, 1) ||
1761 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1762 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1763 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1764 ucol_equal(coll, &nul, 0, &zero, 1) ||
1765 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1766 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1767 }
1768
1769 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1770 varTop2 = ucol_getVariableTop(coll, &status);
1771 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1772 if(U_FAILURE(status) || varTop1 != varTop2 ||
1773 !ucol_equal(coll, &nul, 0, &space, 1) ||
1774 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1775 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1776 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1777 ucol_equal(coll, &nul, 0, &zero, 1) ||
1778 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1779 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1780 }
1781
1782 log_verbose("Testing setting variable top to contractions\n");
1783 {
1784 UChar first[4] = { 0 };
1785 first[0] = 0x0040;
1786 first[1] = 0x0050;
1787 first[2] = 0x0000;
1788
1789 status = U_ZERO_ERROR;
1790 ucol_setVariableTop(coll, first, -1, &status);
1791
1792 if(U_SUCCESS(status)) {
1793 log_err("Invalid contraction succeded in setting variable top!\n");
1794 }
1795
1796 }
1797
1798 log_verbose("Test restoring variable top\n");
1799
1800 status = U_ZERO_ERROR;
1801 ucol_restoreVariableTop(coll, varTopOriginal, &status);
1802 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1803 log_err("Couldn't restore old variable top\n");
1804 }
1805
1806 log_verbose("Testing calling with error set\n");
1807
1808 status = U_INTERNAL_PROGRAM_ERROR;
1809 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1810 varTop2 = ucol_getVariableTop(coll, &status);
1811 ucol_restoreVariableTop(coll, varTop2, &status);
1812 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1813 varTop2 = ucol_getVariableTop(NULL, &status);
1814 ucol_restoreVariableTop(NULL, varTop2, &status);
1815 if(status != U_INTERNAL_PROGRAM_ERROR) {
1816 log_err("Bad reaction to passed error!\n");
1817 }
1818 ucol_close(coll);
1819 } else {
1820 log_data_err("Couldn't open UCA collator\n");
1821 }
1822 }
1823
TestMaxVariable(void)1824 static void TestMaxVariable(void) {
1825 UErrorCode status = U_ZERO_ERROR;
1826 UColReorderCode oldMax, max;
1827 UCollator *coll;
1828
1829 static const UChar nul = 0;
1830 static const UChar space = 0x20;
1831 static const UChar dot = 0x2e; /* punctuation */
1832 static const UChar degree = 0xb0; /* symbol */
1833 static const UChar dollar = 0x24; /* currency symbol */
1834 static const UChar zero = 0x30; /* digit */
1835
1836 coll = ucol_open("", &status);
1837 if(U_FAILURE(status)) {
1838 log_data_err("Couldn't open root collator\n");
1839 return;
1840 }
1841
1842 oldMax = ucol_getMaxVariable(coll);
1843 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1844 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1845
1846 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1847 max = ucol_getMaxVariable(coll);
1848 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1849 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1850 !ucol_equal(coll, &nul, 0, &space, 1) ||
1851 ucol_equal(coll, &nul, 0, &dot, 1) ||
1852 ucol_equal(coll, &nul, 0, °ree, 1) ||
1853 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1854 ucol_equal(coll, &nul, 0, &zero, 1) ||
1855 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1856 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1857 }
1858
1859 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1860 max = ucol_getMaxVariable(coll);
1861 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1862 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1863 !ucol_equal(coll, &nul, 0, &space, 1) ||
1864 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1865 ucol_equal(coll, &nul, 0, °ree, 1) ||
1866 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1867 ucol_equal(coll, &nul, 0, &zero, 1) ||
1868 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1869 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1870 }
1871
1872 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1873 max = ucol_getMaxVariable(coll);
1874 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1875 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1876 !ucol_equal(coll, &nul, 0, &space, 1) ||
1877 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1878 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1879 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1880 ucol_equal(coll, &nul, 0, &zero, 1) ||
1881 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1882 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1883 }
1884
1885 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1886 max = ucol_getMaxVariable(coll);
1887 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1888 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1889 !ucol_equal(coll, &nul, 0, &space, 1) ||
1890 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1891 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1892 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1893 ucol_equal(coll, &nul, 0, &zero, 1) ||
1894 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1895 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1896 }
1897
1898 log_verbose("Test restoring maxVariable\n");
1899 status = U_ZERO_ERROR;
1900 ucol_setMaxVariable(coll, oldMax, &status);
1901 if(oldMax != ucol_getMaxVariable(coll)) {
1902 log_err("Couldn't restore old maxVariable\n");
1903 }
1904
1905 log_verbose("Testing calling with error set\n");
1906 status = U_INTERNAL_PROGRAM_ERROR;
1907 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1908 max = ucol_getMaxVariable(coll);
1909 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1910 log_err("Bad reaction to passed error!\n");
1911 }
1912 ucol_close(coll);
1913 }
1914
TestNonChars(void)1915 static void TestNonChars(void) {
1916 static const char *test[] = {
1917 "\\u0000", /* ignorable */
1918 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1919 "\\uFDD0", "\\uFDEF",
1920 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1921 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
1922 "\\U0003FFFE", "\\U0003FFFF",
1923 "\\U0004FFFE", "\\U0004FFFF",
1924 "\\U0005FFFE", "\\U0005FFFF",
1925 "\\U0006FFFE", "\\U0006FFFF",
1926 "\\U0007FFFE", "\\U0007FFFF",
1927 "\\U0008FFFE", "\\U0008FFFF",
1928 "\\U0009FFFE", "\\U0009FFFF",
1929 "\\U000AFFFE", "\\U000AFFFF",
1930 "\\U000BFFFE", "\\U000BFFFF",
1931 "\\U000CFFFE", "\\U000CFFFF",
1932 "\\U000DFFFE", "\\U000DFFFF",
1933 "\\U000EFFFE", "\\U000EFFFF",
1934 "\\U000FFFFE", "\\U000FFFFF",
1935 "\\U0010FFFE", "\\U0010FFFF",
1936 "\\uFFFF" /* special character with maximum primary weight */
1937 };
1938 UErrorCode status = U_ZERO_ERROR;
1939 UCollator *coll = ucol_open("en_US", &status);
1940
1941 log_verbose("Test non characters\n");
1942
1943 if(U_SUCCESS(status)) {
1944 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1945 } else {
1946 log_err_status(status, "Unable to open collator\n");
1947 }
1948
1949 ucol_close(coll);
1950 }
1951
TestExtremeCompression(void)1952 static void TestExtremeCompression(void) {
1953 static char *test[4];
1954 int32_t j = 0, i = 0;
1955
1956 for(i = 0; i<4; i++) {
1957 test[i] = (char *)malloc(2048*sizeof(char));
1958 }
1959
1960 for(j = 20; j < 500; j++) {
1961 for(i = 0; i<4; i++) {
1962 uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1963 test[i][j-1] = (char)('a'+i);
1964 test[i][j] = 0;
1965 }
1966 genericLocaleStarter("en_US", (const char **)test, 4);
1967 }
1968
1969
1970 for(i = 0; i<4; i++) {
1971 free(test[i]);
1972 }
1973 }
1974
/*
 * Disabled (compiled out by "#if 0") alternative version of
 * TestExtremeCompression(), kept for reference only.
 * Notes for anyone re-enabling it:
 *  - ucol_open() is passed "status" instead of "&status";
 *  - "coll" is never used and never closed;
 *  - the first fill loop memsets j-2 bytes but writes the last character at
 *    index j-1, leaving index j-2 holding whatever an earlier pass stored.
 */
#if 0
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    /* Only the strings left over from the final loop pass are tested. */
    genericLocaleStarter("en_US", (const char **)test, 4);

    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
2004
/**
 * Tailors supplementary code points (written as escaped surrogate pairs),
 * both alone and followed by BMP letters, and passes the rule together with
 * the strings listed in the expected order to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    /* Same sequence as the reset chain in the rule above. */
    static const char *test[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    genericRulesStarter(rule, test, UPRV_LENGTHOF(test));
}
2025
2026 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
TestPrefix(void)2027 static void TestPrefix(void) {
2028 uint32_t i;
2029
2030 static const struct {
2031 const char *rules;
2032 const char *data[50];
2033 const uint32_t len;
2034 } tests[] = {
2035 { "&z <<< z|a",
2036 {"zz", "za"}, 2 },
2037
2038 { "&z <<< z| a",
2039 {"zz", "za"}, 2 },
2040 { "[strength I]"
2041 "&a=\\ud900\\udc25"
2042 "&z<<<\\ud900\\udc25|a",
2043 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2044 };
2045
2046
2047 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2048 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2049 }
2050 }
2051
2052 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2053 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)2054 static void TestNewJapanese(void) {
2055
2056 static const char * const test1[] = {
2057 "\\u30b7\\u30e3\\u30fc\\u30ec",
2058 "\\u30b7\\u30e3\\u30a4",
2059 "\\u30b7\\u30e4\\u30a3",
2060 "\\u30b7\\u30e3\\u30ec",
2061 "\\u3061\\u3087\\u3053",
2062 "\\u3061\\u3088\\u3053",
2063 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2064 "\\u3066\\u30fc\\u305f",
2065 "\\u30c6\\u30fc\\u30bf",
2066 "\\u30c6\\u30a7\\u30bf",
2067 "\\u3066\\u3048\\u305f",
2068 "\\u3067\\u30fc\\u305f",
2069 "\\u30c7\\u30fc\\u30bf",
2070 "\\u30c7\\u30a7\\u30bf",
2071 "\\u3067\\u3048\\u305f",
2072 "\\u3066\\u30fc\\u305f\\u30fc",
2073 "\\u30c6\\u30fc\\u30bf\\u30a1",
2074 "\\u30c6\\u30a7\\u30bf\\u30fc",
2075 "\\u3066\\u3047\\u305f\\u3041",
2076 "\\u3066\\u3048\\u305f\\u30fc",
2077 "\\u3067\\u30fc\\u305f\\u30fc",
2078 "\\u30c7\\u30fc\\u30bf\\u30a1",
2079 "\\u3067\\u30a7\\u305f\\u30a1",
2080 "\\u30c7\\u3047\\u30bf\\u3041",
2081 "\\u30c7\\u30a8\\u30bf\\u30a2",
2082 "\\u3072\\u3086",
2083 "\\u3073\\u3085\\u3042",
2084 "\\u3074\\u3085\\u3042",
2085 "\\u3073\\u3085\\u3042\\u30fc",
2086 "\\u30d3\\u30e5\\u30a2\\u30fc",
2087 "\\u3074\\u3085\\u3042\\u30fc",
2088 "\\u30d4\\u30e5\\u30a2\\u30fc",
2089 "\\u30d2\\u30e5\\u30a6",
2090 "\\u30d2\\u30e6\\u30a6",
2091 "\\u30d4\\u30e5\\u30a6\\u30a2",
2092 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2093 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2094 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2095 "\\u3072\\u3085\\u3093",
2096 "\\u3074\\u3085\\u3093",
2097 "\\u3075\\u30fc\\u308a",
2098 "\\u30d5\\u30fc\\u30ea",
2099 "\\u3075\\u3045\\u308a",
2100 "\\u3075\\u30a5\\u308a",
2101 "\\u3075\\u30a5\\u30ea",
2102 "\\u30d5\\u30a6\\u30ea",
2103 "\\u3076\\u30fc\\u308a",
2104 "\\u30d6\\u30fc\\u30ea",
2105 "\\u3076\\u3045\\u308a",
2106 "\\u30d6\\u30a5\\u308a",
2107 "\\u3077\\u3046\\u308a",
2108 "\\u30d7\\u30a6\\u30ea",
2109 "\\u3075\\u30fc\\u308a\\u30fc",
2110 "\\u30d5\\u30a5\\u30ea\\u30fc",
2111 "\\u3075\\u30a5\\u308a\\u30a3",
2112 "\\u30d5\\u3045\\u308a\\u3043",
2113 "\\u30d5\\u30a6\\u30ea\\u30fc",
2114 "\\u3075\\u3046\\u308a\\u3043",
2115 "\\u30d6\\u30a6\\u30ea\\u30a4",
2116 "\\u3077\\u30fc\\u308a\\u30fc",
2117 "\\u3077\\u30a5\\u308a\\u30a4",
2118 "\\u3077\\u3046\\u308a\\u30fc",
2119 "\\u30d7\\u30a6\\u30ea\\u30a4",
2120 "\\u30d5\\u30fd",
2121 "\\u3075\\u309e",
2122 "\\u3076\\u309d",
2123 "\\u3076\\u3075",
2124 "\\u3076\\u30d5",
2125 "\\u30d6\\u3075",
2126 "\\u30d6\\u30d5",
2127 "\\u3076\\u309e",
2128 "\\u3076\\u3077",
2129 "\\u30d6\\u3077",
2130 "\\u3077\\u309d",
2131 "\\u30d7\\u30fd",
2132 "\\u3077\\u3075",
2133 };
2134
2135 static const char *test2[] = {
2136 "\\u306f\\u309d", /* H\\u309d */
2137 "\\u30cf\\u30fd", /* K\\u30fd */
2138 "\\u306f\\u306f", /* HH */
2139 "\\u306f\\u30cf", /* HK */
2140 "\\u30cf\\u30cf", /* KK */
2141 "\\u306f\\u309e", /* H\\u309e */
2142 "\\u30cf\\u30fe", /* K\\u30fe */
2143 "\\u306f\\u3070", /* HH\\u309b */
2144 "\\u30cf\\u30d0", /* KK\\u309b */
2145 "\\u306f\\u3071", /* HH\\u309c */
2146 "\\u30cf\\u3071", /* KH\\u309c */
2147 "\\u30cf\\u30d1", /* KK\\u309c */
2148 "\\u3070\\u309d", /* H\\u309b\\u309d */
2149 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2150 "\\u3070\\u306f", /* H\\u309bH */
2151 "\\u30d0\\u30cf", /* K\\u309bK */
2152 "\\u3070\\u309e", /* H\\u309b\\u309e */
2153 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2154 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2155 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2156 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2157 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2158 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2159 "\\u3071\\u309d", /* H\\u309c\\u309d */
2160 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2161 "\\u3071\\u306f", /* H\\u309cH */
2162 "\\u30d1\\u30cf", /* K\\u309cK */
2163 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2164 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2165 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2166 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2167 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2168 };
2169 /*
2170 static const char *test3[] = {
2171 "\\u221er\\u221e",
2172 "\\u221eR#",
2173 "\\u221et\\u221e",
2174 "#r\\u221e",
2175 "#R#",
2176 "#t%",
2177 "#T%",
2178 "8t\\u221e",
2179 "8T\\u221e",
2180 "8t#",
2181 "8T#",
2182 "8t%",
2183 "8T%",
2184 "8t8",
2185 "8T8",
2186 "\\u03c9r\\u221e",
2187 "\\u03a9R%",
2188 "rr\\u221e",
2189 "rR\\u221e",
2190 "Rr\\u221e",
2191 "RR\\u221e",
2192 "RT%",
2193 "rt8",
2194 "tr\\u221e",
2195 "tr8",
2196 "TR8",
2197 "tt8",
2198 "\\u30b7\\u30e3\\u30fc\\u30ec",
2199 };
2200 */
2201 static const UColAttribute att[] = { UCOL_STRENGTH };
2202 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2203
2204 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2205 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2206
2207 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2208 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2209 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2210 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2211 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2212 }
2213
TestStrCollIdenticalPrefix(void)2214 static void TestStrCollIdenticalPrefix(void) {
2215 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2216 const char* test[] = {
2217 "ab\\ud9b0\\udc70",
2218 "ab\\ud9b0\\udc71"
2219 };
2220 genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2221 }
2222 /* Contractions should have all their canonically equivalent */
2223 /* strings included */
TestContractionClosure(void)2224 static void TestContractionClosure(void) {
2225 static const struct {
2226 const char *rules;
2227 const char *data[10];
2228 const uint32_t len;
2229 } tests[] = {
2230 { "&b=\\u00e4\\u00e4",
2231 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2232 { "&b=\\u00C5",
2233 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2234 };
2235 uint32_t i;
2236
2237
2238 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2239 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2240 }
2241 }
2242
2243 /* This tests also fails*/
TestBeforePrefixFailure(void)2244 static void TestBeforePrefixFailure(void) {
2245 static const struct {
2246 const char *rules;
2247 const char *data[10];
2248 const uint32_t len;
2249 } tests[] = {
2250 { "&g <<< a"
2251 "&[before 3]\\uff41 <<< x",
2252 {"x", "\\uff41"}, 2 },
2253 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2254 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2255 "&[before 3]\\u30a7<<<\\u30a9",
2256 {"\\u30a9", "\\u30a7"}, 2 },
2257 { "&[before 3]\\u30a7<<<\\u30a9"
2258 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2259 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2260 {"\\u30a9", "\\u30a7"}, 2 },
2261 };
2262 uint32_t i;
2263
2264
2265 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2266 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2267 }
2268
2269 #if 0
2270 const char* rule1 =
2271 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2272 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2273 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2274 const char* rule2 =
2275 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2276 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2277 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2278 const char* test[] = {
2279 "\\u30c6\\u30fc\\u30bf",
2280 "\\u30c6\\u30a7\\u30bf",
2281 };
2282 genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2283 genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
2284 /* this piece of code should be in some sort of verbose mode */
2285 /* it gets the collation elements for elements and prints them */
2286 /* This is useful when trying to see whether the problem is */
2287 {
2288 UErrorCode status = U_ZERO_ERROR;
2289 uint32_t i = 0;
2290 UCollationElements *it = NULL;
2291 uint32_t CE;
2292 UChar string[256];
2293 uint32_t uStringLen;
2294 UCollator *coll = NULL;
2295
2296 uStringLen = u_unescape(rule1, string, 256);
2297
2298 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2299
2300 /*coll = ucol_open("ja_JP_JIS", &status);*/
2301 it = ucol_openElements(coll, string, 0, &status);
2302
2303 for(i = 0; i < UPRV_LENGTHOF(test); i++) {
2304 log_verbose("%s\n", test[i]);
2305 uStringLen = u_unescape(test[i], string, 256);
2306 ucol_setText(it, string, uStringLen, &status);
2307
2308 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2309 log_verbose("%08X\n", CE);
2310 }
2311 log_verbose("\n");
2312
2313 }
2314
2315 ucol_closeElements(it);
2316 ucol_close(coll);
2317 }
2318 #endif
2319 }
2320
TestPrefixCompose(void)2321 static void TestPrefixCompose(void) {
2322 const char* rule1 =
2323 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2324 /*
2325 const char* test[] = {
2326 "\\u30c6\\u30fc\\u30bf",
2327 "\\u30c6\\u30a7\\u30bf",
2328 };
2329 */
2330 {
2331 UErrorCode status = U_ZERO_ERROR;
2332 /*uint32_t i = 0;*/
2333 /*UCollationElements *it = NULL;*/
2334 /* uint32_t CE;*/
2335 UChar string[256];
2336 uint32_t uStringLen;
2337 UCollator *coll = NULL;
2338
2339 uStringLen = u_unescape(rule1, string, 256);
2340
2341 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2342 ucol_close(coll);
2343 }
2344
2345
2346 }
2347
2348 /*
2349 [last variable] last variable value
2350 [last primary ignorable] largest CE for primary ignorable
2351 [last secondary ignorable] largest CE for secondary ignorable
2352 [last tertiary ignorable] largest CE for tertiary ignorable
2353 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2354 */
2355
TestRuleOptions(void)2356 static void TestRuleOptions(void) {
2357 /* values here are hardcoded and are correct for the current UCA
2358 * when the UCA changes, one might be forced to change these
2359 * values.
2360 */
2361
2362 /*
2363 * These strings contain the last character before [variable top]
2364 * and the first and second characters (by primary weights) after it.
2365 * See FractionalUCA.txt. For example:
2366 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2367 [variable top = 0C FE]
2368 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2369 and
2370 00B4; [0D 0C, 05, 05]
2371 *
2372 * Note: Starting with UCA 6.0, the [variable top] collation element
2373 * is not the weight of any character or string,
2374 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2375 */
2376 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2377 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
2378 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2379
2380 /*
2381 * This string has to match the character that has the [last regular] weight
2382 * which changes with each UCA version.
2383 * See the bottom of FractionalUCA.txt which says something like
2384 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2385 *
2386 * Note: Starting with UCA 6.0, the [last regular] collation element
2387 * is not the weight of any character or string,
2388 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2389 */
2390 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2391
2392 static const struct {
2393 const char *rules;
2394 const char *data[10];
2395 const uint32_t len;
2396 } tests[] = {
2397 #if 0
2398 /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2399 /* - all befores here amount to zero */
2400 { "&[before 3][first tertiary ignorable]<<<a",
2401 { "\\u0000", "a"}, 2
2402 }, /* you cannot go before first tertiary ignorable */
2403
2404 { "&[before 3][last tertiary ignorable]<<<a",
2405 { "\\u0000", "a"}, 2
2406 }, /* you cannot go before last tertiary ignorable */
2407 #endif
2408 /*
2409 * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2410 * and it *is* possible to "go before" that.
2411 */
2412 { "&[before 3][first secondary ignorable]<<<a",
2413 { "\\u0000", "a"}, 2
2414 },
2415
2416 { "&[before 3][last secondary ignorable]<<<a",
2417 { "\\u0000", "a"}, 2
2418 },
2419
2420 /* 'normal' befores */
2421
2422 /*
2423 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2424 * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2425 * because there is no tailoring space before that boundary.
2426 * Made the tests work by tailoring to a space instead.
2427 */
2428 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
2429 { "c", "b", "\\u0332", "a" }, 4
2430 },
2431
2432 /* we don't have a code point that corresponds to
2433 * the last primary ignorable
2434 */
2435 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
2436 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2437 },
2438
2439 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2440 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
2441 },
2442
2443 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2444 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2445 },
2446
2447 { "&[first regular]<a"
2448 "&[before 1][first regular]<b",
2449 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2450 },
2451
2452 { "&[before 1][last regular]<b"
2453 "&[last regular]<a",
2454 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2455 },
2456
2457 { "&[before 1][first implicit]<b"
2458 "&[first implicit]<a",
2459 { "b", "\\u4e00", "a", "\\u4e01"}, 4
2460 },
2461 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2462 { "&[before 1][last implicit]<b"
2463 "&[last implicit]<a",
2464 { "b", "\\U0010FFFD", "a" }, 3
2465 },
2466 #endif
2467 { "&[last variable]<z"
2468 "&' '<x" /* was &[last primary ignorable]<x, see above */
2469 "&[last secondary ignorable]<<y"
2470 "&[last tertiary ignorable]<<<w"
2471 "&[top]<u",
2472 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2473 }
2474
2475 };
2476 uint32_t i;
2477
2478 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2479 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2480 }
2481 }
2482
2483
TestOptimize(void)2484 static void TestOptimize(void) {
2485 /* this is not really a test - just trying out
2486 * whether copying of UCA contents will fail
2487 * Cannot really test, since the functionality
2488 * remains the same.
2489 */
2490 static const struct {
2491 const char *rules;
2492 const char *data[10];
2493 const uint32_t len;
2494 } tests[] = {
2495 /* - all befores here amount to zero */
2496 { "[optimize [\\uAC00-\\uD7FF]]",
2497 { "a", "b"}, 2}
2498 };
2499 uint32_t i;
2500
2501 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2502 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2503 }
2504 }
2505
2506 /*
2507 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2508 weiv ucol_strcollIter?
2509 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2510 weiv these are the input strings?
2511 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2512 weiv will check - could be a problem with utf-8 iterator
2513 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2514 weiv hmmm
2515 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2516 weiv that doesn't sound right
2517 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2518 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2519 cycheng@ca.ibm.c... yes
2520 weiv and then do the comparison
2521 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2522 weiv utf-16 strings look like a little endian ones in the example you sent me
2523 weiv It could be a bug - let me try to test it out
2524 cycheng@ca.ibm.c... ok
2525 cycheng@ca.ibm.c... we can wait till the conf. call
2526 cycheng@ca.ibm.c... next weke
2527 weiv that would be great
2528 weiv hmmm
2529 weiv I might be wrong
2530 weiv let me play with it some more
2531 cycheng@ca.ibm.c... ok
2532 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2533 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2534 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2535 weiv ok
2536 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2537 weiv thanks
2538 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2539 */
/*
 * Disabled (compiled out by "#if 0"). Compares one pair of strings, given
 * once as UTF-16BE bytes and once as UTF-8 bytes, through
 * ucol_strcollIter() on the collator opened for "" with normalization on,
 * and logs an error if the two results differ.
 */
#if 0
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* Same two strings in both encodings: <D800 0021> and <FFFC 0062>. */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    /* Lengths are given in bytes (4 bytes per string). */
    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
2580
2581 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2582 static void Alexis2(void) {
2583 UErrorCode status = U_ZERO_ERROR;
2584 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2588
2589 UConverter *conv = NULL;
2590
2591 UCharIterator U16BEItS, U16BEItT;
2592 UCharIterator U8ItS, U8ItT;
2593
2594 UCollationResult resU16, resU16BE, resU8;
2595
2596 static const char* const pairs[][2] = {
2597 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2598 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2599 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2600 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2601 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2602 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2603 { "\\u0020", "\\u0020\\u0000"}
2604 /*
2605 5F20 (my result here)
2606 5F204E008E3F
2607 5F20 (your result here)
2608 */
2609 };
2610
2611 int32_t i = 0;
2612
2613 UCollator *coll = ucol_open("", &status);
2614 if(status == U_FILE_ACCESS_ERROR) {
2615 log_data_err("Is your data around?\n");
2616 return;
2617 } else if(U_FAILURE(status)) {
2618 log_err("Error opening collator\n");
2619 return;
2620 }
2621 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2622 conv = ucnv_open("UTF16BE", &status);
2623 for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2624 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2625 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626
2627 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2628
2629 log_verbose("Result of strcoll is %i\n", resU16);
2630
2631 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2632 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2633 (void)U16BELenS; /* Suppress set but not used warnings. */
2634 (void)U16BELenT;
2635
2636 /* use the original sizes, as the result from converter is in bytes */
2637 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2638 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2639
2640 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2641
2642 log_verbose("Result of U16BE is %i\n", resU16BE);
2643
2644 if(resU16 != resU16BE) {
2645 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2646 }
2647
2648 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2649 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2650
2651 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2652 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2653
2654 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2655
2656 if(resU16 != resU8) {
2657 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2658 }
2659
2660 }
2661
2662 ucol_close(coll);
2663 ucnv_close(conv);
2664 }
2665
TestHebrewUCA(void)2666 static void TestHebrewUCA(void) {
2667 UErrorCode status = U_ZERO_ERROR;
2668 static const char *first[] = {
2669 "d790d6b8d79cd795d6bcd7a9",
2670 "d790d79cd79ed7a7d799d799d7a1",
2671 "d790d6b4d79ed795d6bcd7a9",
2672 };
2673
2674 char utf8String[3][256];
2675 UChar utf16String[3][256];
2676
2677 int32_t i = 0, j = 0;
2678 int32_t sizeUTF8[3];
2679 int32_t sizeUTF16[3];
2680
2681 UCollator *coll = ucol_open("", &status);
2682 if (U_FAILURE(status)) {
2683 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2684 return;
2685 }
2686 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2687
2688 for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2689 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2690 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2691 log_verbose("%i: ");
2692 for(j = 0; j < sizeUTF16[i]; j++) {
2693 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2694 log_verbose("%04X", utf16String[i][j]);
2695 }
2696 log_verbose("\n");
2697 }
2698 for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2699 for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2700 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2701 }
2702 }
2703
2704 ucol_close(coll);
2705
2706 }
2707
TestPartialSortKeyTermination(void)2708 static void TestPartialSortKeyTermination(void) {
2709 static const char* cases[] = {
2710 "\\u1234\\u1234\\udc00",
2711 "\\udc00\\ud800\\ud800"
2712 };
2713
2714 int32_t i;
2715
2716 UErrorCode status = U_ZERO_ERROR;
2717
2718 UCollator *coll = ucol_open("", &status);
2719
2720 UCharIterator iter;
2721
2722 UChar currCase[256];
2723 int32_t length = 0;
2724 int32_t pKeyLen = 0;
2725
2726 uint8_t key[256];
2727
2728 for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2729 uint32_t state[2] = {0, 0};
2730 length = u_unescape(cases[i], currCase, 256);
2731 uiter_setString(&iter, currCase, length);
2732 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2733 (void)pKeyLen; /* Suppress set but not used warning. */
2734
2735 log_verbose("Done\n");
2736
2737 }
2738 ucol_close(coll);
2739 }
2740
TestSettings(void)2741 static void TestSettings(void) {
2742 static const char* cases[] = {
2743 "apple",
2744 "Apple"
2745 };
2746
2747 static const char* locales[] = {
2748 "",
2749 "en"
2750 };
2751
2752 UErrorCode status = U_ZERO_ERROR;
2753
2754 int32_t i = 0, j = 0;
2755
2756 UChar source[256], target[256];
2757 int32_t sLen = 0, tLen = 0;
2758
2759 UCollator *collateObject = NULL;
2760 for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2761 collateObject = ucol_open(locales[i], &status);
2762 ucol_setStrength(collateObject, UCOL_PRIMARY);
2763 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2764 for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2765 sLen = u_unescape(cases[j-1], source, 256);
2766 source[sLen] = 0;
2767 tLen = u_unescape(cases[j], target, 256);
2768 source[tLen] = 0;
2769 doTest(collateObject, source, target, UCOL_EQUAL);
2770 }
2771 ucol_close(collateObject);
2772 }
2773 }
2774
TestEqualsForCollator(const char * locName,UCollator * source,UCollator * target)2775 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2776 UErrorCode status = U_ZERO_ERROR;
2777 int32_t errorNo = 0;
2778 const UChar *sourceRules = NULL;
2779 int32_t sourceRulesLen = 0;
2780 UParseError parseError;
2781 UColAttributeValue french = UCOL_OFF;
2782
2783 if(!ucol_equals(source, target)) {
2784 log_err("Same collators, different address not equal\n");
2785 errorNo++;
2786 }
2787 ucol_close(target);
2788 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2789 target = ucol_safeClone(source, NULL, NULL, &status);
2790 if(U_FAILURE(status)) {
2791 log_err("Error creating clone\n");
2792 errorNo++;
2793 return errorNo;
2794 }
2795 if(!ucol_equals(source, target)) {
2796 log_err("Collator different from it's clone\n");
2797 errorNo++;
2798 }
2799 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2800 if(french == UCOL_ON) {
2801 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2802 } else {
2803 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2804 }
2805 if(U_FAILURE(status)) {
2806 log_err("Error setting attributes\n");
2807 errorNo++;
2808 return errorNo;
2809 }
2810 if(ucol_equals(source, target)) {
2811 log_err("Collators same even when options changed\n");
2812 errorNo++;
2813 }
2814 ucol_close(target);
2815
2816 sourceRules = ucol_getRules(source, &sourceRulesLen);
2817 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2818 if(U_FAILURE(status)) {
2819 log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2820 errorNo++;
2821 return errorNo;
2822 }
2823 /* Note: The tailoring rule string is an optional data item. */
2824 if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2825 log_err("Collator different from collator that was created from the same rules\n");
2826 errorNo++;
2827 }
2828 ucol_close(target);
2829 }
2830 return errorNo;
2831 }
2832
2833
TestEquals(void)2834 static void TestEquals(void) {
2835 /* ucol_equals is not currently a public API. There is a chance that it will become
2836 * something like this.
2837 */
2838 /* test whether the two collators instantiated from the same locale are equal */
2839 UErrorCode status = U_ZERO_ERROR;
2840 UParseError parseError;
2841 int32_t noOfLoc = uloc_countAvailable();
2842 const char *locName = NULL;
2843 UCollator *source = NULL, *target = NULL;
2844 int32_t i = 0;
2845
2846 const char* rules[] = {
2847 "&l < lj <<< Lj <<< LJ",
2848 "&n < nj <<< Nj <<< NJ",
2849 "&ae <<< \\u00e4",
2850 "&AE <<< \\u00c4"
2851 };
2852 /*
2853 const char* badRules[] = {
2854 "&l <<< Lj",
2855 "&n < nj <<< nJ <<< NJ",
2856 "&a <<< \\u00e4",
2857 "&AE <<< \\u00c4 <<< x"
2858 };
2859 */
2860
2861 UChar sourceRules[1024], targetRules[1024];
2862 int32_t sourceRulesSize = 0, targetRulesSize = 0;
2863 int32_t rulesSize = UPRV_LENGTHOF(rules);
2864
2865 for(i = 0; i < rulesSize; i++) {
2866 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2867 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2868 }
2869
2870 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2871 if(status == U_FILE_ACCESS_ERROR) {
2872 log_data_err("Is your data around?\n");
2873 return;
2874 } else if(U_FAILURE(status)) {
2875 log_err("Error opening collator\n");
2876 return;
2877 }
2878 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2879 if(!ucol_equals(source, target)) {
2880 log_err("Equivalent collators not equal!\n");
2881 }
2882 ucol_close(source);
2883 ucol_close(target);
2884
2885 source = ucol_open("root", &status);
2886 target = ucol_open("root", &status);
2887 log_verbose("Testing root\n");
2888 if(!ucol_equals(source, source)) {
2889 log_err("Same collator not equal\n");
2890 }
2891 if(TestEqualsForCollator("root", source, target)) {
2892 log_err("Errors for root\n");
2893 }
2894 ucol_close(source);
2895
2896 for(i = 0; i<noOfLoc; i++) {
2897 status = U_ZERO_ERROR;
2898 locName = uloc_getAvailable(i);
2899 /*if(hasCollationElements(locName)) {*/
2900 log_verbose("Testing equality for locale %s\n", locName);
2901 source = ucol_open(locName, &status);
2902 target = ucol_open(locName, &status);
2903 if (U_FAILURE(status)) {
2904 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
2905 continue;
2906 }
2907 if(TestEqualsForCollator(locName, source, target)) {
2908 log_err("Errors for locale %s\n", locName);
2909 }
2910 ucol_close(source);
2911 /*}*/
2912 }
2913 }
2914
TestJ2726(void)2915 static void TestJ2726(void) {
2916 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2917 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2918 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2919 UErrorCode status = U_ZERO_ERROR;
2920 UCollator *coll = ucol_open("en", &status);
2921 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2922 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2923 doTest(coll, a, aSpace, UCOL_EQUAL);
2924 doTest(coll, aSpace, a, UCOL_EQUAL);
2925 doTest(coll, a, spaceA, UCOL_EQUAL);
2926 doTest(coll, spaceA, a, UCOL_EQUAL);
2927 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2928 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2929 ucol_close(coll);
2930 }
2931
NullRule(void)2932 static void NullRule(void) {
2933 UChar r[3] = {0};
2934 UErrorCode status = U_ZERO_ERROR;
2935 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2936 if(U_SUCCESS(status)) {
2937 log_err("This should have been an error!\n");
2938 ucol_close(coll);
2939 } else {
2940 status = U_ZERO_ERROR;
2941 }
2942 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2943 if(U_FAILURE(status)) {
2944 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2945 } else {
2946 ucol_close(coll);
2947 }
2948 }
2949
2950 /**
2951 * Test for CollationElementIterator previous and next for the whole set of
2952 * unicode characters with normalization on.
2953 */
TestNumericCollation(void)2954 static void TestNumericCollation(void)
2955 {
2956 UErrorCode status = U_ZERO_ERROR;
2957
2958 const static char *basicTestStrings[]={
2959 "hello1",
2960 "hello2",
2961 "hello2002",
2962 "hello2003",
2963 "hello123456",
2964 "hello1234567",
2965 "hello10000000",
2966 "hello100000000",
2967 "hello1000000000",
2968 "hello10000000000",
2969 };
2970
2971 const static char *preZeroTestStrings[]={
2972 "avery10000",
2973 "avery010000",
2974 "avery0010000",
2975 "avery00010000",
2976 "avery000010000",
2977 "avery0000010000",
2978 "avery00000010000",
2979 "avery000000010000",
2980 };
2981
2982 const static char *thirtyTwoBitNumericStrings[]={
2983 "avery42949672960",
2984 "avery42949672961",
2985 "avery42949672962",
2986 "avery429496729610"
2987 };
2988
2989 const static char *longNumericStrings[]={
2990 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2991 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2992 are treated as multiple collation elements. */
2993 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2994 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2995 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2996 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2997 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2998 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2999 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3000 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3001 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3002 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3003 };
3004
3005 const static char *supplementaryDigits[] = {
3006 "\\uD835\\uDFCE", /* 0 */
3007 "\\uD835\\uDFCF", /* 1 */
3008 "\\uD835\\uDFD0", /* 2 */
3009 "\\uD835\\uDFD1", /* 3 */
3010 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3011 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3012 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3013 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3014 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3015 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3016 };
3017
3018 const static char *foreignDigits[] = {
3019 "\\u0661",
3020 "\\u0662",
3021 "\\u0663",
3022 "\\u0661\\u0660",
3023 "\\u0661\\u0662",
3024 "\\u0661\\u0663",
3025 "\\u0662\\u0660",
3026 "\\u0662\\u0662",
3027 "\\u0662\\u0663",
3028 "\\u0663\\u0660",
3029 "\\u0663\\u0662",
3030 "\\u0663\\u0663"
3031 };
3032
3033 const static char *evenZeroes[] = {
3034 "2000",
3035 "2001",
3036 "2002",
3037 "2003"
3038 };
3039
3040 UColAttribute att = UCOL_NUMERIC_COLLATION;
3041 UColAttributeValue val = UCOL_ON;
3042
3043 /* Open our collator. */
3044 UCollator* coll = ucol_open("root", &status);
3045 if (U_FAILURE(status)){
3046 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3047 myErrorName(status));
3048 return;
3049 }
3050 genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3051 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3052 genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3053 genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3054 genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3055 genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3056
3057 /* Setting up our collator to do digits. */
3058 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3059 if (U_FAILURE(status)){
3060 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3061 myErrorName(status));
3062 return;
3063 }
3064
3065 /*
3066 Testing that prepended zeroes still yield the correct collation behavior.
3067 We expect that every element in our strings array will be equal.
3068 */
3069 genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3070
3071 ucol_close(coll);
3072 }
3073
TestTibetanConformance(void)3074 static void TestTibetanConformance(void)
3075 {
3076 const char* test[] = {
3077 "\\u0FB2\\u0591\\u0F71\\u0061",
3078 "\\u0FB2\\u0F71\\u0061"
3079 };
3080
3081 UErrorCode status = U_ZERO_ERROR;
3082 UCollator *coll = ucol_open("", &status);
3083 UChar source[100];
3084 UChar target[100];
3085 int result;
3086 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3087 if (U_SUCCESS(status)) {
3088 u_unescape(test[0], source, 100);
3089 u_unescape(test[1], target, 100);
3090 doTest(coll, source, target, UCOL_EQUAL);
3091 result = ucol_strcoll(coll, source, -1, target, -1);
3092 log_verbose("result %d\n", result);
3093 if (UCOL_EQUAL != result) {
3094 log_err("Tibetan comparison error\n");
3095 }
3096 }
3097 ucol_close(coll);
3098
3099 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3100 }
3101
/**
 * Feeds a pair of Han strings to genericLocaleStarter() for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *hanPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanPair, UPRV_LENGTHOF(hanPair));
}
3106
3107 /**
3108 * Iterate through the given iterator, checking to see that all the strings
3109 * in the expected array are present.
3110 * @param expected array of strings we expect to see, or NULL
3111 * @param expectedCount number of elements of expected, or 0
3112 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3113 static int32_t checkUEnumeration(const char* msg,
3114 UEnumeration* iter,
3115 const char** expected,
3116 int32_t expectedCount) {
3117 UErrorCode ec = U_ZERO_ERROR;
3118 int32_t i = 0, n, j, bit;
3119 int32_t seenMask = 0;
3120
3121 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3122 n = uenum_count(iter, &ec);
3123 if (!assertSuccess("count", &ec)) return -1;
3124 log_verbose("%s = [", msg);
3125 for (;; ++i) {
3126 const char* s = uenum_next(iter, NULL, &ec);
3127 if (!assertSuccess("snext", &ec) || s == NULL) break;
3128 if (i != 0) log_verbose(",");
3129 log_verbose("%s", s);
3130 /* check expected list */
3131 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3132 if ((seenMask&bit) == 0 &&
3133 uprv_strcmp(s, expected[j]) == 0) {
3134 seenMask |= bit;
3135 break;
3136 }
3137 }
3138 }
3139 log_verbose("] (%d)\n", i);
3140 assertTrue("count verified", i==n);
3141 /* did we see all expected strings? */
3142 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3143 if ((seenMask&bit)!=0) {
3144 log_verbose("Ok: \"%s\" seen\n", expected[j]);
3145 } else {
3146 log_err("FAIL: \"%s\" not seen\n", expected[j]);
3147 }
3148 }
3149 return n;
3150 }
3151
3152 /**
3153 * Test new API added for separate collation tree.
3154 */
TestSeparateTrees(void)3155 static void TestSeparateTrees(void) {
3156 UErrorCode ec = U_ZERO_ERROR;
3157 UEnumeration *e = NULL;
3158 int32_t n = -1;
3159 UBool isAvailable;
3160 char loc[256];
3161
3162 static const char* AVAIL[] = { "en", "de" };
3163
3164 static const char* KW[] = { "collation" };
3165
3166 static const char* KWVAL[] = { "phonebook", "stroke" };
3167
3168 #if !UCONFIG_NO_SERVICE
3169 e = ucol_openAvailableLocales(&ec);
3170 if (e != NULL) {
3171 assertSuccess("ucol_openAvailableLocales", &ec);
3172 assertTrue("ucol_openAvailableLocales!=0", e!=0);
3173 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3174 (void)n; /* Suppress set but not used warnings. */
3175 /* Don't need to check n because we check list */
3176 uenum_close(e);
3177 } else {
3178 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3179 }
3180 #endif
3181
3182 e = ucol_getKeywords(&ec);
3183 if (e != NULL) {
3184 assertSuccess("ucol_getKeywords", &ec);
3185 assertTrue("ucol_getKeywords!=0", e!=0);
3186 n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3187 /* Don't need to check n because we check list */
3188 uenum_close(e);
3189 } else {
3190 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3191 }
3192
3193 e = ucol_getKeywordValues(KW[0], &ec);
3194 if (e != NULL) {
3195 assertSuccess("ucol_getKeywordValues", &ec);
3196 assertTrue("ucol_getKeywordValues!=0", e!=0);
3197 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3198 /* Don't need to check n because we check list */
3199 uenum_close(e);
3200 } else {
3201 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3202 }
3203
3204 /* Try setting a warning before calling ucol_getKeywordValues */
3205 ec = U_USING_FALLBACK_WARNING;
3206 e = ucol_getKeywordValues(KW[0], &ec);
3207 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3208 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3209 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3210 /* Don't need to check n because we check list */
3211 uenum_close(e);
3212 }
3213
3214 /*
3215 U_CAPI int32_t U_EXPORT2
3216 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3217 const char* locale, UBool* isAvailable,
3218 UErrorCode* status);
3219 }
3220 */
3221 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3222 &isAvailable, &ec);
3223 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3224 assertEquals("getFunctionalEquivalent(de)", "root", loc);
3225 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3226 isAvailable == TRUE);
3227 }
3228
3229 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3230 &isAvailable, &ec);
3231 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3232 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3233 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3234 isAvailable == FALSE);
3235 }
3236 }
3237
/*
 * Supersedes TestJ784.
 *
 * Runs two string tables through both a hand-written "[before 2]" tailoring
 * for tone-marked vowels and the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char toneRules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* Short syllables, with and without a macron on the vowel. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Upper/lower-case combinations of x followed by tone-marked and plain a. */
    static const char *caseVariants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(toneRules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));
    genericRulesStarter(toneRules, caseVariants, UPRV_LENGTHOF(caseVariants));
    genericLocaleStarter("zh", caseVariants, UPRV_LENGTHOF(caseVariants));
}
3298
TestBeforeTightening(void)3299 static void TestBeforeTightening(void) {
3300 static const struct {
3301 const char *rules;
3302 UErrorCode expectedStatus;
3303 } tests[] = {
3304 { "&[before 1]a<x", U_ZERO_ERROR },
3305 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3306 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3307 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3308 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3309 { "&[before 2]a<<x",U_ZERO_ERROR },
3310 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3311 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3312 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
3313 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
3314 { "&[before 3]a<<<x",U_ZERO_ERROR },
3315 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
3316 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3317 };
3318
3319 int32_t i = 0;
3320
3321 UErrorCode status = U_ZERO_ERROR;
3322 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3323 uint32_t rlen = 0;
3324
3325 UCollator *coll = NULL;
3326
3327
3328 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3329 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3330 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3331 if(status != tests[i].expectedStatus) {
3332 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3333 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3334 }
3335 ucol_close(coll);
3336 status = U_ZERO_ERROR;
3337 }
3338
3339 }
3340
3341 /*
3342 &m < a
3343 &[before 1] a < x <<< X << q <<< Q < z
3344 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3345
3346 &m < a
3347 &[before 2] a << x <<< X << q <<< Q < z
3348 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3349
3350 &m < a
3351 &[before 3] a <<< x <<< X << q <<< Q < z
3352 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3353
3354
3355 &m << a
3356 &[before 1] a < x <<< X << q <<< Q < z
3357 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3358
3359 &m << a
3360 &[before 2] a << x <<< X << q <<< Q < z
3361 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3362
3363 &m << a
3364 &[before 3] a <<< x <<< X << q <<< Q < z
3365 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3366
3367
3368 &m <<< a
3369 &[before 1] a < x <<< X << q <<< Q < z
3370 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3371
3372 &m <<< a
3373 &[before 2] a << x <<< X << q <<< Q < z
3374 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3375
3376 &m <<< a
3377 &[before 3] a <<< x <<< X << q <<< Q < z
3378 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3379
3380
3381 &[before 1] s < x <<< X << q <<< Q < z
3382 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3383
3384 &[before 2] s << x <<< X << q <<< Q < z
3385 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3386
3387 &[before 3] s <<< x <<< X << q <<< Q < z
3388 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3389
3390
3391 &[before 1] \u24DC < x <<< X << q <<< Q < z
3392 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3393
3394 &[before 2] \u24DC << x <<< X << q <<< Q < z
3395 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3396
3397 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3398 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
3399 */
3400
3401
3402 #if 0
3403 /* requires features not yet supported */
/*
 * Currently compiled out by the surrounding "#if 0".
 *
 * Table-driven test: each entry pairs a tailoring that uses "[before N]" with
 * the list of strings (and its length) that is passed, together with the
 * rules, to genericRulesStarter(). The entries correspond to the
 * "assert:" orderings listed in the comment block preceding this function.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;      /* tailoring rule string */
        const char* order[16];  /* strings handed to genericRulesStarter */
        int32_t size;           /* number of used entries in order[] */
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every table entry through the generic rules-based test driver. */
    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
3448 #endif
3449
TestTailorNULL(void)3450 static void TestTailorNULL( void ) {
3451 const static char* rule = "&a <<< '\\u0000'";
3452 UErrorCode status = U_ZERO_ERROR;
3453 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3454 uint32_t rlen = 0;
3455 UChar a = 1, null = 0;
3456 UCollationResult res = UCOL_EQUAL;
3457
3458 UCollator *coll = NULL;
3459
3460
3461 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3462 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3463
3464 if(U_FAILURE(status)) {
3465 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3466 } else {
3467 res = ucol_strcoll(coll, &a, 1, &null, 1);
3468
3469 if(res != UCOL_LESS) {
3470 log_err("NULL was not tailored properly!\n");
3471 }
3472 }
3473
3474 ucol_close(coll);
3475 }
3476
3477 static void
TestUpperFirstQuaternary(void)3478 TestUpperFirstQuaternary(void)
3479 {
3480 const char* tests[] = { "B", "b", "Bb", "bB" };
3481 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3482 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3483 genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3484 }
3485
3486 static void
TestJ4960(void)3487 TestJ4960(void)
3488 {
3489 const char* tests[] = { "\\u00e2T", "aT" };
3490 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3491 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3492 const char* tests2[] = { "a", "A" };
3493 const char* rule = "&[first tertiary ignorable]=A=a";
3494 UColAttribute att2[] = { UCOL_CASE_LEVEL };
3495 UColAttributeValue attVals2[] = { UCOL_ON };
3496 /* Test whether we correctly ignore primary ignorables on case level when */
3497 /* we have only primary & case level */
3498 genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3499 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3500 /* and case level */
3501 genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3502 /* Test whether completely ignorable letters have case level info (they shouldn't) */
3503 genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3504 }
3505
3506 static void
TestJ5223(void)3507 TestJ5223(void)
3508 {
3509 static const char *test = "this is a test string";
3510 UChar ustr[256];
3511 int32_t ustr_length = u_unescape(test, ustr, 256);
3512 unsigned char sortkey[256];
3513 int32_t sortkey_length;
3514 UErrorCode status = U_ZERO_ERROR;
3515 static UCollator *coll = NULL;
3516 coll = ucol_open("root", &status);
3517 if(U_FAILURE(status)) {
3518 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3519 return;
3520 }
3521 ucol_setStrength(coll, UCOL_PRIMARY);
3522 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3523 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3524 if (U_FAILURE(status)) {
3525 log_err("Failed setting atributes\n");
3526 return;
3527 }
3528 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3529 if (sortkey_length > 256) return;
3530
3531 /* we mark the position where the null byte should be written in advance */
3532 sortkey[sortkey_length-1] = 0xAA;
3533
3534 /* we set the buffer size one byte higher than needed */
3535 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3536 sortkey_length+1);
3537
3538 /* no error occurs (for me) */
3539 if (sortkey[sortkey_length-1] == 0xAA) {
3540 log_err("Hit bug at first try\n");
3541 }
3542
3543 /* we mark the position where the null byte should be written again */
3544 sortkey[sortkey_length-1] = 0xAA;
3545
3546 /* this time we set the buffer size to the exact amount needed */
3547 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3548 sortkey_length);
3549
3550 /* now the trailing null byte is not written */
3551 if (sortkey[sortkey_length-1] == 0xAA) {
3552 log_err("Hit bug at second try\n");
3553 }
3554
3555 ucol_close(coll);
3556 }
3557
3558 /* Regression test for Thai partial sort key problem */
3559 static void
TestJ5232(void)3560 TestJ5232(void)
3561 {
3562 const static char *test[] = {
3563 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3564 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3565 };
3566
3567 genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
3568 }
3569
/**
 * Hands { "a", "y" } to genericRulesStarter() with a tailoring that resets on
 * the contraction "Ny" and then places 'a' after the first secondary ignorable.
 */
static void
TestJ5367(void)
{
    static const char *letters[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, letters, UPRV_LENGTHOF(letters));
}
3577
3578 static void
TestVI5913(void)3579 TestVI5913(void)
3580 {
3581 UErrorCode status = U_ZERO_ERROR;
3582 int32_t i, j;
3583 UCollator *coll =NULL;
3584 uint8_t resColl[100], expColl[100];
3585 int32_t rLen, tLen, ruleLen, sLen, kLen;
3586 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
3587 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
3588 /*
3589 * Note: Just tailoring &z<ae^ does not work as expected:
3590 * The UCA spec requires for discontiguous contractions that they
3591 * extend an *existing match* by one combining mark at a time.
3592 * Therefore, ae must be a contraction so that the builder finds
3593 * discontiguous contractions for ae^, for example with an intervening underdot.
3594 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3595 */
3596 UChar rule3[256]={
3597 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3598 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3599 0};
3600 static const UChar tData[][20]={
3601 {0x1EAC, 0},
3602 {0x0041, 0x0323, 0x0302, 0},
3603 {0x1EA0, 0x0302, 0},
3604 {0x00C2, 0x0323, 0},
3605 {0x1ED8, 0}, /* O with dot and circumflex */
3606 {0x1ECC, 0x0302, 0},
3607 {0x1EB7, 0},
3608 {0x1EA1, 0x0306, 0},
3609 };
3610 static const UChar tailorData[][20]={
3611 {0x1FA2, 0}, /* Omega with 3 combining marks */
3612 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3613 {0x1FF3, 0x0313, 0x0300, 0},
3614 {0x1F60, 0x0300, 0x0345, 0},
3615 {0x1F62, 0x0345, 0},
3616 {0x1FA0, 0x0300, 0},
3617 };
3618 static const UChar tailorData2[][20]={
3619 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3620 {0x0073, 0x0323, 0x030C, 0},
3621 {0x0073, 0x030C, 0x0323, 0},
3622 };
3623 static const UChar tailorData3[][20]={
3624 {0x007a, 0}, /* z */
3625 {0x0061, 0x0065, 0}, /* a + e */
3626 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3627 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3628 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3629 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3630 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3631 {0x00EA, 0}, /* e with circumflex */
3632 };
3633
3634 /* Test Vietnamese sort. */
3635 coll = ucol_open("vi", &status);
3636 if(U_FAILURE(status)) {
3637 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3638 return;
3639 }
3640 log_verbose("\n\nVI collation:");
3641 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3642 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3643 }
3644 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3645 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3646 }
3647 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3648 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3649 }
3650 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3651 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3652 }
3653
3654 for (j=0; j<8; j++) {
3655 tLen = u_strlen(tData[j]);
3656 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3657 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3658 for(i = 0; i<rLen; i++) {
3659 log_verbose(" %02X", resColl[i]);
3660 }
3661 }
3662
3663 ucol_close(coll);
3664
3665 /* Test Romanian sort. */
3666 coll = ucol_open("ro", &status);
3667 log_verbose("\n\nRO collation:");
3668 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3669 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3670 }
3671 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3672 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3673 }
3674 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3675 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3676 }
3677
3678 for (j=4; j<8; j++) {
3679 tLen = u_strlen(tData[j]);
3680 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3681 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3682 for(i = 0; i<rLen; i++) {
3683 log_verbose(" %02X", resColl[i]);
3684 }
3685 }
3686 ucol_close(coll);
3687
3688 /* Test the precomposed Greek character with 3 combining marks. */
3689 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3690 ruleLen = u_strlen(rule);
3691 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3692 if (U_FAILURE(status)) {
3693 log_err("ucol_openRules failed with %s\n", u_errorName(status));
3694 return;
3695 }
3696 sLen = u_strlen(tailorData[0]);
3697 for (j=1; j<6; j++) {
3698 tLen = u_strlen(tailorData[j]);
3699 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
3700 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3701 }
3702 }
3703 /* Test getSortKey. */
3704 tLen = u_strlen(tailorData[0]);
3705 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3706 for (j=0; j<6; j++) {
3707 tLen = u_strlen(tailorData[j]);
3708 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3709 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3710 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3711 for(i = 0; i<rLen; i++) {
3712 log_err(" %02X", resColl[i]);
3713 }
3714 }
3715 }
3716 ucol_close(coll);
3717
3718 log_verbose("\n\nTailoring test for s with caron:");
3719 ruleLen = u_strlen(rule2);
3720 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3721 tLen = u_strlen(tailorData2[0]);
3722 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3723 for (j=1; j<3; j++) {
3724 tLen = u_strlen(tailorData2[j]);
3725 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3726 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3727 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3728 for(i = 0; i<rLen; i++) {
3729 log_err(" %02X", resColl[i]);
3730 }
3731 }
3732 }
3733 ucol_close(coll);
3734
3735 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3736 ruleLen = u_strlen(rule3);
3737 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3738 tLen = u_strlen(tailorData3[3]);
3739 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3740 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3741 for(i = 0; i<kLen; i++) {
3742 log_verbose(" %02X", expColl[i]);
3743 }
3744 for (j=4; j<6; j++) {
3745 tLen = u_strlen(tailorData3[j]);
3746 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3747
3748 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3749 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3750 for(i = 0; i<rLen; i++) {
3751 log_err(" %02X", resColl[i]);
3752 }
3753 }
3754
3755 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3756 for(i = 0; i<rLen; i++) {
3757 log_verbose(" %02X", resColl[i]);
3758 }
3759 }
3760 ucol_close(coll);
3761 }
3762
3763 static void
TestTailor6179(void)3764 TestTailor6179(void)
3765 {
3766 UErrorCode status = U_ZERO_ERROR;
3767 int32_t i;
3768 UCollator *coll =NULL;
3769 uint8_t resColl[100];
3770 int32_t rLen, tLen, ruleLen;
3771 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
3772 static const UChar rule1[]={
3773 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3774 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3775 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3776 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3777 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3778 static const UChar rule2[]={
3779 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3780 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3781 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3782 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3783 0x3C,0x3C,0x20,0x62,0};
3784
3785 static const UChar tData1[][4]={
3786 {0x61, 0},
3787 {0x62, 0},
3788 { 0xFDD0,0x009E, 0}
3789 };
3790 static const UChar tData2[][4]={
3791 {0x61, 0},
3792 {0x62, 0},
3793 { 0xFDD0,0x009E, 0}
3794 };
3795
3796 /*
3797 * These values from FractionalUCA.txt will change,
3798 * and need to be updated here.
3799 * TODO: Make this not check for particular sort keys.
3800 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3801 */
3802 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3803 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3804 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3805 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3806
3807 UParseError parseError;
3808
3809 /* Test [Last Primary ignorable] */
3810
3811 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
3812 ruleLen = u_strlen(rule1);
3813 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3814 if (U_FAILURE(status)) {
3815 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3816 return;
3817 }
3818 tLen = u_strlen(tData1[0]);
3819 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3820 if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3821 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
3822 for(i = 0; i<rLen; i++) {
3823 log_err(" %02X", resColl[i]);
3824 }
3825 log_err("\n");
3826 }
3827 tLen = u_strlen(tData1[1]);
3828 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3829 if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3830 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
3831 for(i = 0; i<rLen; i++) {
3832 log_err(" %02X", resColl[i]);
3833 }
3834 log_err("\n");
3835 }
3836 ucol_close(coll);
3837
3838
3839 /* Test [Last Secondary ignorable] */
3840 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
3841 ruleLen = u_strlen(rule2);
3842 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3843 if (U_FAILURE(status)) {
3844 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3845 log_info(" offset=%d \"%s\" | \"%s\"\n",
3846 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3847 return;
3848 }
3849 tLen = u_strlen(tData2[0]);
3850 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3851 if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3852 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
3853 for(i = 0; i<rLen; i++) {
3854 log_err(" %02X", resColl[i]);
3855 }
3856 log_err("\n");
3857 }
3858 tLen = u_strlen(tData2[1]);
3859 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3860 if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3861 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
3862 for(i = 0; i<rLen; i++) {
3863 log_err(" %02X", resColl[i]);
3864 }
3865 log_err("\n");
3866 }
3867 ucol_close(coll);
3868 }
3869
3870 static void
TestUCAPrecontext(void)3871 TestUCAPrecontext(void)
3872 {
3873 UErrorCode status = U_ZERO_ERROR;
3874 int32_t i, j;
3875 UCollator *coll =NULL;
3876 uint8_t resColl[100], prevColl[100];
3877 int32_t rLen, tLen, ruleLen;
3878 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3879 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3880 /* & l middle-dot << a a is an expansion. */
3881
3882 UChar tData1[][20]={
3883 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3884 { 0x387, 0}, /* standalone middle dot(0x387) */
3885 { 0x61, 0}, /* a */
3886 { 0x6C, 0}, /* l */
3887 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
3888 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3889 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3890 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3891 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3892 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3893 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3894 };
3895
3896 log_verbose("\n\nEN collation:");
3897 coll = ucol_open("en", &status);
3898 if (U_FAILURE(status)) {
3899 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3900 return;
3901 }
3902 for (j=0; j<11; j++) {
3903 tLen = u_strlen(tData1[j]);
3904 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3905 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3906 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3907 j, tData1[j]);
3908 }
3909 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3910 for(i = 0; i<rLen; i++) {
3911 log_verbose(" %02X", resColl[i]);
3912 }
3913 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3914 }
3915 ucol_close(coll);
3916
3917
3918 log_verbose("\n\nJA collation:");
3919 coll = ucol_open("ja", &status);
3920 if (U_FAILURE(status)) {
3921 log_err("Tailoring test: &z <<a|- failed!");
3922 return;
3923 }
3924 for (j=0; j<11; j++) {
3925 tLen = u_strlen(tData1[j]);
3926 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3927 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3928 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3929 j, tData1[j]);
3930 }
3931 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3932 for(i = 0; i<rLen; i++) {
3933 log_verbose(" %02X", resColl[i]);
3934 }
3935 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3936 }
3937 ucol_close(coll);
3938
3939
3940 log_verbose("\n\nTailoring test: & middle dot < a ");
3941 ruleLen = u_strlen(rule1);
3942 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3943 if (U_FAILURE(status)) {
3944 log_err("Tailoring test: & middle dot < a failed!");
3945 return;
3946 }
3947 for (j=0; j<11; j++) {
3948 tLen = u_strlen(tData1[j]);
3949 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3950 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3951 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3952 j, tData1[j]);
3953 }
3954 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3955 for(i = 0; i<rLen; i++) {
3956 log_verbose(" %02X", resColl[i]);
3957 }
3958 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3959 }
3960 ucol_close(coll);
3961
3962
3963 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3964 ruleLen = u_strlen(rule2);
3965 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3966 if (U_FAILURE(status)) {
3967 log_err("Tailoring test: & l middle-dot << a failed!");
3968 return;
3969 }
3970 for (j=0; j<11; j++) {
3971 tLen = u_strlen(tData1[j]);
3972 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3973 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3974 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3975 j, tData1[j]);
3976 }
3977 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3978 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3979 j, tData1[j]);
3980 }
3981 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3982 for(i = 0; i<rLen; i++) {
3983 log_verbose(" %02X", resColl[i]);
3984 }
3985 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3986 }
3987 ucol_close(coll);
3988 }
3989
3990 static void
TestOutOfBuffer5468(void)3991 TestOutOfBuffer5468(void)
3992 {
3993 static const char *test = "\\u4e00";
3994 UChar ustr[256];
3995 int32_t ustr_length = u_unescape(test, ustr, 256);
3996 unsigned char shortKeyBuf[1];
3997 int32_t sortkey_length;
3998 UErrorCode status = U_ZERO_ERROR;
3999 static UCollator *coll = NULL;
4000
4001 coll = ucol_open("root", &status);
4002 if(U_FAILURE(status)) {
4003 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4004 return;
4005 }
4006 ucol_setStrength(coll, UCOL_PRIMARY);
4007 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4008 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4009 if (U_FAILURE(status)) {
4010 log_err("Failed setting atributes\n");
4011 return;
4012 }
4013
4014 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4015 if (sortkey_length != 4) {
4016 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
4017 }
4018 log_verbose("length of sortKey is %d", sortkey_length);
4019 ucol_close(coll);
4020 }
4021
4022 #define TSKC_DATA_SIZE 5
4023 #define TSKC_BUF_SIZE 50
4024 static void
TestSortKeyConsistency(void)4025 TestSortKeyConsistency(void)
4026 {
4027 UErrorCode icuRC = U_ZERO_ERROR;
4028 UCollator* ucol;
4029 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4030
4031 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4032 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033 int32_t i, j, i2;
4034
4035 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4036 if (U_FAILURE(icuRC))
4037 {
4038 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4039 return;
4040 }
4041
4042 for (i = 0; i < TSKC_DATA_SIZE; i++)
4043 {
4044 UCharIterator uiter;
4045 uint32_t state[2] = { 0, 0 };
4046 int32_t dataLen = i+1;
4047 for (j=0; j<TSKC_BUF_SIZE; j++)
4048 bufFull[i][j] = bufPart[i][j] = 0;
4049
4050 /* Full sort key */
4051 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4052
4053 /* Partial sort key */
4054 uiter_setString(&uiter, data, dataLen);
4055 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4056 if (U_FAILURE(icuRC))
4057 {
4058 log_err("ucol_nextSortKeyPart failed\n");
4059 ucol_close(ucol);
4060 return;
4061 }
4062
4063 for (i2=0; i2<i; i2++)
4064 {
4065 UBool fullMatch = TRUE;
4066 UBool partMatch = TRUE;
4067 for (j=0; j<TSKC_BUF_SIZE; j++)
4068 {
4069 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4070 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4071 }
4072 if (fullMatch != partMatch) {
4073 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4074 : "partial key was consistent, but full key changed\n");
4075 ucol_close(ucol);
4076 return;
4077 }
4078 }
4079 }
4080
4081 /*=============================================*/
4082 ucol_close(ucol);
4083 }
4084
4085 /* ticket: 6101 */
TestCroatianSortKey(void)4086 static void TestCroatianSortKey(void) {
4087 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4088 UErrorCode status = U_ZERO_ERROR;
4089 UCollator *ucol;
4090 UCharIterator iter;
4091
4092 static const UChar text[] = { 0x0044, 0xD81A };
4093
4094 size_t length = UPRV_LENGTHOF(text);
4095
4096 uint8_t textSortKey[32];
4097 size_t lenSortKey = 32;
4098 size_t actualSortKeyLen;
4099 uint32_t uStateInfo[2] = { 0, 0 };
4100
4101 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4102 if (U_FAILURE(status)) {
4103 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4104 return;
4105 }
4106
4107 uiter_setString(&iter, text, (int32_t)length);
4108
4109 actualSortKeyLen = ucol_nextSortKeyPart(
4110 ucol, &iter, (uint32_t*)uStateInfo,
4111 textSortKey, (int32_t)lenSortKey, &status
4112 );
4113
4114 if (actualSortKeyLen == lenSortKey) {
4115 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4116 }
4117
4118 ucol_close(ucol);
4119 }
4120
4121 /* ticket: 6140 */
4122 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4123 * they are both Hiragana and Katakana
4124 */
4125 #define SORTKEYLEN 50
TestHiragana(void)4126 static void TestHiragana(void) {
4127 UErrorCode status = U_ZERO_ERROR;
4128 UCollator* ucol;
4129 UCollationResult strcollresult;
4130 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4131 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4132 int32_t data1Len = UPRV_LENGTHOF(data1);
4133 int32_t data2Len = UPRV_LENGTHOF(data2);
4134 int32_t i, j;
4135 uint8_t sortKey1[SORTKEYLEN];
4136 uint8_t sortKey2[SORTKEYLEN];
4137
4138 UCharIterator uiter1;
4139 UCharIterator uiter2;
4140 uint32_t state1[2] = { 0, 0 };
4141 uint32_t state2[2] = { 0, 0 };
4142 int32_t keySize1;
4143 int32_t keySize2;
4144
4145 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4146 &status);
4147 if (U_FAILURE(status)) {
4148 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4149 return;
4150 }
4151
4152 /* Start of full sort keys */
4153 /* Full sort key1 */
4154 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4155 /* Full sort key2 */
4156 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4157 if (keySize1 == keySize2) {
4158 for (i = 0; i < keySize1; i++) {
4159 if (sortKey1[i] != sortKey2[i]) {
4160 log_err("Full sort keys are different. Should be equal.");
4161 }
4162 }
4163 } else {
4164 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4165 }
4166 /* End of full sort keys */
4167
4168 /* Start of partial sort keys */
4169 /* Partial sort key1 */
4170 uiter_setString(&uiter1, data1, data1Len);
4171 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4172 /* Partial sort key2 */
4173 uiter_setString(&uiter2, data2, data2Len);
4174 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4175 if (U_SUCCESS(status) && keySize1 == keySize2) {
4176 for (j = 0; j < keySize1; j++) {
4177 if (sortKey1[j] != sortKey2[j]) {
4178 log_err("Partial sort keys are different. Should be equal");
4179 }
4180 }
4181 } else {
4182 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4183 }
4184 /* End of partial sort keys */
4185
4186 /* Start of strcoll */
4187 /* Use ucol_strcoll() to determine ordering */
4188 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4189 if (strcollresult != UCOL_EQUAL) {
4190 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4191 }
4192
4193 ucol_close(ucol);
4194 }
4195
/*
 * Convenient struct for running collation tests: one pair of strings plus the
 * comparison result the collator is expected to produce for them.
 * Both strings are fixed-size arrays of MAX_TOKEN_LEN UChars; the tables in
 * this file initialize only the leading elements, so the remainder is zero.
 */
typedef struct {
    const UChar source[MAX_TOKEN_LEN]; /* String on left */
    const UChar target[MAX_TOKEN_LEN]; /* String on right */
    UCollationResult result; /* -1, 0 or +1, depending on collation */
} OneTestCase;
4202
4203 /*
4204 * Utility function to test one collation test case.
4205 * @param testcases Array of test cases.
4206 * @param n_testcases Size of the array testcases.
4207 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4208 * @param n_rules Size of the array str_rules.
4209 */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4210 static void doTestOneTestCase(const OneTestCase testcases[],
4211 int n_testcases,
4212 const char* str_rules[],
4213 int n_rules)
4214 {
4215 int rule_no, testcase_no;
4216 UChar rule[500];
4217 int32_t length = 0;
4218 UErrorCode status = U_ZERO_ERROR;
4219 UParseError parse_error;
4220 UCollator *myCollation;
4221
4222 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4223
4224 length = u_unescape(str_rules[rule_no], rule, 500);
4225 if (length == 0) {
4226 log_err("ERROR: The rule cannot be unescaped: %s\n");
4227 return;
4228 }
4229 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4230 if(U_FAILURE(status)){
4231 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4232 log_info(" offset=%d \"%s\" | \"%s\"\n",
4233 parse_error.offset,
4234 aescstrdup(parse_error.preContext, -1),
4235 aescstrdup(parse_error.postContext, -1));
4236 return;
4237 }
4238 log_verbose("Testing the <<* syntax\n");
4239 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4240 ucol_setStrength(myCollation, UCOL_TERTIARY);
4241 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4242 doTest(myCollation,
4243 testcases[testcase_no].source,
4244 testcases[testcase_no].target,
4245 testcases[testcase_no].result
4246 );
4247 }
4248 ucol_close(myCollation);
4249 }
4250 }
4251
/*
 * Expected comparison results for the tailoring
 * "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3" and its
 * list/range spellings; passed to doTestOneTestCase() by TestSameStrengthList,
 * TestSameStrengthListQuoted and TestSameStrengthListRanges.
 * Characters are given as code points; the trailing comment spells them out.
 */
const static OneTestCase rangeTestcases[] = {
    { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */
    { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */
    { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */

    { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */
    { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */
    { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */
    { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */
    { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */

    { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "m" */
    { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */
    { {0x0061}, {0x0068}, UCOL_LESS }, /* "a" < "h" */
    { {0x0061}, {0x0065}, UCOL_LESS }, /* "a" < "e" */

    { {0x0061}, {0x0031}, UCOL_EQUAL }, /* "a" = "1" */
    { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */
    { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */
    { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */
    { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "kb" */
    { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */
    { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */
    { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4279
/*
 * Expected comparison results for tailorings that place U+FFFB and the
 * supplementary code points U+10000..U+10002 (written as surrogate pairs)
 * after U+4E00; used by TestSameStrengthListSupplemental and
 * TestSameStrengthListSupplementalRanges.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
    { {0x4e00}, {0xfffb}, UCOL_LESS }, /* U+4E00 < U+FFFB */
    { {0xfffb}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFB < U+10000 */
    { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
    { {0x4e00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+4E00 < U+10001 */
    { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 */
    { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 (same pair as the previous entry) */
    { {0x4e00}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4291
/*
 * Expected comparison results for the tailoring
 * "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d" and its equivalent spellings;
 * used by TestSameStrengthListQwerty and TestSameStrengthListQuotedQwerty.
 * In the comments "<", "<<" and "<<<" name the level of the difference implied
 * by that tailoring; every entry only asserts UCOL_LESS.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
    { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */
    { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */

    { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" << "u" */
    { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" < "u" */

    { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" <<< "i" */
    { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" <<< "p" */

    { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */
    { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" << "u" */

    { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
      {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */
    { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
      {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4312
TestSameStrengthList(void)4313 static void TestSameStrengthList(void)
4314 {
4315 const char* strRules[] = {
4316 /* Normal */
4317 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4318
4319 /* Lists */
4320 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4321 };
4322 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4323 }
4324
TestSameStrengthListQuoted(void)4325 static void TestSameStrengthListQuoted(void)
4326 {
4327 const char* strRules[] = {
4328 /* Lists with quoted characters */
4329 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4330 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4331
4332 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4333 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4334
4335 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4336 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4337 };
4338 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4339 }
4340
TestSameStrengthListSupplemental(void)4341 static void TestSameStrengthListSupplemental(void)
4342 {
4343 const char* strRules[] = {
4344 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4345 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4346 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4347 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4348 };
4349 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4350 }
4351
TestSameStrengthListQwerty(void)4352 static void TestSameStrengthListQwerty(void)
4353 {
4354 const char* strRules[] = {
4355 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4356 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4357 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4358 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4359 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4360
4361 /* Quoted characters also will work if two quoted characters are not consecutive. */
4362 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4363
4364 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4365 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4366
4367 };
4368 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4369 }
4370
TestSameStrengthListQuotedQwerty(void)4371 static void TestSameStrengthListQuotedQwerty(void)
4372 {
4373 const char* strRules[] = {
4374 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4375 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4376 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4377
4378 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4379 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4380 };
4381 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4382 }
4383
TestSameStrengthListRanges(void)4384 static void TestSameStrengthListRanges(void)
4385 {
4386 const char* strRules[] = {
4387 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4388 };
4389 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4390 }
4391
TestSameStrengthListSupplementalRanges(void)4392 static void TestSameStrengthListSupplementalRanges(void)
4393 {
4394 const char* strRules[] = {
4395 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4396 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4397 };
4398 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4399 }
4400
TestSpecialCharacters(void)4401 static void TestSpecialCharacters(void)
4402 {
4403 const char* strRules[] = {
4404 /* Normal */
4405 "&';'<'+'<','<'-'<'&'<'*'",
4406
4407 /* List */
4408 "&';'<*'+,-&*'",
4409
4410 /* Range */
4411 "&';'<*'+'-'-&*'",
4412 };
4413
4414 const static OneTestCase specialCharacterStrings[] = {
4415 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
4416 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
4417 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
4418 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
4419 };
4420 doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4421 }
4422
TestPrivateUseCharacters(void)4423 static void TestPrivateUseCharacters(void)
4424 {
4425 const char* strRules[] = {
4426 /* Normal */
4427 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4428 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4429 };
4430
4431 const static OneTestCase privateUseCharacterStrings[] = {
4432 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4433 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4434 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4435 { {0xe2da}, {0xe2db}, UCOL_LESS },
4436 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4437 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4438 };
4439 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4440 }
4441
TestPrivateUseCharactersInList(void)4442 static void TestPrivateUseCharactersInList(void)
4443 {
4444 const char* strRules[] = {
4445 /* List */
4446 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4447 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4448 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4449 };
4450
4451 const static OneTestCase privateUseCharacterStrings[] = {
4452 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4453 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4454 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4455 { {0xe2da}, {0xe2db}, UCOL_LESS },
4456 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4457 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4458 };
4459 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4460 }
4461
TestPrivateUseCharactersInRange(void)4462 static void TestPrivateUseCharactersInRange(void)
4463 {
4464 const char* strRules[] = {
4465 /* Range */
4466 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4467 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4468 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4469 };
4470
4471 const static OneTestCase privateUseCharacterStrings[] = {
4472 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4473 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4474 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4475 { {0xe2da}, {0xe2db}, UCOL_LESS },
4476 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4477 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4478 };
4479 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4480 }
4481
TestInvalidListsAndRanges(void)4482 static void TestInvalidListsAndRanges(void)
4483 {
4484 const char* invalidRules[] = {
4485 /* Range not in starred expression */
4486 "&\\ufffe<\\uffff-\\U00010002",
4487
4488 /* Range without start */
4489 "&a<*-c",
4490
4491 /* Range without end */
4492 "&a<*b-",
4493
4494 /* More than one hyphen */
4495 "&a<*b-g-l",
4496
4497 /* Range in the wrong order */
4498 "&a<*k-b",
4499
4500 };
4501
4502 UChar rule[500];
4503 UErrorCode status = U_ZERO_ERROR;
4504 UParseError parse_error;
4505 int n_rules = UPRV_LENGTHOF(invalidRules);
4506 int rule_no;
4507 int length;
4508 UCollator *myCollation;
4509
4510 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4511
4512 length = u_unescape(invalidRules[rule_no], rule, 500);
4513 if (length == 0) {
4514 log_err("ERROR: The rule cannot be unescaped: %s\n");
4515 return;
4516 }
4517 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4518 (void)myCollation; /* Suppress set but not used warning. */
4519 if(!U_FAILURE(status)){
4520 log_err("ERROR: Could not cause a failure as expected: \n");
4521 }
4522 status = U_ZERO_ERROR;
4523 }
4524 }
4525
4526 /*
4527 * This test ensures that characters placed before a character in a different script have the same lead byte
4528 * in their collation key before and after script reordering.
4529 */
TestBeforeRuleWithScriptReordering(void)4530 static void TestBeforeRuleWithScriptReordering(void)
4531 {
4532 UParseError error;
4533 UErrorCode status = U_ZERO_ERROR;
4534 UCollator *myCollation;
4535 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4536 UChar rules[500];
4537 uint32_t rulesLength = 0;
4538 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4539 UCollationResult collResult;
4540
4541 uint8_t baseKey[256];
4542 uint32_t baseKeyLength;
4543 uint8_t beforeKey[256];
4544 uint32_t beforeKeyLength;
4545
4546 UChar base[] = { 0x03b1 }; /* base */
4547 int32_t baseLen = UPRV_LENGTHOF(base);
4548
4549 UChar before[] = { 0x0e01 }; /* ko kai */
4550 int32_t beforeLen = UPRV_LENGTHOF(before);
4551
4552 /*UChar *data[] = { before, base };
4553 genericRulesStarter(srules, data, 2);*/
4554
4555 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4556
4557 (void)beforeKeyLength; /* Suppress set but not used warnings. */
4558 (void)baseKeyLength;
4559
4560 /* build collator */
4561 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4562
4563 rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4564 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4565 if(U_FAILURE(status)) {
4566 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4567 return;
4568 }
4569
4570 /* check collation results - before rule applied but not script reordering */
4571 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4572 if (collResult != UCOL_GREATER) {
4573 log_err("Collation result not correct before script reordering = %d\n", collResult);
4574 }
4575
4576 /* check the lead byte of the collation keys before script reordering */
4577 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4578 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4579 if (baseKey[0] != beforeKey[0]) {
4580 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4581 }
4582
4583 /* reorder the scripts */
4584 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4585 if(U_FAILURE(status)) {
4586 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4587 return;
4588 }
4589
4590 /* check collation results - before rule applied and after script reordering */
4591 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4592 if (collResult != UCOL_GREATER) {
4593 log_err("Collation result not correct after script reordering = %d\n", collResult);
4594 }
4595
4596 /* check the lead byte of the collation keys after script reordering */
4597 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4598 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4599 if (baseKey[0] != beforeKey[0]) {
4600 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4601 }
4602
4603 ucol_close(myCollation);
4604 }
4605
4606 /*
4607 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4608 */
TestNonLeadBytesDuringCollationReordering(void)4609 static void TestNonLeadBytesDuringCollationReordering(void)
4610 {
4611 UErrorCode status = U_ZERO_ERROR;
4612 UCollator *myCollation;
4613 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4614
4615 uint8_t baseKey[256];
4616 uint32_t baseKeyLength;
4617 uint8_t reorderKey[256];
4618 uint32_t reorderKeyLength;
4619
4620 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4621
4622 uint32_t i;
4623
4624
4625 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4626
4627 /* build collator tertiary */
4628 myCollation = ucol_open("", &status);
4629 ucol_setStrength(myCollation, UCOL_TERTIARY);
4630 if(U_FAILURE(status)) {
4631 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4632 return;
4633 }
4634 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4635
4636 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4637 if(U_FAILURE(status)) {
4638 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4639 return;
4640 }
4641 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4642
4643 if (baseKeyLength != reorderKeyLength) {
4644 log_err("Key lengths not the same during reordering.\n");
4645 return;
4646 }
4647
4648 for (i = 1; i < baseKeyLength; i++) {
4649 if (baseKey[i] != reorderKey[i]) {
4650 log_err("Collation key bytes not the same at position %d.\n", i);
4651 return;
4652 }
4653 }
4654 ucol_close(myCollation);
4655
4656 /* build collator quaternary */
4657 myCollation = ucol_open("", &status);
4658 ucol_setStrength(myCollation, UCOL_QUATERNARY);
4659 if(U_FAILURE(status)) {
4660 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4661 return;
4662 }
4663 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4664
4665 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4666 if(U_FAILURE(status)) {
4667 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4668 return;
4669 }
4670 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4671
4672 if (baseKeyLength != reorderKeyLength) {
4673 log_err("Key lengths not the same during reordering.\n");
4674 return;
4675 }
4676
4677 for (i = 1; i < baseKeyLength; i++) {
4678 if (baseKey[i] != reorderKey[i]) {
4679 log_err("Collation key bytes not the same at position %d.\n", i);
4680 return;
4681 }
4682 }
4683 ucol_close(myCollation);
4684 }
4685
4686 /*
4687 * Test reordering API.
4688 */
TestReorderingAPI(void)4689 static void TestReorderingAPI(void)
4690 {
4691 UErrorCode status = U_ZERO_ERROR;
4692 UCollator *myCollation;
4693 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4695 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4696 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4697 UCollationResult collResult;
4698 int32_t retrievedReorderCodesLength;
4699 int32_t retrievedReorderCodes[10];
4700 UChar greekString[] = { 0x03b1 };
4701 UChar punctuationString[] = { 0x203e };
4702 int loopIndex;
4703
4704 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4705
4706 /* build collator tertiary */
4707 myCollation = ucol_open("", &status);
4708 ucol_setStrength(myCollation, UCOL_TERTIARY);
4709 if(U_FAILURE(status)) {
4710 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4711 return;
4712 }
4713
4714 /* set the reorderding */
4715 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4716 if (U_FAILURE(status)) {
4717 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4718 return;
4719 }
4720
4721 /* get the reordering */
4722 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4723 if (status != U_BUFFER_OVERFLOW_ERROR) {
4724 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4725 return;
4726 }
4727 status = U_ZERO_ERROR;
4728 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4729 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4730 return;
4731 }
4732 /* now let's really get it */
4733 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4734 if (U_FAILURE(status)) {
4735 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4736 return;
4737 }
4738 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4739 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4740 return;
4741 }
4742 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4743 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4744 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4745 return;
4746 }
4747 }
4748 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4749 if (collResult != UCOL_LESS) {
4750 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4751 return;
4752 }
4753
4754 /* clear the reordering */
4755 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4756 if (U_FAILURE(status)) {
4757 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4758 return;
4759 }
4760
4761 /* get the reordering again */
4762 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4763 if (retrievedReorderCodesLength != 0) {
4764 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4765 return;
4766 }
4767
4768 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4769 if (collResult != UCOL_GREATER) {
4770 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4771 return;
4772 }
4773
4774 /* clear the reordering using [NONE] */
4775 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4776 if (U_FAILURE(status)) {
4777 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4778 return;
4779 }
4780
4781 /* get the reordering again */
4782 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4783 if (retrievedReorderCodesLength != 0) {
4784 log_err_status(status,
4785 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4786 retrievedReorderCodesLength);
4787 return;
4788 }
4789
4790 /* test for error condition on duplicate reorder codes */
4791 ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4792 if (!U_FAILURE(status)) {
4793 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4794 return;
4795 }
4796
4797 status = U_ZERO_ERROR;
4798 /* test for reorder codes after a reset code */
4799 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4800 if (!U_FAILURE(status)) {
4801 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4802 return;
4803 }
4804
4805 ucol_close(myCollation);
4806 }
4807
4808 /*
4809 * Test reordering API.
4810 */
TestReorderingAPIWithRuleCreatedCollator(void)4811 static void TestReorderingAPIWithRuleCreatedCollator(void)
4812 {
4813 UErrorCode status = U_ZERO_ERROR;
4814 UCollator *myCollation;
4815 UChar rules[90];
4816 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4817 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4818 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4819 UCollationResult collResult;
4820 int32_t retrievedReorderCodesLength;
4821 int32_t retrievedReorderCodes[10];
4822 static const UChar greekString[] = { 0x03b1 };
4823 static const UChar punctuationString[] = { 0x203e };
4824 static const UChar hanString[] = { 0x65E5, 0x672C };
4825 int loopIndex;
4826
4827 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4828
4829 /* build collator from rules */
4830 u_uastrcpy(rules, "[reorder Hani Grek]");
4831 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4832 if(U_FAILURE(status)) {
4833 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4834 return;
4835 }
4836
4837 /* get the reordering */
4838 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4839 if (U_FAILURE(status)) {
4840 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4841 return;
4842 }
4843 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4844 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4845 return;
4846 }
4847 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4848 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4849 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4850 return;
4851 }
4852 }
4853 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4854 if (collResult != UCOL_GREATER) {
4855 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4856 return;
4857 }
4858
4859 /* set the reordering */
4860 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4861 if (U_FAILURE(status)) {
4862 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4863 return;
4864 }
4865
4866 /* get the reordering */
4867 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4868 if (status != U_BUFFER_OVERFLOW_ERROR) {
4869 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4870 return;
4871 }
4872 status = U_ZERO_ERROR;
4873 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4874 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4875 return;
4876 }
4877 /* now let's really get it */
4878 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4879 if (U_FAILURE(status)) {
4880 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4881 return;
4882 }
4883 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4884 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4885 return;
4886 }
4887 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4888 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4889 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4890 return;
4891 }
4892 }
4893 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4894 if (collResult != UCOL_LESS) {
4895 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4896 return;
4897 }
4898
4899 /* clear the reordering */
4900 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4901 if (U_FAILURE(status)) {
4902 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4903 return;
4904 }
4905
4906 /* get the reordering again */
4907 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4908 if (retrievedReorderCodesLength != 0) {
4909 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4910 return;
4911 }
4912
4913 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4914 if (collResult != UCOL_GREATER) {
4915 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4916 return;
4917 }
4918
4919 /* reset the reordering */
4920 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4921 if (U_FAILURE(status)) {
4922 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4923 return;
4924 }
4925 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4926 if (U_FAILURE(status)) {
4927 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4928 return;
4929 }
4930 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4931 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4932 return;
4933 }
4934 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4935 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4936 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4937 return;
4938 }
4939 }
4940
4941 ucol_close(myCollation);
4942 }
4943
containsExpectedScript(const int32_t scripts[],int32_t length,int32_t expectedScript)4944 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4945 int32_t i;
4946 for (i = 0; i < length; ++i) {
4947 if (expectedScript == scripts[i]) { return TRUE; }
4948 }
4949 return FALSE;
4950 }
4951
TestEquivalentReorderingScripts(void)4952 static void TestEquivalentReorderingScripts(void) {
4953 // Beginning with ICU 55, collation reordering moves single scripts
4954 // rather than groups of scripts,
4955 // except where scripts share a range and sort primary-equal.
4956 UErrorCode status = U_ZERO_ERROR;
4957 int32_t equivalentScripts[100];
4958 int32_t length;
4959 int i;
4960 int32_t prevScript;
4961 /* These scripts are expected to be equivalent. */
4962 static const int32_t expectedScripts[] = {
4963 USCRIPT_HIRAGANA,
4964 USCRIPT_KATAKANA,
4965 USCRIPT_KATAKANA_OR_HIRAGANA
4966 };
4967
4968 equivalentScripts[0] = 0;
4969 length = ucol_getEquivalentReorderCodes(
4970 USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4971 if (U_FAILURE(status)) {
4972 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4973 return;
4974 }
4975 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4976 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4977 "length expected 1, was = %d; expected [%d] was [%d]\n",
4978 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4979 }
4980
4981 length = ucol_getEquivalentReorderCodes(
4982 USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4983 if (U_FAILURE(status)) {
4984 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4985 return;
4986 }
4987 if (length != UPRV_LENGTHOF(expectedScripts)) {
4988 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4989 "expected %d, was = %d\n",
4990 UPRV_LENGTHOF(expectedScripts), length);
4991 }
4992 prevScript = -1;
4993 for (i = 0; i < length; ++i) {
4994 int32_t script = equivalentScripts[i];
4995 if (script <= prevScript) {
4996 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4997 }
4998 prevScript = script;
4999 }
5000 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5001 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5002 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5003 expectedScripts[i]);
5004 }
5005 }
5006
5007 length = ucol_getEquivalentReorderCodes(
5008 USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5009 if (U_FAILURE(status)) {
5010 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5011 return;
5012 }
5013 if (length != UPRV_LENGTHOF(expectedScripts)) {
5014 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5015 "expected %d, was = %d\n",
5016 UPRV_LENGTHOF(expectedScripts), length);
5017 }
5018 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5019 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5020 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5021 expectedScripts[i]);
5022 }
5023 }
5024
5025 length = ucol_getEquivalentReorderCodes(
5026 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5027 if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5028 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5029 "expected %d, was = %d\n",
5030 UPRV_LENGTHOF(expectedScripts), length);
5031 }
5032
5033 length = ucol_getEquivalentReorderCodes(
5034 USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5035 if (U_FAILURE(status) || length != 3) {
5036 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5037 "expected 3, was = %d\n", length);
5038 }
5039 length = ucol_getEquivalentReorderCodes(
5040 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5041 if (U_FAILURE(status) || length != 3) {
5042 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5043 "expected 3, was = %d\n", length);
5044 }
5045 length = ucol_getEquivalentReorderCodes(
5046 USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5047 if (U_FAILURE(status) || length != 3) {
5048 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5049 "expected 3, was = %d\n", length);
5050 }
5051
5052 length = ucol_getEquivalentReorderCodes(
5053 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5054 if (U_FAILURE(status) || length != 2) {
5055 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5056 "expected 2, was = %d\n", length);
5057 }
5058 length = ucol_getEquivalentReorderCodes(
5059 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5060 if (U_FAILURE(status) || length != 2) {
5061 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5062 "expected 2, was = %d\n", length);
5063 }
5064 }
5065
TestReorderingAcrossCloning(void)5066 static void TestReorderingAcrossCloning(void)
5067 {
5068 UErrorCode status = U_ZERO_ERROR;
5069 UCollator *myCollation;
5070 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5071 UCollator *clonedCollation;
5072 int32_t retrievedReorderCodesLength;
5073 int32_t retrievedReorderCodes[10];
5074 int loopIndex;
5075
5076 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5077
5078 /* build collator tertiary */
5079 myCollation = ucol_open("", &status);
5080 ucol_setStrength(myCollation, UCOL_TERTIARY);
5081 if(U_FAILURE(status)) {
5082 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5083 return;
5084 }
5085
5086 /* set the reorderding */
5087 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5088 if (U_FAILURE(status)) {
5089 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5090 return;
5091 }
5092
5093 /* clone the collator */
5094 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5095 if (U_FAILURE(status)) {
5096 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5097 return;
5098 }
5099
5100 /* get the reordering */
5101 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5102 if (U_FAILURE(status)) {
5103 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5104 return;
5105 }
5106 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5107 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5108 return;
5109 }
5110 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5111 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5112 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5113 return;
5114 }
5115 }
5116
5117 /*uprv_free(buffer);*/
5118 ucol_close(myCollation);
5119 ucol_close(clonedCollation);
5120 }
5121
5122 /*
5123 * Utility function to test one collation reordering test case set.
5124 * @param testcases Array of test cases.
5125 * @param n_testcases Size of the array testcases.
5126 * @param reorderTokens Array of reordering codes.
5127 * @param reorderTokensLen Size of the array reorderTokens.
5128 */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5129 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5130 {
5131 uint32_t testCaseNum;
5132 UErrorCode status = U_ZERO_ERROR;
5133 UCollator *myCollation;
5134
5135 myCollation = ucol_open("", &status);
5136 if (U_FAILURE(status)) {
5137 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5138 return;
5139 }
5140 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5141 if(U_FAILURE(status)) {
5142 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5143 return;
5144 }
5145
5146 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5147 doTest(myCollation,
5148 testCases[testCaseNum].source,
5149 testCases[testCaseNum].target,
5150 testCases[testCaseNum].result
5151 );
5152 }
5153 ucol_close(myCollation);
5154 }
5155
TestGreekFirstReorder(void)5156 static void TestGreekFirstReorder(void)
5157 {
5158 const char* strRules[] = {
5159 "[reorder Grek]"
5160 };
5161
5162 const int32_t apiRules[] = {
5163 USCRIPT_GREEK
5164 };
5165
5166 const static OneTestCase privateUseCharacterStrings[] = {
5167 { {0x0391}, {0x0391}, UCOL_EQUAL },
5168 { {0x0041}, {0x0391}, UCOL_GREATER },
5169 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5170 { {0x0060}, {0x0391}, UCOL_LESS },
5171 { {0x0391}, {0xe2dc}, UCOL_LESS },
5172 { {0x0391}, {0x0060}, UCOL_GREATER },
5173 };
5174
5175 /* Test rules creation */
5176 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5177
5178 /* Test collation reordering API */
5179 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5180 }
5181
TestGreekLastReorder(void)5182 static void TestGreekLastReorder(void)
5183 {
5184 const char* strRules[] = {
5185 "[reorder Zzzz Grek]"
5186 };
5187
5188 const int32_t apiRules[] = {
5189 USCRIPT_UNKNOWN, USCRIPT_GREEK
5190 };
5191
5192 const static OneTestCase privateUseCharacterStrings[] = {
5193 { {0x0391}, {0x0391}, UCOL_EQUAL },
5194 { {0x0041}, {0x0391}, UCOL_LESS },
5195 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5196 { {0x0060}, {0x0391}, UCOL_LESS },
5197 { {0x0391}, {0xe2dc}, UCOL_GREATER },
5198 };
5199
5200 /* Test rules creation */
5201 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5202
5203 /* Test collation reordering API */
5204 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5205 }
5206
TestNonScriptReorder(void)5207 static void TestNonScriptReorder(void)
5208 {
5209 const char* strRules[] = {
5210 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5211 };
5212
5213 const int32_t apiRules[] = {
5214 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5215 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5216 UCOL_REORDER_CODE_CURRENCY
5217 };
5218
5219 const static OneTestCase privateUseCharacterStrings[] = {
5220 { {0x0391}, {0x0041}, UCOL_LESS },
5221 { {0x0041}, {0x0391}, UCOL_GREATER },
5222 { {0x0060}, {0x0041}, UCOL_LESS },
5223 { {0x0060}, {0x0391}, UCOL_GREATER },
5224 { {0x0024}, {0x0041}, UCOL_GREATER },
5225 };
5226
5227 /* Test rules creation */
5228 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5229
5230 /* Test collation reordering API */
5231 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5232 }
5233
TestHaniReorder(void)5234 static void TestHaniReorder(void)
5235 {
5236 const char* strRules[] = {
5237 "[reorder Hani]"
5238 };
5239 const int32_t apiRules[] = {
5240 USCRIPT_HAN
5241 };
5242
5243 const static OneTestCase privateUseCharacterStrings[] = {
5244 { {0x4e00}, {0x0041}, UCOL_LESS },
5245 { {0x4e00}, {0x0060}, UCOL_GREATER },
5246 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5247 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5248 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5249 { {0xfa27}, {0x0041}, UCOL_LESS },
5250 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5251 };
5252
5253 /* Test rules creation */
5254 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5255
5256 /* Test collation reordering API */
5257 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5258 }
5259
TestHaniReorderWithOtherRules(void)5260 static void TestHaniReorderWithOtherRules(void)
5261 {
5262 const char* strRules[] = {
5263 "[reorder Hani] &b<a"
5264 };
5265 /*const int32_t apiRules[] = {
5266 USCRIPT_HAN
5267 };*/
5268
5269 const static OneTestCase privateUseCharacterStrings[] = {
5270 { {0x4e00}, {0x0041}, UCOL_LESS },
5271 { {0x4e00}, {0x0060}, UCOL_GREATER },
5272 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5273 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5274 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5275 { {0xfa27}, {0x0041}, UCOL_LESS },
5276 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5277 { {0x0062}, {0x0061}, UCOL_LESS },
5278 };
5279
5280 /* Test rules creation */
5281 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5282 }
5283
TestMultipleReorder(void)5284 static void TestMultipleReorder(void)
5285 {
5286 const char* strRules[] = {
5287 "[reorder Grek Zzzz DIGIT Latn Hani]"
5288 };
5289
5290 const int32_t apiRules[] = {
5291 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5292 };
5293
5294 const static OneTestCase collationTestCases[] = {
5295 { {0x0391}, {0x0041}, UCOL_LESS},
5296 { {0x0031}, {0x0041}, UCOL_LESS},
5297 { {0x0041}, {0x4e00}, UCOL_LESS},
5298 };
5299
5300 /* Test rules creation */
5301 doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5302
5303 /* Test collation reordering API */
5304 doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5305 }
5306
5307 /*
5308 * Test that covers issue reported in ticket 8814
5309 */
TestReorderWithNumericCollation(void)5310 static void TestReorderWithNumericCollation(void)
5311 {
5312 UErrorCode status = U_ZERO_ERROR;
5313 UCollator *myCollation;
5314 UCollator *myReorderCollation;
5315 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5316 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5317 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5318 UChar fortyS[] = { 0x0053 };
5319 UChar fortyThreeP[] = { 0x0050 };
5320 uint8_t fortyS_sortKey[128];
5321 int32_t fortyS_sortKey_Length;
5322 uint8_t fortyThreeP_sortKey[128];
5323 int32_t fortyThreeP_sortKey_Length;
5324 uint8_t fortyS_sortKey_reorder[128];
5325 int32_t fortyS_sortKey_reorder_Length;
5326 uint8_t fortyThreeP_sortKey_reorder[128];
5327 int32_t fortyThreeP_sortKey_reorder_Length;
5328 UCollationResult collResult;
5329 UCollationResult collResultReorder;
5330
5331 log_verbose("Testing reordering with and without numeric collation\n");
5332
5333 /* build collator tertiary with numeric */
5334 myCollation = ucol_open("", &status);
5335 /*
5336 ucol_setStrength(myCollation, UCOL_TERTIARY);
5337 */
5338 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5339 if(U_FAILURE(status)) {
5340 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5341 return;
5342 }
5343
5344 /* build collator tertiary with numeric and reordering */
5345 myReorderCollation = ucol_open("", &status);
5346 /*
5347 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5348 */
5349 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5350 ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5351 if(U_FAILURE(status)) {
5352 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5353 return;
5354 }
5355
5356 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5357 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5358 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5359 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5360
5361 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5362 log_err_status(status, "ERROR: couldn't generate sort keys\n");
5363 return;
5364 }
5365 collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5366 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367 /*
5368 fprintf(stderr, "\tcollResult = %x\n", collResult);
5369 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5370 fprintf(stderr, "\nfortyS\n");
5371 for (i = 0; i < fortyS_sortKey_Length; i++) {
5372 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5373 }
5374 fprintf(stderr, "\nfortyThreeP\n");
5375 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5376 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5377 }
5378 */
5379 if (collResult != collResultReorder) {
5380 log_err_status(status, "ERROR: collation results should have been the same.\n");
5381 return;
5382 }
5383
5384 ucol_close(myCollation);
5385 ucol_close(myReorderCollation);
5386 }
5387
/*
 * Compares two zero-terminated byte sequences.
 * Returns 0 when both are equal up to and including the terminating 0,
 * -1 when the first differing byte of a is smaller than that of b,
 * and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    for (;;) {
        const uint8_t lhs = *a++;
        const uint8_t rhs = *b++;

        if (lhs != rhs) {
            return (lhs < rhs) ? -1 : 1;
        }
        if (lhs == 0) {
            /* Both sequences ended at the same position. */
            return 0;
        }
    }
}
5397
TestImportRulesDeWithPhonebook(void)5398 static void TestImportRulesDeWithPhonebook(void)
5399 {
5400 const char* normalRules[] = {
5401 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5402 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5403 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5404 };
5405 const OneTestCase normalTests[] = {
5406 { {0x00e6}, {0x00c6}, UCOL_LESS},
5407 { {0x00fc}, {0x00dc}, UCOL_GREATER},
5408 };
5409
5410 const char* importRules[] = {
5411 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5412 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5413 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414 };
5415 const OneTestCase importTests[] = {
5416 { {0x00e6}, {0x00c6}, UCOL_LESS},
5417 { {0x00fc}, {0x00dc}, UCOL_LESS},
5418 };
5419
5420 doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5421 doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5422 }
5423
#if 0
/*
 * Compiled out; the registration in addMiscCollTest is commented out with the
 * note "EOR rules removed from CLDR 21".
 *
 * Runs doTestOneTestCase with four rule sets (a dummy tailoring, the EOR
 * import, the Finnish standard import, and both imports combined), each with
 * its own table of expected comparisons for the same three string pairs.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b", /* Dummy rule. */
    };

    /* Expected results with the dummy rule only. */
    const OneTestCase defaultTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* eorRules[] = {
        "[import root-u-co-eor]",
    };

    /* With EOR the two pairs involving U+00A3 / U+00A5 are expected to compare equal. */
    const OneTestCase eorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard rules on their own. */
    const char* fiStdRules[] = {
        "[import fi-u-co-standard]",
    };

    /* With the Finnish standard rules the U+0110 / U+00F0 pair is expected to flip to UCOL_GREATER. */
    const OneTestCase fiStdTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* eorFiStdRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    /* Only referenced by the commented-out call at the end of this function. */
    const char* fiEorRules[] = {
        "[import fi-u-co-eor]",
    };

    /* Expected results for the combined rules; used with eorFiStdRules below. */
    const OneTestCase fiEorTests[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
    doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
    doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
    doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
    */
    /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
#endif
5493
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 *
 * It runs doTestOneTestCase for one pair, U+3402 vs U+4E1E, which is expected
 * to compare UCOL_GREATER with the dummy rule and UCOL_LESS with the unihan import.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET. */
    const char* defaultRules[] = {
        "&a<b", /* Dummy rule. */
    };

    /* Expected result with the dummy rule only. */
    const OneTestCase defaultTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Rules that import the ko-u-co-unihan tailoring. */
    const char* unihanRules[] = {
        "[import ko-u-co-unihan]",
    };

    /* Expected result once the unihan tailoring is imported. */
    const OneTestCase unihanTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
    doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));

}
#endif
5525
TestImport(void)5526 static void TestImport(void)
5527 {
5528 UCollator* vicoll;
5529 UCollator* escoll;
5530 UCollator* viescoll;
5531 UCollator* importviescoll;
5532 UParseError error;
5533 UErrorCode status = U_ZERO_ERROR;
5534 UChar* virules;
5535 int32_t viruleslength;
5536 UChar* esrules;
5537 int32_t esruleslength;
5538 UChar* viesrules;
5539 int32_t viesruleslength;
5540 char srules[500] = "[import vi][import es]";
5541 UChar rules[500];
5542 uint32_t length = 0;
5543 int32_t itemCount;
5544 int32_t i, k;
5545 UChar32 start;
5546 UChar32 end;
5547 UChar str[500];
5548 int32_t strLength;
5549
5550 uint8_t sk1[500];
5551 uint8_t sk2[500];
5552
5553 UBool b;
5554 USet* tailoredSet;
5555 USet* importTailoredSet;
5556
5557
5558 vicoll = ucol_open("vi", &status);
5559 if(U_FAILURE(status)){
5560 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5561 return;
5562 }
5563
5564 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5565 if(viruleslength == 0) {
5566 log_data_err("missing vi tailoring rule string\n");
5567 ucol_close(vicoll);
5568 return;
5569 }
5570 escoll = ucol_open("es", &status);
5571 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5572 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5573 viesrules[0] = 0;
5574 u_strcat(viesrules, virules);
5575 u_strcat(viesrules, esrules);
5576 viesruleslength = viruleslength + esruleslength;
5577 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5578
5579 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5580 length = u_unescape(srules, rules, 500);
5581 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5582 if(U_FAILURE(status)){
5583 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5584 return;
5585 }
5586
5587 tailoredSet = ucol_getTailoredSet(viescoll, &status);
5588 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5589
5590 if(!uset_equals(tailoredSet, importTailoredSet)){
5591 log_err("Tailored sets not equal");
5592 }
5593
5594 uset_close(importTailoredSet);
5595
5596 itemCount = uset_getItemCount(tailoredSet);
5597
5598 for( i = 0; i < itemCount; i++){
5599 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5600 if(strLength < 2){
5601 for (; start <= end; start++){
5602 k = 0;
5603 U16_APPEND(str, k, 500, start, b);
5604 (void)b; /* Suppress set but not used warning. */
5605 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5606 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5607 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5608 log_err("Sort key for %s not equal\n", str);
5609 break;
5610 }
5611 }
5612 }else{
5613 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5614 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5615 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5616 log_err("ZZSort key for %s not equal\n", str);
5617 break;
5618 }
5619
5620 }
5621 }
5622
5623 uset_close(tailoredSet);
5624
5625 uprv_free(viesrules);
5626
5627 ucol_close(vicoll);
5628 ucol_close(escoll);
5629 ucol_close(viescoll);
5630 ucol_close(importviescoll);
5631 }
5632
TestImportWithType(void)5633 static void TestImportWithType(void)
5634 {
5635 UCollator* vicoll;
5636 UCollator* decoll;
5637 UCollator* videcoll;
5638 UCollator* importvidecoll;
5639 UParseError error;
5640 UErrorCode status = U_ZERO_ERROR;
5641 const UChar* virules;
5642 int32_t viruleslength;
5643 const UChar* derules;
5644 int32_t deruleslength;
5645 UChar* viderules;
5646 int32_t videruleslength;
5647 const char srules[500] = "[import vi][import de-u-co-phonebk]";
5648 UChar rules[500];
5649 uint32_t length = 0;
5650 int32_t itemCount;
5651 int32_t i, k;
5652 UChar32 start;
5653 UChar32 end;
5654 UChar str[500];
5655 int32_t strLength;
5656
5657 uint8_t sk1[500];
5658 uint8_t sk2[500];
5659
5660 USet* tailoredSet;
5661 USet* importTailoredSet;
5662
5663 vicoll = ucol_open("vi", &status);
5664 if(U_FAILURE(status)){
5665 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5666 return;
5667 }
5668 virules = ucol_getRules(vicoll, &viruleslength);
5669 if(viruleslength == 0) {
5670 log_data_err("missing vi tailoring rule string\n");
5671 ucol_close(vicoll);
5672 return;
5673 }
5674 /* decoll = ucol_open("de@collation=phonebook", &status); */
5675 decoll = ucol_open("de-u-co-phonebk", &status);
5676 if(U_FAILURE(status)){
5677 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5678 return;
5679 }
5680
5681
5682 derules = ucol_getRules(decoll, &deruleslength);
5683 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5684 viderules[0] = 0;
5685 u_strcat(viderules, virules);
5686 u_strcat(viderules, derules);
5687 videruleslength = viruleslength + deruleslength;
5688 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5689
5690 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5691 length = u_unescape(srules, rules, 500);
5692 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5693 if(U_FAILURE(status)){
5694 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5695 return;
5696 }
5697
5698 tailoredSet = ucol_getTailoredSet(videcoll, &status);
5699 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5700
5701 if(!uset_equals(tailoredSet, importTailoredSet)){
5702 log_err("Tailored sets not equal");
5703 }
5704
5705 uset_close(importTailoredSet);
5706
5707 itemCount = uset_getItemCount(tailoredSet);
5708
5709 for( i = 0; i < itemCount; i++){
5710 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5711 if(strLength < 2){
5712 for (; start <= end; start++){
5713 k = 0;
5714 U16_APPEND_UNSAFE(str, k, start);
5715 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5716 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5717 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5718 log_err("Sort key for %s not equal\n", str);
5719 break;
5720 }
5721 }
5722 }else{
5723 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5724 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5725 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5726 log_err("Sort key for %s not equal\n", str);
5727 break;
5728 }
5729
5730 }
5731 }
5732
5733 uset_close(tailoredSet);
5734
5735 uprv_free(viderules);
5736
5737 ucol_close(videcoll);
5738 ucol_close(importvidecoll);
5739 ucol_close(vicoll);
5740 ucol_close(decoll);
5741 }
5742
/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
/* No terminating 0 is stored; longUpperStrItems records the length with UPRV_LENGTHOF. */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
5756
/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
/* Each row below is one 25-unit repetition. No terminating 0 is stored; longUpperStrItems records the length with UPRV_LENGTHOF. */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5765
/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
/* Each row below is one 27-unit repetition. No terminating 0 is stored; longUpperStrItems records the length with UPRV_LENGTHOF. */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5781
/* One test string for TestCaseLevelBufferOverflow: a pointer to its UTF-16 code units and their count. */
typedef struct {
    const UChar * longUpperStrPtr; /* NULL marks the end of the longUpperStrItems table */
    int32_t       longUpperStrLen; /* number of UChars at longUpperStrPtr */
} LongUpperStrItem;
5786
/*
 * String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow reports an error unless each entry's sort key
 * compares less than the sort key of the entry before it.
 * The table ends with a { NULL, 0 } entry.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
    { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
    { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
    { NULL,          0                           }
};
5794
/* Capacity in bytes of each sort key buffer used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5796
5797 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5798 static void TestCaseLevelBufferOverflow(void)
5799 {
5800 UErrorCode status = U_ZERO_ERROR;
5801 UCollator * ucol = ucol_open("root", &status);
5802 if ( U_SUCCESS(status) ) {
5803 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5804 if ( U_SUCCESS(status) ) {
5805 const LongUpperStrItem * itemPtr;
5806 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5807 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5808 int32_t sortKeyLen;
5809 if (itemPtr > longUpperStrItems) {
5810 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5811 }
5812 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5813 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5814 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5815 break;
5816 }
5817 if ( itemPtr > longUpperStrItems ) {
5818 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5819 if (compareResult >= 0) {
5820 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5821 }
5822 }
5823 }
5824 } else {
5825 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5826 }
5827 ucol_close(ucol);
5828 } else {
5829 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5830 }
5831 }
5832
/* Test for #10595 */
/* Input string for TestNextSortKeyPartJaIdentical: five UTF-16 code units followed by a terminating 0. */
static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Capacity in bytes of the buffer passed to each ucol_nextSortKeyPart call in that test. */
#define KEY_PART_SIZE 16
5836
TestNextSortKeyPartJaIdentical(void)5837 static void TestNextSortKeyPartJaIdentical(void)
5838 {
5839 UErrorCode status = U_ZERO_ERROR;
5840 UCollator *coll;
5841 uint8_t keyPart[KEY_PART_SIZE];
5842 UCharIterator iter;
5843 uint32_t state[2] = {0, 0};
5844 int32_t keyPartLen;
5845
5846 coll = ucol_open("ja", &status);
5847 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5848 if (U_FAILURE(status)) {
5849 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5850 return;
5851 }
5852
5853 uiter_setString(&iter, testJapaneseName, 5);
5854 keyPartLen = KEY_PART_SIZE;
5855 while (keyPartLen == KEY_PART_SIZE) {
5856 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5857 if (U_FAILURE(status)) {
5858 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5859 break;
5860 }
5861 }
5862
5863 ucol_close(coll);
5864 }
5865
/* Registers test function x with addTest under the path "tscoll/cmsccoll/<name of x>". Expects a variable named root in scope. */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the tests of this file to the test tree at root, one TEST() call per
 * test function. Entries that are commented out are not registered.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    TEST(TestUpperCaseFirst);
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5972
5973 #endif /* #if !UCONFIG_NO_COLLATION */
5974