1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File cmsccoll.C
11 *
12 *******************************************************************************/
13 /**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18 #include <stdio.h>
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "cintltst.h"
28 #include "ccolltst.h"
29 #include "callcoll.h"
30 #include "unicode/ustring.h"
31 #include "string.h"
32 #include "ucol_imp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
41 #include "uparse.h"
42 #include "putilimp.h"
43
44
/* Row capacity, in UChar units, of the fixed-width UChar test tables in this file. */
#define MAX_TOKEN_LEN 16

/*
 * Function type for a string-comparison callback: receives an opaque collator
 * pointer, an integer `object`, and source/target UChar strings with their
 * lengths, and yields a UCollationResult.
 * NOTE(review): the meaning of `object` is not visible in this part of the
 * file - confirm against the code that uses this type.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
50
51
52
/*
 * Strings listed in the order IncompleteCntTest expects them to sort under
 * the rules " & Z < ABC < Q < B": every entry must compare UCOL_LESS against
 * every entry that follows it.
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ",
    "AB", "ABZ", "ABQ",
    "Z", "ABC", "Q", "B"
};

/*
 * Strings listed in the order IncompleteCntTest expects them to sort under
 * the rules " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ",
    "MAR",
    "Z",
    "DAVIS", "MARK", "DAV",
    "DAVI"
};
79
IncompleteCntTest(void)80 static void IncompleteCntTest(void)
81 {
82 UErrorCode status = U_ZERO_ERROR;
83 UChar temp[90];
84 UChar t1[90];
85 UChar t2[90];
86
87 UCollator *coll = NULL;
88 uint32_t i = 0, j = 0;
89 uint32_t size = 0;
90
91 u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95 if(U_SUCCESS(status)) {
96 size = UPRV_LENGTHOF(cnt1);
97 for(i = 0; i < size-1; i++) {
98 for(j = i+1; j < size; j++) {
99 UCollationElements *iter;
100 u_uastrcpy(t1, cnt1[i]);
101 u_uastrcpy(t2, cnt1[j]);
102 doTest(coll, t1, t2, UCOL_LESS);
103 /* synwee : added collation element iterator test */
104 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105 if (U_FAILURE(status)) {
106 log_err("Creation of iterator failed\n");
107 break;
108 }
109 backAndForth(iter);
110 ucol_closeElements(iter);
111 }
112 }
113 }
114
115 ucol_close(coll);
116
117
118 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121 if(U_SUCCESS(status)) {
122 size = UPRV_LENGTHOF(cnt2);
123 for(i = 0; i < size-1; i++) {
124 for(j = i+1; j < size; j++) {
125 UCollationElements *iter;
126 u_uastrcpy(t1, cnt2[i]);
127 u_uastrcpy(t2, cnt2[j]);
128 doTest(coll, t1, t2, UCOL_LESS);
129
130 /* synwee : added collation element iterator test */
131 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132 if (U_FAILURE(status)) {
133 log_err("Creation of iterator failed\n");
134 break;
135 }
136 backAndForth(iter);
137 ucol_closeElements(iter);
138 }
139 }
140 }
141
142 ucol_close(coll);
143
144
145 }
146
147 const static char shifted[][20] = {
148 "black bird",
149 "black-bird",
150 "blackbird",
151 "black Bird",
152 "black-Bird",
153 "blackBird",
154 "black birds",
155 "black-birds",
156 "blackbirds"
157 };
158
159 const static UCollationResult shiftedTert[] = {
160 UCOL_EQUAL,
161 UCOL_EQUAL,
162 UCOL_EQUAL,
163 UCOL_LESS,
164 UCOL_EQUAL,
165 UCOL_EQUAL,
166 UCOL_LESS,
167 UCOL_EQUAL,
168 UCOL_EQUAL
169 };
170
171 const static char nonignorable[][20] = {
172 "black bird",
173 "black Bird",
174 "black birds",
175 "black-bird",
176 "black-Bird",
177 "black-birds",
178 "blackbird",
179 "blackBird",
180 "blackbirds"
181 };
182
BlackBirdTest(void)183 static void BlackBirdTest(void) {
184 UErrorCode status = U_ZERO_ERROR;
185 UChar t1[90];
186 UChar t2[90];
187
188 uint32_t i = 0, j = 0;
189 uint32_t size = 0;
190 UCollator *coll = ucol_open("en_US", &status);
191
192 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195 if(U_SUCCESS(status)) {
196 size = UPRV_LENGTHOF(nonignorable);
197 for(i = 0; i < size-1; i++) {
198 for(j = i+1; j < size; j++) {
199 u_uastrcpy(t1, nonignorable[i]);
200 u_uastrcpy(t2, nonignorable[j]);
201 doTest(coll, t1, t2, UCOL_LESS);
202 }
203 }
204 }
205
206 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209 if(U_SUCCESS(status)) {
210 size = UPRV_LENGTHOF(shifted);
211 for(i = 0; i < size-1; i++) {
212 for(j = i+1; j < size; j++) {
213 u_uastrcpy(t1, shifted[i]);
214 u_uastrcpy(t2, shifted[j]);
215 doTest(coll, t1, t2, UCOL_LESS);
216 }
217 }
218 }
219
220 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221 if(U_SUCCESS(status)) {
222 size = UPRV_LENGTHOF(shifted);
223 for(i = 1; i < size; i++) {
224 u_uastrcpy(t1, shifted[i-1]);
225 u_uastrcpy(t2, shifted[i]);
226 doTest(coll, t1, t2, shiftedTert[i]);
227 }
228 }
229
230 ucol_close(coll);
231 }
232
233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236 {0x0041/*'A'*/, 0x0300, 0x0000},
237 {0x00C0, 0x0301, 0x0000},
238 /* this would work with forced normalization */
239 {0x00C0, 0x0316, 0x0000}
240 };
241
242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245 {0x00C0, 0},
246 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247 /* this would work with forced normalization */
248 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249 };
250
251 const static UCollationResult results[] = {
252 UCOL_GREATER,
253 UCOL_EQUAL,
254 UCOL_EQUAL,
255 UCOL_GREATER,
256 UCOL_EQUAL
257 };
258
FunkyATest(void)259 static void FunkyATest(void)
260 {
261
262 int32_t i;
263 UErrorCode status = U_ZERO_ERROR;
264 UCollator *myCollation;
265 myCollation = ucol_open("en_US", &status);
266 if(U_FAILURE(status)){
267 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268 return;
269 }
270 log_verbose("Testing some A letters, for some reason\n");
271 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272 ucol_setStrength(myCollation, UCOL_TERTIARY);
273 for (i = 0; i < 4 ; i++)
274 {
275 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276 }
277 ucol_close(myCollation);
278 }
279
/*
 * Attribute value lists. In the visible part of this file they are referenced
 * only by the disabled PrintMarkDavis, which feeds each list to
 * ucol_setAttribute for the attribute named in the comment above it.
 * NOTE(review): none of these arrays is declared static, so they have external
 * linkage - confirm whether anything outside this file depends on that.
 */

/* Values for UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values for UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values for UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values for UCOL_STRENGTH. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
304
#if 0
/*
 * Printable names for the attribute value lists above; each table lists its
 * names in the same order as the corresponding value array. Compiled out
 * together with PrintMarkDavis, their only user.
 */

/* Names for strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Names for caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Names for alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Names for caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};
330
331 /* not used currently - does not test only prints */
332 static void PrintMarkDavis(void)
333 {
334 UErrorCode status = U_ZERO_ERROR;
335 UChar m[256];
336 uint8_t sortkey[256];
337 UCollator *coll = ucol_open("en_US", &status);
338 uint32_t h,i,j,k, sortkeysize;
339 uint32_t sizem = 0;
340 char buffer[512];
341 uint32_t len = 512;
342
343 log_verbose("PrintMarkDavis");
344
345 u_uastrcpy(m, "Mark Davis");
346 sizem = u_strlen(m);
347
348
349 m[1] = 0xe4;
350
351 for(i = 0; i<sizem; i++) {
352 fprintf(stderr, "\\u%04X ", m[i]);
353 }
354 fprintf(stderr, "\n");
355
356 for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
357 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
358 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
359
360 for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
361 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
362 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
363
364 for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
365 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
366 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
367
368 for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
369 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
370 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
371 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
372 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
373 }
374
375 }
376
377 }
378
379 }
380 }
381 #endif
382
BillFairmanTest(void)383 static void BillFairmanTest(void) {
384 /*
385 ** check for actual locale via ICU resource bundles
386 **
387 ** lp points to the original locale ("fr_FR_....")
388 */
389
390 UResourceBundle *lr,*cr;
391 UErrorCode lec = U_ZERO_ERROR;
392 const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394 log_verbose("BillFairmanTest\n");
395
396 lr = ures_open(NULL,lp,&lec);
397 if (lr) {
398 cr = ures_getByKey(lr,"collations",0,&lec);
399 if (cr) {
400 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401 if (lp) {
402 if (U_SUCCESS(lec)) {
403 if(strcmp(lp, "fr") != 0) {
404 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405 }
406 }
407 }
408 ures_close(cr);
409 }
410 ures_close(lr);
411 }
412 }
413
/*
 * Strings in the order TestChMove expects from the "cs" collator. The entries
 * are still escaped ("\\u030C" is spelled out) and are decoded with
 * u_unescape before being compared.
 */
static const char chTest[][20] = {
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h", "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s", "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
433
TestChMove(void)434 static void TestChMove(void) {
435 UChar t1[256] = {0};
436 UChar t2[256] = {0};
437
438 uint32_t i = 0, j = 0;
439 uint32_t size = 0;
440 UErrorCode status = U_ZERO_ERROR;
441
442 UCollator *coll = ucol_open("cs", &status);
443
444 if(U_SUCCESS(status)) {
445 size = UPRV_LENGTHOF(chTest);
446 for(i = 0; i < size-1; i++) {
447 for(j = i+1; j < size; j++) {
448 u_unescape(chTest[i], t1, 256);
449 u_unescape(chTest[j], t2, 256);
450 doTest(coll, t1, t2, UCOL_LESS);
451 }
452 }
453 }
454 else {
455 log_data_err("Can't open collator");
456 }
457 ucol_close(coll);
458 }
459
460
461
462
463 /*
464 const static char impTest[][20] = {
465 "\\u4e00",
466 "a",
467 "A",
468 "b",
469 "B",
470 "\\u4e01"
471 };
472 */
473
474
TestImplicitTailoring(void)475 static void TestImplicitTailoring(void) {
476 static const struct {
477 const char *rules;
478 const char *data[10];
479 const uint32_t len;
480 } tests[] = {
481 {
482 /* Tailor b and c before U+4E00. */
483 "&[before 1]\\u4e00 < b < c "
484 /* Now, before U+4E00 is c; put d and e after that. */
485 "&[before 1]\\u4e00 < d < e",
486 { "b", "c", "d", "e", "\\u4e00"}, 5 },
487 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490 };
491
492 int32_t i = 0;
493
494 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496 }
497
498 /*
499 UChar t1[256] = {0};
500 UChar t2[256] = {0};
501
502 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504 uint32_t i = 0, j = 0;
505 uint32_t size = 0;
506 uint32_t ruleLen = 0;
507 UErrorCode status = U_ZERO_ERROR;
508 UCollator *coll = NULL;
509 ruleLen = u_unescape(rule, t1, 256);
510
511 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513 if(U_SUCCESS(status)) {
514 size = UPRV_LENGTHOF(impTest);
515 for(i = 0; i < size-1; i++) {
516 for(j = i+1; j < size; j++) {
517 u_unescape(impTest[i], t1, 256);
518 u_unescape(impTest[j], t2, 256);
519 doTest(coll, t1, t2, UCOL_LESS);
520 }
521 }
522 }
523 else {
524 log_err("Can't open collator");
525 }
526 ucol_close(coll);
527 */
528 }
529
TestFCDProblem(void)530 static void TestFCDProblem(void) {
531 UChar t1[256] = {0};
532 UChar t2[256] = {0};
533
534 const char *s1 = "\\u0430\\u0306\\u0325";
535 const char *s2 = "\\u04D1\\u0325";
536
537 UErrorCode status = U_ZERO_ERROR;
538 UCollator *coll = ucol_open("", &status);
539 u_unescape(s1, t1, 256);
540 u_unescape(s2, t2, 256);
541
542 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543 doTest(coll, t1, t2, UCOL_EQUAL);
544
545 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546 doTest(coll, t1, t2, UCOL_EQUAL);
547
548 ucol_close(coll);
549 }
550
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/*
 * One TestComposeDecompose record: the code point under test (u) and the two
 * UChar strings the test compares for it. Each buffer holds
 * NORM_BUFFER_TEST_LEN UChars.
 */
typedef struct {
  UChar32 u;
  UChar NFC[NORM_BUFFER_TEST_LEN];
  UChar NFD[NORM_BUFFER_TEST_LEN];
} tester;
561
TestComposeDecompose(void)562 static void TestComposeDecompose(void) {
563 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564 static const UChar UNICODESET_STR[] = {
565 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568 };
569 int32_t noOfLoc;
570 int32_t i = 0, j = 0;
571
572 UErrorCode status = U_ZERO_ERROR;
573 const char *locName = NULL;
574 uint32_t nfcSize;
575 uint32_t nfdSize;
576 tester **t;
577 uint32_t noCases = 0;
578 UCollator *coll = NULL;
579 UChar32 u = 0;
580 UChar comp[NORM_BUFFER_TEST_LEN];
581 uint32_t len = 0;
582 UCollationElements *iter;
583 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584 int32_t charsToTestSize;
585
586 noOfLoc = uloc_countAvailable();
587
588 coll = ucol_open("", &status);
589 if (U_FAILURE(status)) {
590 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591 return;
592 }
593 charsToTestSize = uset_size(charsToTest);
594 if (charsToTestSize <= 0) {
595 log_err("Set was zero. Missing data?\n");
596 return;
597 }
598 t = (tester **)malloc(charsToTestSize * sizeof(tester *));
599 t[0] = (tester *)malloc(sizeof(tester));
600 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
601
602 for(u = 0; u < charsToTestSize; u++) {
603 UChar32 ch = uset_charAt(charsToTest, u);
604 len = 0;
605 U16_APPEND_UNSAFE(comp, len, ch);
606 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608
609 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
610 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
611 t[noCases]->u = ch;
612 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
613 u_strncpy(t[noCases]->NFC, comp, len);
614 t[noCases]->NFC[len] = 0;
615 }
616 noCases++;
617 t[noCases] = (tester *)malloc(sizeof(tester));
618 uprv_memset(t[noCases], 0, sizeof(tester));
619 }
620 }
621 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622 uset_close(charsToTest);
623 charsToTest = NULL;
624
625 for(u=0; u<(UChar32)noCases; u++) {
626 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629 }
630 }
631 /*
632 for(u = 0; u < charsToTestSize; u++) {
633 if(!(u&0xFFFF)) {
634 log_verbose("%08X ", u);
635 }
636 uprv_memset(t[noCases], 0, sizeof(tester));
637 t[noCases]->u = u;
638 len = 0;
639 U16_APPEND_UNSAFE(comp, len, u);
640 comp[len] = 0;
641 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645 }
646 */
647
648 ucol_close(coll);
649
650 log_verbose("Testing locales, number of cases = %i\n", noCases);
651 for(i = 0; i<noOfLoc; i++) {
652 status = U_ZERO_ERROR;
653 locName = uloc_getAvailable(i);
654 if(hasCollationElements(locName)) {
655 char cName[256];
656 UChar name[256];
657 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658
659 for(j = 0; j<nameSize; j++) {
660 cName[j] = (char)name[j];
661 }
662 cName[nameSize] = 0;
663 log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664
665 coll = ucol_open(locName, &status);
666 ucol_setStrength(coll, UCOL_IDENTICAL);
667 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
668
669 for(u=0; u<(UChar32)noCases; u++) {
670 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673 log_verbose("Testing NFC\n");
674 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675 backAndForth(iter);
676 log_verbose("Testing NFD\n");
677 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678 backAndForth(iter);
679 }
680 }
681 ucol_closeElements(iter);
682 ucol_close(coll);
683 }
684 }
685 for(u = 0; u <= (UChar32)noCases; u++) {
686 free(t[u]);
687 }
688 free(t);
689 }
690
TestEmptyRule(void)691 static void TestEmptyRule(void) {
692 UErrorCode status = U_ZERO_ERROR;
693 UChar rulez[] = { 0 };
694 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695
696 ucol_close(coll);
697 }
698
TestUCARules(void)699 static void TestUCARules(void) {
700 UErrorCode status = U_ZERO_ERROR;
701 UChar b[256];
702 UChar *rules = b;
703 uint32_t ruleLen = 0;
704 UCollator *UCAfromRules = NULL;
705 UCollator *coll = ucol_open("", &status);
706 if(status == U_FILE_ACCESS_ERROR) {
707 log_data_err("Is your data around?\n");
708 return;
709 } else if(U_FAILURE(status)) {
710 log_err("Error opening collator\n");
711 return;
712 }
713 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714
715 log_verbose("TestUCARules\n");
716 if(ruleLen > 256) {
717 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719 }
720 log_verbose("Rules length is %d\n", ruleLen);
721 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722 if(U_SUCCESS(status)) {
723 ucol_close(UCAfromRules);
724 } else {
725 log_verbose("Unable to create a collator from UCARules!\n");
726 }
727 /*
728 u_unescape(blah, b, 256);
729 ucol_getSortKey(coll, b, 1, res, 256);
730 */
731 ucol_close(coll);
732 if(rules != b) {
733 free(rules);
734 }
735 }
736
737
738 /* Pinyin tonal order */
739 /*
740 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741 (w/macron)< (w/acute)< (w/caron)< (w/grave)
742 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747 .. (\u00fc)
748
749 However, in testing we got the following order:
750 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751 (w/acute)< (w/grave)< (w/caron)< (w/macron)
752 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753 .. (\u0113)
754 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757 .. (\u01d8)
758 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759 */
760
/*
 * Tailors the tone-marked vowels with [before 1] resets (plus one plain reset
 * at u for the u-diaeresis forms) and passes the rules, together with the
 * strings listed in their expected order, to genericRulesStarter.
 */
static void TestBefore(void) {
  static const char *data[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char rules[] =
      "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
      "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
      "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
      "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
      "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
      "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, data, UPRV_LENGTHOF(data));
}
779
#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled test: passes the vowels and their tone-marked forms, in the order
 * listed, to genericLocaleStarter for the "zh" locale.
 */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
#endif
795
TestUpperCaseFirst(void)796 static void TestUpperCaseFirst(void) {
797 const static char *data[] = {
798 "I",
799 "i",
800 "Y",
801 "y"
802 };
803 genericLocaleStarter("da", data, UPRV_LENGTHOF(data));
804 }
805
TestJ815(void)806 static void TestJ815(void) {
807 const static char *data[] = {
808 "aa",
809 "Aa",
810 "ab",
811 "Ab",
812 "ad",
813 "Ad",
814 "ae",
815 "Ae",
816 "\\u00e6",
817 "\\u00c6",
818 "af",
819 "Af",
820 "b",
821 "B"
822 };
823 genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
824 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
825 }
826
827
TestCase(void)828 static void TestCase(void)
829 {
830 const static UChar gRules[MAX_TOKEN_LEN] =
831 /*" & 0 < 1,\u2461<a,A"*/
832 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
833
834 const static UChar testCase[][MAX_TOKEN_LEN] =
835 {
836 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
837 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
838 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
839 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
840 };
841
842 const static UCollationResult caseTestResults[][9] =
843 {
844 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
845 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
846 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
847 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
848 };
849
850 const static UColAttributeValue caseTestAttributes[][2] =
851 {
852 { UCOL_LOWER_FIRST, UCOL_OFF},
853 { UCOL_UPPER_FIRST, UCOL_OFF},
854 { UCOL_LOWER_FIRST, UCOL_ON},
855 { UCOL_UPPER_FIRST, UCOL_ON}
856 };
857 int32_t i,j,k;
858 UErrorCode status = U_ZERO_ERROR;
859 UCollationElements *iter;
860 UCollator *myCollation;
861 myCollation = ucol_open("en_US", &status);
862
863 if(U_FAILURE(status)){
864 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
865 return;
866 }
867 log_verbose("Testing different case settings\n");
868 ucol_setStrength(myCollation, UCOL_TERTIARY);
869
870 for(k = 0; k<4; k++) {
871 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
872 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
873 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
874 for (i = 0; i < 3 ; i++) {
875 for(j = i+1; j<4; j++) {
876 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
877 }
878 }
879 }
880 ucol_close(myCollation);
881
882 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
883 if(U_FAILURE(status)){
884 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
885 return;
886 }
887 log_verbose("Testing different case settings with custom rules\n");
888 ucol_setStrength(myCollation, UCOL_TERTIARY);
889
890 for(k = 0; k<4; k++) {
891 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
892 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
893 for (i = 0; i < 3 ; i++) {
894 for(j = i+1; j<4; j++) {
895 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
896 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
897 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
898 backAndForth(iter);
899 ucol_closeElements(iter);
900 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
901 backAndForth(iter);
902 ucol_closeElements(iter);
903 }
904 }
905 }
906 ucol_close(myCollation);
907 {
908 const static char *lowerFirst[] = {
909 "h",
910 "H",
911 "ch",
912 "Ch",
913 "CH",
914 "cha",
915 "chA",
916 "Cha",
917 "ChA",
918 "CHa",
919 "CHA",
920 "i",
921 "I"
922 };
923
924 const static char *upperFirst[] = {
925 "H",
926 "h",
927 "CH",
928 "Ch",
929 "ch",
930 "CHA",
931 "CHa",
932 "ChA",
933 "Cha",
934 "chA",
935 "cha",
936 "I",
937 "i"
938 };
939 log_verbose("mixed case test\n");
940 log_verbose("lower first, case level off\n");
941 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
942 log_verbose("upper first, case level off\n");
943 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
944 log_verbose("lower first, case level on\n");
945 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
946 log_verbose("upper first, case level on\n");
947 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
948 }
949
950 }
951
TestIncrementalNormalize(void)952 static void TestIncrementalNormalize(void) {
953
954 /*UChar baseA =0x61;*/
955 UChar baseA =0x41;
956 /* UChar baseB = 0x42;*/
957 static const UChar ccMix[] = {0x316, 0x321, 0x300};
958 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
959 /*
960 0x316 is combining grave accent below, cc=220
961 0x321 is combining palatalized hook below, cc=202
962 0x300 is combining grave accent, cc=230
963 */
964
965 #define MAXSLEN 2000
966 /*int maxSLen = 64000;*/
967 int sLen;
968 int i;
969
970 UCollator *coll;
971 UErrorCode status = U_ZERO_ERROR;
972 UCollationResult result;
973
974 int32_t myQ = getTestOption(QUICK_OPTION);
975
976 if(getTestOption(QUICK_OPTION) < 0) {
977 setTestOption(QUICK_OPTION, 1);
978 }
979
980 {
981 /* Test 1. Run very long unnormalized strings, to force overflow of*/
982 /* most buffers along the way.*/
983 UChar strA[MAXSLEN+1];
984 UChar strB[MAXSLEN+1];
985
986 coll = ucol_open("en_US", &status);
987 if(status == U_FILE_ACCESS_ERROR) {
988 log_data_err("Is your data around?\n");
989 return;
990 } else if(U_FAILURE(status)) {
991 log_err("Error opening collator\n");
992 return;
993 }
994 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
995
996 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
997 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
998 /*for (sLen = 1000; sLen<1001; sLen++) {*/
999 for (sLen = 500; sLen<501; sLen++) {
1000 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1001 strA[0] = baseA;
1002 strB[0] = baseA;
1003 for (i=1; i<=sLen-1; i++) {
1004 strA[i] = ccMix[i % 3];
1005 strB[sLen-i] = ccMix[i % 3];
1006 }
1007 strA[sLen] = 0;
1008 strB[sLen] = 0;
1009
1010 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/
1011 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/
1012 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/
1013 doTest(coll, strA, strB, UCOL_EQUAL);
1014 }
1015 }
1016
1017 setTestOption(QUICK_OPTION, myQ);
1018
1019
1020 /* Test 2: Non-normal sequence in a string that extends to the last character*/
1021 /* of the string. Checks a couple of edge cases.*/
1022
1023 {
1024 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1025 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1026 ucol_setStrength(coll, UCOL_TERTIARY);
1027 doTest(coll, strA, strB, UCOL_EQUAL);
1028 }
1029
1030 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
1031
1032 {
1033 /* New UCA 3.1.1.
1034 * test below used a code point from Desseret, which sorts differently
1035 * than d800 dc00
1036 */
1037 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1038 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1039 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1040 ucol_setStrength(coll, UCOL_TERTIARY);
1041 doTest(coll, strA, strB, UCOL_GREATER);
1042 }
1043
1044 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
1045
1046 {
1047 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1048 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1049 char sortKeyA[50];
1050 char sortKeyAz[50];
1051 char sortKeyB[50];
1052 char sortKeyBz[50];
1053 int r;
1054
1055 /* there used to be -3 here. Hmmmm.... */
1056 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1057 result = ucol_strcoll(coll, strA, 3, strB, 3);
1058 if (result != UCOL_GREATER) {
1059 log_err("ERROR 1 in test 4\n");
1060 }
1061 result = ucol_strcoll(coll, strA, -1, strB, -1);
1062 if (result != UCOL_EQUAL) {
1063 log_err("ERROR 2 in test 4\n");
1064 }
1065
1066 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1067 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1068 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1069 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1070
1071 r = strcmp(sortKeyA, sortKeyAz);
1072 if (r <= 0) {
1073 log_err("Error 3 in test 4\n");
1074 }
1075 r = strcmp(sortKeyA, sortKeyB);
1076 if (r <= 0) {
1077 log_err("Error 4 in test 4\n");
1078 }
1079 r = strcmp(sortKeyAz, sortKeyBz);
1080 if (r != 0) {
1081 log_err("Error 5 in test 4\n");
1082 }
1083
1084 ucol_setStrength(coll, UCOL_IDENTICAL);
1085 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1086 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1087 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1088 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1089
1090 r = strcmp(sortKeyA, sortKeyAz);
1091 if (r <= 0) {
1092 log_err("Error 6 in test 4\n");
1093 }
1094 r = strcmp(sortKeyA, sortKeyB);
1095 if (r <= 0) {
1096 log_err("Error 7 in test 4\n");
1097 }
1098 r = strcmp(sortKeyAz, sortKeyBz);
1099 if (r != 0) {
1100 log_err("Error 8 in test 4\n");
1101 }
1102 ucol_setStrength(coll, UCOL_TERTIARY);
1103 }
1104
1105
1106 /* Test 5: Null characters in non-normal source strings.*/
1107
1108 {
1109 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1110 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1111 char sortKeyA[50];
1112 char sortKeyAz[50];
1113 char sortKeyB[50];
1114 char sortKeyBz[50];
1115 int r;
1116
1117 result = ucol_strcoll(coll, strA, 6, strB, 6);
1118 if (result != UCOL_GREATER) {
1119 log_err("ERROR 1 in test 5\n");
1120 }
1121 result = ucol_strcoll(coll, strA, -1, strB, -1);
1122 if (result != UCOL_EQUAL) {
1123 log_err("ERROR 2 in test 5\n");
1124 }
1125
1126 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1127 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1128 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1129 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1130
1131 r = strcmp(sortKeyA, sortKeyAz);
1132 if (r <= 0) {
1133 log_err("Error 3 in test 5\n");
1134 }
1135 r = strcmp(sortKeyA, sortKeyB);
1136 if (r <= 0) {
1137 log_err("Error 4 in test 5\n");
1138 }
1139 r = strcmp(sortKeyAz, sortKeyBz);
1140 if (r != 0) {
1141 log_err("Error 5 in test 5\n");
1142 }
1143
1144 ucol_setStrength(coll, UCOL_IDENTICAL);
1145 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1146 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1147 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1148 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1149
1150 r = strcmp(sortKeyA, sortKeyAz);
1151 if (r <= 0) {
1152 log_err("Error 6 in test 5\n");
1153 }
1154 r = strcmp(sortKeyA, sortKeyB);
1155 if (r <= 0) {
1156 log_err("Error 7 in test 5\n");
1157 }
1158 r = strcmp(sortKeyAz, sortKeyBz);
1159 if (r != 0) {
1160 log_err("Error 8 in test 5\n");
1161 }
1162 ucol_setStrength(coll, UCOL_TERTIARY);
1163 }
1164
1165
1166 /* Test 6: Null character as base of a non-normal combining sequence.*/
1167
1168 {
1169 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1170 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1171
1172 result = ucol_strcoll(coll, strA, 5, strB, 5);
1173 if (result != UCOL_LESS) {
1174 log_err("Error 1 in test 6\n");
1175 }
1176 result = ucol_strcoll(coll, strA, -1, strB, -1);
1177 if (result != UCOL_EQUAL) {
1178 log_err("Error 2 in test 6\n");
1179 }
1180 }
1181
1182 ucol_close(coll);
1183 }
1184
1185
1186
#if 0
/*
 * Disabled test: unescapes each entry of caseBitData, passes it to
 * ucol_uprv_getCaseBits(), and logs an error when the returned value differs
 * from the entry of results at the same index.
 *
 * NOTE(review): the collator opened here is never closed and status is never
 * checked; both should be fixed before this test is re-enabled.
 */
static void TestGetCaseBit(void) {
    /* Escaped input strings; caseBitData[i] is expected to yield results[i]. */
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    /* Expected case value for each input string, in the same order. */
    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    for(i = 0; i<UPRV_LENGTHOF(results); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            /* Only the first code unit of the input is reported. */
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }
}
#endif
1214
TestHangulTailoring(void)1215 static void TestHangulTailoring(void) {
1216 static const char *koreanData[] = {
1217 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1218 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1219 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1220 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1221 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1222 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1223 };
1224
1225 const char *rules =
1226 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1227 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1228 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1229 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1230 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1231 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1232
1233
1234 UErrorCode status = U_ZERO_ERROR;
1235 UChar rlz[2048] = { 0 };
1236 uint32_t rlen = u_unescape(rules, rlz, 2048);
1237
1238 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1239 if(status == U_FILE_ACCESS_ERROR) {
1240 log_data_err("Is your data around?\n");
1241 return;
1242 } else if(U_FAILURE(status)) {
1243 log_err("Error opening collator\n");
1244 return;
1245 }
1246
1247 log_verbose("Using start of korean rules\n");
1248
1249 if(U_SUCCESS(status)) {
1250 genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1251 } else {
1252 log_err("Unable to open collator with rules %s\n", rules);
1253 }
1254
1255 ucol_close(coll);
1256
1257 log_verbose("Using ko__LOTUS locale\n");
1258 genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1259 }
1260
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap() compares sort key bytes against these two values.
 */
enum {
    /* Midpoint of the secondary byte range noted on the right. */
    SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
    /* Midpoint of the tertiary byte range noted on the right. */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
};
1271
TestCompressOverlap(void)1272 static void TestCompressOverlap(void) {
1273 UChar secstr[150];
1274 UChar tertstr[150];
1275 UErrorCode status = U_ZERO_ERROR;
1276 UCollator *coll;
1277 uint8_t result[500];
1278 uint32_t resultlen;
1279 int count = 0;
1280 uint8_t *tempptr;
1281
1282 coll = ucol_open("", &status);
1283
1284 if (U_FAILURE(status)) {
1285 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1286 return;
1287 }
1288 while (count < 149) {
1289 secstr[count] = 0x0020; /* [06, 05, 05] */
1290 tertstr[count] = 0x0020;
1291 count ++;
1292 }
1293
1294 /* top down compression ----------------------------------- */
1295 secstr[count] = 0x0332; /* [, 87, 05] */
1296 tertstr[count] = 0x3000; /* [06, 05, 07] */
1297
1298 /* no compression secstr should have 150 secondary bytes, tertstr should
1299 have 150 tertiary bytes.
1300 with correct compression, secstr should have 6 secondary
1301 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1302 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1303 (void)resultlen; /* Suppress set but not used warning. */
1304 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1305 while (*(tempptr + 1) != 1) {
1306 /* the last secondary collation element is not checked since it is not
1307 part of the compression */
1308 if (*tempptr < SEC_COMMON_MIDDLE) {
1309 log_err("Secondary top down compression overlapped\n");
1310 }
1311 tempptr ++;
1312 }
1313
1314 /* tertiary top/bottom/common for en_US is similar to the secondary
1315 top/bottom/common */
1316 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1317 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1318 while (*(tempptr + 1) != 0) {
1319 /* the last secondary collation element is not checked since it is not
1320 part of the compression */
1321 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1322 log_err("Tertiary top down compression overlapped\n");
1323 }
1324 tempptr ++;
1325 }
1326
1327 /* bottom up compression ------------------------------------- */
1328 secstr[count] = 0;
1329 tertstr[count] = 0;
1330 resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1331 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1332 while (*(tempptr + 1) != 1) {
1333 /* the last secondary collation element is not checked since it is not
1334 part of the compression */
1335 if (*tempptr > SEC_COMMON_MIDDLE) {
1336 log_err("Secondary bottom up compression overlapped\n");
1337 }
1338 tempptr ++;
1339 }
1340
1341 /* tertiary top/bottom/common for en_US is similar to the secondary
1342 top/bottom/common */
1343 resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1344 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1345 while (*(tempptr + 1) != 0) {
1346 /* the last secondary collation element is not checked since it is not
1347 part of the compression */
1348 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1349 log_err("Tertiary bottom up compression overlapped\n");
1350 }
1351 tempptr ++;
1352 }
1353
1354 ucol_close(coll);
1355 }
1356
/**
 * Runs the three-string ordering in test[] against two rule sets that tailor
 * U+0410 and U+04D0.
 *
 * The element count is derived from the array instead of being repeated as a
 * literal, so adding or removing a string cannot desynchronize the calls.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

    /*
     * Most of the following are commented out because UCA 8.0
     * drops most of the Cyrillic contractions from the default order.
     * See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
     */

    /* genericLocaleStarter("root", test, 3); */
    /* genericRulesStarter("&\\u0410 = \\u0410", test, 3); */
    /* genericRulesStarter("&Z < \\u0410", test, 3); */
    genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, UPRV_LENGTHOF(test));
    genericRulesStarter("&Z < \\u0410 < \\u04d0", test, UPRV_LENGTHOF(test));
    /* genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); */
    /* genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); */
}
1379
/**
 * Runs two three-string orderings with a rule that applies
 * [suppressContractions] to the range U+0400..U+047F.
 *
 * The element counts are derived from the arrays rather than hard-coded.
 */
static void TestSuppressContractions(void) {
    /* U+0410 followed by a different trailing ASCII letter in each string. */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    /* Variants of the letter A followed by U+0410, with or without a mark. */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };

    genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1396
TestContraction(void)1397 static void TestContraction(void) {
1398 const static char *testrules[] = {
1399 "&A = AB / B",
1400 "&A = A\\u0306/\\u0306",
1401 "&c = ch / h"
1402 };
1403 const static UChar testdata[][2] = {
1404 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1405 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1406 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1407 };
1408 const static UChar testdata2[][2] = {
1409 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1411 {0x0063 /* 'c' */, 0x006C /* 'l' */}
1412 };
1413 #if 0
1414 /*
1415 * These pairs of rule strings are not guaranteed to yield the very same mappings.
1416 * In fact, LDML 24 recommends an improved way of creating mappings
1417 * which always yields different mappings for such pairs. See
1418 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419 */
1420 const static char *testrules3[] = {
1421 "&z < xyz &xyzw << B",
1422 "&z < xyz &xyz << B / w",
1423 "&z < ch &achm << B",
1424 "&z < ch &a << B / chm",
1425 "&\\ud800\\udc00w << B",
1426 "&\\ud800\\udc00 << B / w",
1427 "&a\\ud800\\udc00m << B",
1428 "&a << B / \\ud800\\udc00m",
1429 };
1430 #endif
1431
1432 UErrorCode status = U_ZERO_ERROR;
1433 UCollator *coll;
1434 UChar rule[256] = {0};
1435 uint32_t rlen = 0;
1436 int i;
1437
1438 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1439 UCollationElements *iter1;
1440 int j = 0;
1441 log_verbose("Rule %s for testing\n", testrules[i]);
1442 rlen = u_unescape(testrules[i], rule, 32);
1443 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1444 if (U_FAILURE(status)) {
1445 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1446 return;
1447 }
1448 iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1449 if (U_FAILURE(status)) {
1450 log_err("Collation iterator creation failed\n");
1451 return;
1452 }
1453 while (j < 2) {
1454 UCollationElements *iter2 = ucol_openElements(coll,
1455 &(testdata[i][j]),
1456 1, &status);
1457 uint32_t ce;
1458 if (U_FAILURE(status)) {
1459 log_err("Collation iterator creation failed\n");
1460 return;
1461 }
1462 ce = ucol_next(iter2, &status);
1463 while (ce != UCOL_NULLORDER) {
1464 if ((uint32_t)ucol_next(iter1, &status) != ce) {
1465 log_err("Collation elements in contraction split does not match\n");
1466 return;
1467 }
1468 ce = ucol_next(iter2, &status);
1469 }
1470 j ++;
1471 ucol_closeElements(iter2);
1472 }
1473 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1474 log_err("Collation elements not exhausted\n");
1475 return;
1476 }
1477 ucol_closeElements(iter1);
1478 ucol_close(coll);
1479 }
1480
1481 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1482 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1483 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1484 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1485 testdata2[0][0], testdata2[0][1], testdata2[1][0],
1486 testdata2[1][1]);
1487 return;
1488 }
1489 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1490 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1491 testdata2[1][0], testdata2[1][1], testdata2[2][0],
1492 testdata2[2][1]);
1493 return;
1494 }
1495 ucol_close(coll);
1496 #if 0 /* see above */
1497 for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1498 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1499 UCollator *coll1,
1500 *coll2;
1501 UCollationElements *iter1,
1502 *iter2;
1503 UChar ch = 0x0042 /* 'B' */;
1504 uint32_t ce;
1505 rlen = u_unescape(testrules3[i], rule, 32);
1506 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1507 rlen = u_unescape(testrules3[i + 1], rule, 32);
1508 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509 if (U_FAILURE(status)) {
1510 log_err("Collator creation failed %s\n", testrules[i]);
1511 return;
1512 }
1513 iter1 = ucol_openElements(coll1, &ch, 1, &status);
1514 iter2 = ucol_openElements(coll2, &ch, 1, &status);
1515 if (U_FAILURE(status)) {
1516 log_err("Collation iterator creation failed\n");
1517 return;
1518 }
1519 ce = ucol_next(iter1, &status);
1520 if (U_FAILURE(status)) {
1521 log_err("Retrieving ces failed\n");
1522 return;
1523 }
1524 while (ce != UCOL_NULLORDER) {
1525 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1526 if (ce == ce2) {
1527 log_verbose("CEs match: %08x\n", ce);
1528 } else {
1529 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1530 return;
1531 }
1532 ce = ucol_next(iter1, &status);
1533 if (U_FAILURE(status)) {
1534 log_err("Retrieving ces failed\n");
1535 return;
1536 }
1537 }
1538 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1539 log_err("CEs not exhausted\n");
1540 return;
1541 }
1542 ucol_closeElements(iter1);
1543 ucol_closeElements(iter2);
1544 ucol_close(coll1);
1545 ucol_close(coll2);
1546 }
1547 #endif
1548 }
1549
TestExpansion(void)1550 static void TestExpansion(void) {
1551 const static char *testrules[] = {
1552 #if 0
1553 /*
1554 * This seems to have tested that M was not mapped to an expansion.
1555 * I believe the old builder just did that because it computed the extension CEs
1556 * at the very end, which was a bug.
1557 * Among other problems, it violated the core tailoring principle
1558 * by making an earlier rule depend on a later one.
1559 * And, of course, if M did not get an expansion, then it was primary different from K,
1560 * unlike what the rule &K<<M says.
1561 */
1562 "&J << K / B & K << M",
1563 #endif
1564 "&J << K / B << M"
1565 };
1566 const static UChar testdata[][3] = {
1567 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1568 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1569 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1570 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1571 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1572 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1573 };
1574
1575 UErrorCode status = U_ZERO_ERROR;
1576 UCollator *coll;
1577 UChar rule[256] = {0};
1578 uint32_t rlen = 0;
1579 int i;
1580
1581 for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1582 int j = 0;
1583 log_verbose("Rule %s for testing\n", testrules[i]);
1584 rlen = u_unescape(testrules[i], rule, 32);
1585 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1586 if (U_FAILURE(status)) {
1587 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1588 return;
1589 }
1590
1591 for (j = 0; j < 5; j ++) {
1592 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1593 }
1594 ucol_close(coll);
1595 }
1596 }
1597
#if 0
/*
 * This test exercises the current limitations of the collation engine.
 * It always fails, so it is disabled by default.
 *
 * Each brace-delimited section below defines one rule string plus one or more
 * expected orderings and feeds them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};
        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
        genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        /* Both orderings are run with decomposition mode on and off. */
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* normalization: contractions spanning normalization */
    /* NOTE(review): rule and data are identical to the previous section; only
       the log message differs. Confirm whether different data was intended. */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        /* att[k] is set to valOn[k] or valOff[k]; the arrays are parallel. */
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
        genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
        /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
    }

}
#endif
1689
TestBocsuCoverage(void)1690 static void TestBocsuCoverage(void) {
1691 UErrorCode status = U_ZERO_ERROR;
1692 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1693 UChar test[256] = {0};
1694 uint32_t tlen = u_unescape(testString, test, 32);
1695 uint8_t key[256] = {0};
1696 uint32_t klen = 0;
1697
1698 UCollator *coll = ucol_open("", &status);
1699 if(U_SUCCESS(status)) {
1700 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1701
1702 klen = ucol_getSortKey(coll, test, tlen, key, 256);
1703 (void)klen; /* Suppress set but not used warning. */
1704
1705 ucol_close(coll);
1706 } else {
1707 log_data_err("Couldn't open UCA\n");
1708 }
1709 }
1710
TestVariableTopSetting(void)1711 static void TestVariableTopSetting(void) {
1712 UErrorCode status = U_ZERO_ERROR;
1713 uint32_t varTopOriginal = 0, varTop1, varTop2;
1714 UCollator *coll = ucol_open("", &status);
1715 if(U_SUCCESS(status)) {
1716
1717 static const UChar nul = 0;
1718 static const UChar space = 0x20;
1719 static const UChar dot = 0x2e; /* punctuation */
1720 static const UChar degree = 0xb0; /* symbol */
1721 static const UChar dollar = 0x24; /* currency symbol */
1722 static const UChar zero = 0x30; /* digit */
1723
1724 varTopOriginal = ucol_getVariableTop(coll, &status);
1725 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1726 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1727
1728 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1729 varTop2 = ucol_getVariableTop(coll, &status);
1730 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1731 if(U_FAILURE(status) || varTop1 != varTop2 ||
1732 !ucol_equal(coll, &nul, 0, &space, 1) ||
1733 ucol_equal(coll, &nul, 0, &dot, 1) ||
1734 ucol_equal(coll, &nul, 0, °ree, 1) ||
1735 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1736 ucol_equal(coll, &nul, 0, &zero, 1) ||
1737 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1738 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1739 }
1740
1741 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1742 varTop2 = ucol_getVariableTop(coll, &status);
1743 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1744 if(U_FAILURE(status) || varTop1 != varTop2 ||
1745 !ucol_equal(coll, &nul, 0, &space, 1) ||
1746 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1747 ucol_equal(coll, &nul, 0, °ree, 1) ||
1748 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1749 ucol_equal(coll, &nul, 0, &zero, 1) ||
1750 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1751 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1752 }
1753
1754 varTop1 = ucol_setVariableTop(coll, °ree, 1, &status);
1755 varTop2 = ucol_getVariableTop(coll, &status);
1756 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757 if(U_FAILURE(status) || varTop1 != varTop2 ||
1758 !ucol_equal(coll, &nul, 0, &space, 1) ||
1759 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1761 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762 ucol_equal(coll, &nul, 0, &zero, 1) ||
1763 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1764 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1765 }
1766
1767 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768 varTop2 = ucol_getVariableTop(coll, &status);
1769 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770 if(U_FAILURE(status) || varTop1 != varTop2 ||
1771 !ucol_equal(coll, &nul, 0, &space, 1) ||
1772 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1774 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775 ucol_equal(coll, &nul, 0, &zero, 1) ||
1776 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1778 }
1779
1780 log_verbose("Testing setting variable top to contractions\n");
1781 {
1782 UChar first[4] = { 0 };
1783 first[0] = 0x0040;
1784 first[1] = 0x0050;
1785 first[2] = 0x0000;
1786
1787 status = U_ZERO_ERROR;
1788 ucol_setVariableTop(coll, first, -1, &status);
1789
1790 if(U_SUCCESS(status)) {
1791 log_err("Invalid contraction succeded in setting variable top!\n");
1792 }
1793
1794 }
1795
1796 log_verbose("Test restoring variable top\n");
1797
1798 status = U_ZERO_ERROR;
1799 ucol_restoreVariableTop(coll, varTopOriginal, &status);
1800 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1801 log_err("Couldn't restore old variable top\n");
1802 }
1803
1804 log_verbose("Testing calling with error set\n");
1805
1806 status = U_INTERNAL_PROGRAM_ERROR;
1807 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1808 varTop2 = ucol_getVariableTop(coll, &status);
1809 ucol_restoreVariableTop(coll, varTop2, &status);
1810 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1811 varTop2 = ucol_getVariableTop(NULL, &status);
1812 ucol_restoreVariableTop(NULL, varTop2, &status);
1813 if(status != U_INTERNAL_PROGRAM_ERROR) {
1814 log_err("Bad reaction to passed error!\n");
1815 }
1816 ucol_close(coll);
1817 } else {
1818 log_data_err("Couldn't open UCA collator\n");
1819 }
1820 }
1821
TestMaxVariable()1822 static void TestMaxVariable() {
1823 UErrorCode status = U_ZERO_ERROR;
1824 UColReorderCode oldMax, max;
1825 UCollator *coll;
1826
1827 static const UChar nul = 0;
1828 static const UChar space = 0x20;
1829 static const UChar dot = 0x2e; /* punctuation */
1830 static const UChar degree = 0xb0; /* symbol */
1831 static const UChar dollar = 0x24; /* currency symbol */
1832 static const UChar zero = 0x30; /* digit */
1833
1834 coll = ucol_open("", &status);
1835 if(U_FAILURE(status)) {
1836 log_data_err("Couldn't open root collator\n");
1837 return;
1838 }
1839
1840 oldMax = ucol_getMaxVariable(coll);
1841 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845 max = ucol_getMaxVariable(coll);
1846 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848 !ucol_equal(coll, &nul, 0, &space, 1) ||
1849 ucol_equal(coll, &nul, 0, &dot, 1) ||
1850 ucol_equal(coll, &nul, 0, °ree, 1) ||
1851 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852 ucol_equal(coll, &nul, 0, &zero, 1) ||
1853 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1855 }
1856
1857 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858 max = ucol_getMaxVariable(coll);
1859 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861 !ucol_equal(coll, &nul, 0, &space, 1) ||
1862 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863 ucol_equal(coll, &nul, 0, °ree, 1) ||
1864 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865 ucol_equal(coll, &nul, 0, &zero, 1) ||
1866 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) {
1867 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1868 }
1869
1870 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871 max = ucol_getMaxVariable(coll);
1872 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874 !ucol_equal(coll, &nul, 0, &space, 1) ||
1875 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1877 ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878 ucol_equal(coll, &nul, 0, &zero, 1) ||
1879 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) {
1880 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1881 }
1882
1883 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884 max = ucol_getMaxVariable(coll);
1885 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887 !ucol_equal(coll, &nul, 0, &space, 1) ||
1888 !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889 !ucol_equal(coll, &nul, 0, °ree, 1) ||
1890 !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891 ucol_equal(coll, &nul, 0, &zero, 1) ||
1892 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1894 }
1895
1896 log_verbose("Test restoring maxVariable\n");
1897 status = U_ZERO_ERROR;
1898 ucol_setMaxVariable(coll, oldMax, &status);
1899 if(oldMax != ucol_getMaxVariable(coll)) {
1900 log_err("Couldn't restore old maxVariable\n");
1901 }
1902
1903 log_verbose("Testing calling with error set\n");
1904 status = U_INTERNAL_PROGRAM_ERROR;
1905 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906 max = ucol_getMaxVariable(coll);
1907 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908 log_err("Bad reaction to passed error!\n");
1909 }
1910 ucol_close(coll);
1911 }
1912
TestNonChars(void)1913 static void TestNonChars(void) {
1914 static const char *test[] = {
1915 "\\u0000", /* ignorable */
1916 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
1917 "\\uFDD0", "\\uFDEF",
1918 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
1919 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
1920 "\\U0003FFFE", "\\U0003FFFF",
1921 "\\U0004FFFE", "\\U0004FFFF",
1922 "\\U0005FFFE", "\\U0005FFFF",
1923 "\\U0006FFFE", "\\U0006FFFF",
1924 "\\U0007FFFE", "\\U0007FFFF",
1925 "\\U0008FFFE", "\\U0008FFFF",
1926 "\\U0009FFFE", "\\U0009FFFF",
1927 "\\U000AFFFE", "\\U000AFFFF",
1928 "\\U000BFFFE", "\\U000BFFFF",
1929 "\\U000CFFFE", "\\U000CFFFF",
1930 "\\U000DFFFE", "\\U000DFFFF",
1931 "\\U000EFFFE", "\\U000EFFFF",
1932 "\\U000FFFFE", "\\U000FFFFF",
1933 "\\U0010FFFE", "\\U0010FFFF",
1934 "\\uFFFF" /* special character with maximum primary weight */
1935 };
1936 UErrorCode status = U_ZERO_ERROR;
1937 UCollator *coll = ucol_open("en_US", &status);
1938
1939 log_verbose("Test non characters\n");
1940
1941 if(U_SUCCESS(status)) {
1942 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1943 } else {
1944 log_err_status(status, "Unable to open collator\n");
1945 }
1946
1947 ucol_close(coll);
1948 }
1949
/*
 * Runs genericLocaleStarter() for "en_US" on sets of four long strings that
 * share a run of 'a' characters and differ only in their last character
 * ('a', 'b', 'c', 'd'). The total string length j goes from 20 to 499.
 *
 * The strings live in fixed static storage rather than in malloc()ed
 * buffers: the previous version never checked the malloc() results, so an
 * allocation failure would have been a NULL dereference.
 */
static void TestExtremeCompression(void) {
    /* 2048 bytes per string, as before; the longest string needs 500 bytes. */
    static char buffers[4][2048];
    const char *test[4];
    int32_t j = 0, i = 0;

    for(i = 0; i<4; i++) {
        test[i] = buffers[i];
    }

    for(j = 20; j < 500; j++) {
        for(i = 0; i<4; i++) {
            /* j-1 copies of 'a', then the distinguishing letter, then NUL */
            uprv_memset(buffers[i], 'a', (j-1)*sizeof(char));
            buffers[i][j-1] = (char)('a'+i);
            buffers[i][j] = 0;
        }
        genericLocaleStarter("en_US", test, 4);
    }
}
1972
/*
 * Disabled variant of TestExtremeCompression. The #if 0 guard keeps it out
 * of the build, so it is not compiled or run.
 */
#if 0
static void TestExtremeCompression(void) {
    static char *test[4];
    int32_t j = 0, i = 0;
    UErrorCode status = U_ZERO_ERROR;
    /* NOTE(review): status is passed by value here; other calls in this file pass &status. */
    UCollator *coll = ucol_open("en_US", status);
    for(i = 0; i<4; i++) {
        test[i] = (char *)malloc(2048*sizeof(char));
    }
    /* Each pass overwrites the same four buffers; only the last contents reach the call below. */
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<4; i++) {
            uprv_memset(test[i], 'a', (j-2)*sizeof(char));
            test[i][j-1] = (char)('a'+i);
            test[i][j] = 0;
        }
    }
    genericLocaleStarter("en_US", (const char **)test, 4);

    /* Refills only test[0] (i < 1); the result is not used before the buffers are freed. */
    for(j = 10; j < 2048; j++) {
        for(i = 0; i<1; i++) {
            uprv_memset(test[i], 'a', (j-1)*sizeof(char));
            test[i][j] = 0;
        }
    }
    for(i = 0; i<4; i++) {
        free(test[i]);
    }
}
#endif
2002
/*
 * Builds a collator from rules that tailor supplementary characters
 * (written as escaped surrogate pairs) and runs genericRulesStarter() on a
 * list of strings that use those characters.
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25   < \\ud805\\udc50"
        "< \\ud800\\udc00y  < \\ud800\\udc00r"
        "< \\ud800\\udc00f  << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a  < c < b" ;

    static const char *test[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };

    /* all 14 strings take part */
    const uint32_t testCount = (uint32_t)(sizeof(test)/sizeof(test[0]));

    genericRulesStarter(rule, test, testCount);
}
2023
/*
 * Test of the prefix implementation ("prefix|string" rules), which is used
 * by the JIS X 4061 collation rules.
 *
 * Every case pairs a rule string with data strings; the rules and the first
 * `len` data strings are passed to genericRulesStarter().
 */
static void TestPrefix(void) {
    static const struct {
        const char *rules;
        const char *data[50];
        const uint32_t len;
    } prefixCases[] = {
        { "&z <<< z|a",
          {"zz", "za"}, 2 },

        { "&z <<< z|   a",
          {"zz", "za"}, 2 },
        { "[strength I]"
          "&a=\\ud900\\udc25"
          "&z<<<\\ud900\\udc25|a",
          {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
    };
    uint32_t caseIndex = 0;

    while(caseIndex < UPRV_LENGTHOF(prefixCases)) {
        genericRulesStarter(prefixCases[caseIndex].rules,
                            prefixCases[caseIndex].data,
                            prefixCases[caseIndex].len);
        ++caseIndex;
    }
}
2049
2050 /* This test uses data suplied by Masashiko Maedera to test the implementation */
2051 /* JIS X 4061 collation order implementation */
TestNewJapanese(void)2052 static void TestNewJapanese(void) {
2053
2054 static const char * const test1[] = {
2055 "\\u30b7\\u30e3\\u30fc\\u30ec",
2056 "\\u30b7\\u30e3\\u30a4",
2057 "\\u30b7\\u30e4\\u30a3",
2058 "\\u30b7\\u30e3\\u30ec",
2059 "\\u3061\\u3087\\u3053",
2060 "\\u3061\\u3088\\u3053",
2061 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2062 "\\u3066\\u30fc\\u305f",
2063 "\\u30c6\\u30fc\\u30bf",
2064 "\\u30c6\\u30a7\\u30bf",
2065 "\\u3066\\u3048\\u305f",
2066 "\\u3067\\u30fc\\u305f",
2067 "\\u30c7\\u30fc\\u30bf",
2068 "\\u30c7\\u30a7\\u30bf",
2069 "\\u3067\\u3048\\u305f",
2070 "\\u3066\\u30fc\\u305f\\u30fc",
2071 "\\u30c6\\u30fc\\u30bf\\u30a1",
2072 "\\u30c6\\u30a7\\u30bf\\u30fc",
2073 "\\u3066\\u3047\\u305f\\u3041",
2074 "\\u3066\\u3048\\u305f\\u30fc",
2075 "\\u3067\\u30fc\\u305f\\u30fc",
2076 "\\u30c7\\u30fc\\u30bf\\u30a1",
2077 "\\u3067\\u30a7\\u305f\\u30a1",
2078 "\\u30c7\\u3047\\u30bf\\u3041",
2079 "\\u30c7\\u30a8\\u30bf\\u30a2",
2080 "\\u3072\\u3086",
2081 "\\u3073\\u3085\\u3042",
2082 "\\u3074\\u3085\\u3042",
2083 "\\u3073\\u3085\\u3042\\u30fc",
2084 "\\u30d3\\u30e5\\u30a2\\u30fc",
2085 "\\u3074\\u3085\\u3042\\u30fc",
2086 "\\u30d4\\u30e5\\u30a2\\u30fc",
2087 "\\u30d2\\u30e5\\u30a6",
2088 "\\u30d2\\u30e6\\u30a6",
2089 "\\u30d4\\u30e5\\u30a6\\u30a2",
2090 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2091 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2092 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2093 "\\u3072\\u3085\\u3093",
2094 "\\u3074\\u3085\\u3093",
2095 "\\u3075\\u30fc\\u308a",
2096 "\\u30d5\\u30fc\\u30ea",
2097 "\\u3075\\u3045\\u308a",
2098 "\\u3075\\u30a5\\u308a",
2099 "\\u3075\\u30a5\\u30ea",
2100 "\\u30d5\\u30a6\\u30ea",
2101 "\\u3076\\u30fc\\u308a",
2102 "\\u30d6\\u30fc\\u30ea",
2103 "\\u3076\\u3045\\u308a",
2104 "\\u30d6\\u30a5\\u308a",
2105 "\\u3077\\u3046\\u308a",
2106 "\\u30d7\\u30a6\\u30ea",
2107 "\\u3075\\u30fc\\u308a\\u30fc",
2108 "\\u30d5\\u30a5\\u30ea\\u30fc",
2109 "\\u3075\\u30a5\\u308a\\u30a3",
2110 "\\u30d5\\u3045\\u308a\\u3043",
2111 "\\u30d5\\u30a6\\u30ea\\u30fc",
2112 "\\u3075\\u3046\\u308a\\u3043",
2113 "\\u30d6\\u30a6\\u30ea\\u30a4",
2114 "\\u3077\\u30fc\\u308a\\u30fc",
2115 "\\u3077\\u30a5\\u308a\\u30a4",
2116 "\\u3077\\u3046\\u308a\\u30fc",
2117 "\\u30d7\\u30a6\\u30ea\\u30a4",
2118 "\\u30d5\\u30fd",
2119 "\\u3075\\u309e",
2120 "\\u3076\\u309d",
2121 "\\u3076\\u3075",
2122 "\\u3076\\u30d5",
2123 "\\u30d6\\u3075",
2124 "\\u30d6\\u30d5",
2125 "\\u3076\\u309e",
2126 "\\u3076\\u3077",
2127 "\\u30d6\\u3077",
2128 "\\u3077\\u309d",
2129 "\\u30d7\\u30fd",
2130 "\\u3077\\u3075",
2131 };
2132
2133 static const char *test2[] = {
2134 "\\u306f\\u309d", /* H\\u309d */
2135 "\\u30cf\\u30fd", /* K\\u30fd */
2136 "\\u306f\\u306f", /* HH */
2137 "\\u306f\\u30cf", /* HK */
2138 "\\u30cf\\u30cf", /* KK */
2139 "\\u306f\\u309e", /* H\\u309e */
2140 "\\u30cf\\u30fe", /* K\\u30fe */
2141 "\\u306f\\u3070", /* HH\\u309b */
2142 "\\u30cf\\u30d0", /* KK\\u309b */
2143 "\\u306f\\u3071", /* HH\\u309c */
2144 "\\u30cf\\u3071", /* KH\\u309c */
2145 "\\u30cf\\u30d1", /* KK\\u309c */
2146 "\\u3070\\u309d", /* H\\u309b\\u309d */
2147 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2148 "\\u3070\\u306f", /* H\\u309bH */
2149 "\\u30d0\\u30cf", /* K\\u309bK */
2150 "\\u3070\\u309e", /* H\\u309b\\u309e */
2151 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2152 "\\u3070\\u3070", /* H\\u309bH\\u309b */
2153 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2154 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2155 "\\u3070\\u3071", /* H\\u309bH\\u309c */
2156 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2157 "\\u3071\\u309d", /* H\\u309c\\u309d */
2158 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2159 "\\u3071\\u306f", /* H\\u309cH */
2160 "\\u30d1\\u30cf", /* K\\u309cK */
2161 "\\u3071\\u3070", /* H\\u309cH\\u309b */
2162 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2163 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2164 "\\u3071\\u3071", /* H\\u309cH\\u309c */
2165 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2166 };
2167 /*
2168 static const char *test3[] = {
2169 "\\u221er\\u221e",
2170 "\\u221eR#",
2171 "\\u221et\\u221e",
2172 "#r\\u221e",
2173 "#R#",
2174 "#t%",
2175 "#T%",
2176 "8t\\u221e",
2177 "8T\\u221e",
2178 "8t#",
2179 "8T#",
2180 "8t%",
2181 "8T%",
2182 "8t8",
2183 "8T8",
2184 "\\u03c9r\\u221e",
2185 "\\u03a9R%",
2186 "rr\\u221e",
2187 "rR\\u221e",
2188 "Rr\\u221e",
2189 "RR\\u221e",
2190 "RT%",
2191 "rt8",
2192 "tr\\u221e",
2193 "tr8",
2194 "TR8",
2195 "tt8",
2196 "\\u30b7\\u30e3\\u30fc\\u30ec",
2197 };
2198 */
2199 static const UColAttribute att[] = { UCOL_STRENGTH };
2200 static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2201
2202 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2203 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2204
2205 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2206 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2207 /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2208 genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2209 genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2210 }
2211
TestStrCollIdenticalPrefix(void)2212 static void TestStrCollIdenticalPrefix(void) {
2213 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2214 const char* test[] = {
2215 "ab\\ud9b0\\udc70",
2216 "ab\\ud9b0\\udc71"
2217 };
2218 genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2219 }
2220 /* Contractions should have all their canonically equivalent */
2221 /* strings included */
TestContractionClosure(void)2222 static void TestContractionClosure(void) {
2223 static const struct {
2224 const char *rules;
2225 const char *data[10];
2226 const uint32_t len;
2227 } tests[] = {
2228 { "&b=\\u00e4\\u00e4",
2229 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2230 { "&b=\\u00C5",
2231 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2232 };
2233 uint32_t i;
2234
2235
2236 for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2237 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2238 }
2239 }
2240
/*
 * Runs rule sets that combine "[before 3]" resets with other tailorings of
 * the same characters (the original note on this test says it also fails).
 * The rules and data of every case go to genericRulesStarter().
 *
 * The section under #if 0 at the end is disabled debugging code.
 */
static void TestBeforePrefixFailure(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } beforeCases[] = {
        { "&g <<< a"
          "&[before 3]\\uff41 <<< x",
          {"x", "\\uff41"}, 2 },
        { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74"
          "&[before 3]\\u30a7<<<\\u30a9",
          {"\\u30a9", "\\u30a7"}, 2 },
        { "&[before 3]\\u30a7<<<\\u30a9"
          "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
          "&\\u30A8=\\u30A8=\\u3048=\\uff74",
          {"\\u30a9", "\\u30a7"}, 2 },
    };
    uint32_t caseIndex = 0;

    while(caseIndex < UPRV_LENGTHOF(beforeCases)) {
        genericRulesStarter(beforeCases[caseIndex].rules,
                            beforeCases[caseIndex].data,
                            beforeCases[caseIndex].len);
        ++caseIndex;
    }

#if 0
    const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
    const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
    const char* test[] = {
        "\\u30c6\\u30fc\\u30bf",
        "\\u30c6\\u30a7\\u30bf",
    };
    genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
    genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
    /* this piece of code should be in some sort of verbose mode     */
    /* it gets the collation elements for elements and prints them   */
    /* This is useful when trying to see whether the problem is      */
    {
        UErrorCode status = U_ZERO_ERROR;
        uint32_t i = 0;
        UCollationElements *it = NULL;
        uint32_t CE;
        UChar string[256];
        uint32_t uStringLen;
        UCollator *coll = NULL;

        uStringLen = u_unescape(rule1, string, 256);

        coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

        /*coll = ucol_open("ja_JP_JIS", &status);*/
        it = ucol_openElements(coll, string, 0, &status);

        for(i = 0; i < UPRV_LENGTHOF(test); i++) {
            log_verbose("%s\n", test[i]);
            uStringLen = u_unescape(test[i], string, 256);
            ucol_setText(it, string, uStringLen, &status);

            while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
                log_verbose("%08X\n", CE);
            }
            log_verbose("\n");

        }

        ucol_closeElements(it);
        ucol_close(coll);
    }
#endif
}
2318
TestPrefixCompose(void)2319 static void TestPrefixCompose(void) {
2320 const char* rule1 =
2321 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2322 /*
2323 const char* test[] = {
2324 "\\u30c6\\u30fc\\u30bf",
2325 "\\u30c6\\u30a7\\u30bf",
2326 };
2327 */
2328 {
2329 UErrorCode status = U_ZERO_ERROR;
2330 /*uint32_t i = 0;*/
2331 /*UCollationElements *it = NULL;*/
2332 /* uint32_t CE;*/
2333 UChar string[256];
2334 uint32_t uStringLen;
2335 UCollator *coll = NULL;
2336
2337 uStringLen = u_unescape(rule1, string, 256);
2338
2339 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2340 ucol_close(coll);
2341 }
2342
2343
2344 }
2345
/*
 * Tests the special reset positions that can be used in rules:
 *
 * [last variable]              last variable value
 * [last primary ignorable]     largest CE for primary ignorable
 * [last secondary ignorable]   largest CE for secondary ignorable
 * [last tertiary ignorable]    largest CE for tertiary ignorable
 * [top]                        guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
 *
 * The rules and the first `len` data strings of every case are passed to
 * genericRulesStarter().
 */
static void TestRuleOptions(void) {
    /* values here are hardcoded and are correct for the current UCA
     * when the UCA changes, one might be forced to change these
     * values.
     */

    /*
     * These strings contain the last character before [variable top]
     * and the first and second characters (by primary weights) after it.
     * See FractionalUCA.txt. For example:
         [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
         [variable top = 0C FE]
         [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
       and
         00B4; [0D 0C, 05, 05]
     *
     * Note: Starting with UCA 6.0, the [variable top] collation element
     * is not the weight of any character or string,
     * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
     */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

    /*
     * This string has to match the character that has the [last regular] weight
     * which changes with each UCA version.
     * See the bottom of FractionalUCA.txt which says something like
         [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
     *
     * Note: Starting with UCA 6.0, the [last regular] collation element
     * is not the weight of any character or string,
     * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
     */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } optionCases[] = {
#if 0
        /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
        /* - all befores here amount to zero */
        { "&[before 3][first tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        }, /* you cannot go before first tertiary ignorable */

        { "&[before 3][last tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        }, /* you cannot go before last tertiary ignorable */
#endif
        /*
         * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
         * and it *is* possible to "go before" that.
         */
        { "&[before 3][first secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        { "&[before 3][last secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* 'normal' befores */

        /*
         * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
         * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
         * because there is no tailoring space before that boundary.
         * Made the tests work by tailoring to a space instead.
         */
        { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
            { "c", "b", "\\u0332", "a" }, 4
        },

        /* we don't have a code point that corresponds to
         * the last primary ignorable
         */
        { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
            { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
        },

        { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
            { "c", "b", "\\u0009", "a", "\\u000a" }, 5
        },

        { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
            { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
        },

        { "&[first regular]<a"
          "&[before 1][first regular]<b",
            { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
        },

        { "&[before 1][last regular]<b"
          "&[last regular]<a",
            { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
        },

        { "&[before 1][first implicit]<b"
          "&[first implicit]<a",
            { "b", "\\u4e00", "a", "\\u4e01"}, 4
        },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
        { "&[before 1][last implicit]<b"
          "&[last implicit]<a",
            { "b", "\\U0010FFFD", "a" }, 3
        },
#endif
        { "&[last variable]<z"
          "&' '<x"  /* was &[last primary ignorable]<x, see above */
          "&[last secondary ignorable]<<y"
          "&[last tertiary ignorable]<<<w"
          "&[top]<u",
            {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
        }

    };
    uint32_t caseIndex = 0;

    while(caseIndex < UPRV_LENGTHOF(optionCases)) {
        genericRulesStarter(optionCases[caseIndex].rules,
                            optionCases[caseIndex].data,
                            optionCases[caseIndex].len);
        ++caseIndex;
    }
}
2480
2481
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * (requested through an [optimize ...] rule) fails. The functionality
 * remains the same, so there is nothing more specific to verify.
 */
static void TestOptimize(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } optimizeCases[] = {
        { "[optimize [\\uAC00-\\uD7FF]]",
          { "a", "b"}, 2}
    };
    uint32_t caseIndex = 0;

    while(caseIndex < UPRV_LENGTHOF(optimizeCases)) {
        genericRulesStarter(optimizeCases[caseIndex].rules,
                            optimizeCases[caseIndex].data,
                            optimizeCases[caseIndex].len);
        ++caseIndex;
    }
}
2503
2504 /*
2505 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2506 weiv ucol_strcollIter?
2507 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2508 weiv these are the input strings?
2509 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2510 weiv will check - could be a problem with utf-8 iterator
2511 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2512 weiv hmmm
2513 cycheng@ca.ibm.c... note that we have a standalone high surrogate
2514 weiv that doesn't sound right
2515 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2516 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
2517 cycheng@ca.ibm.c... yes
2518 weiv and then do the comparison
2519 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2520 weiv utf-16 strings look like a little endian ones in the example you sent me
2521 weiv It could be a bug - let me try to test it out
2522 cycheng@ca.ibm.c... ok
2523 cycheng@ca.ibm.c... we can wait till the conf. call
2524 cycheng@ca.ibm.c... next weke
2525 weiv that would be great
2526 weiv hmmm
2527 weiv I might be wrong
2528 weiv let me play with it some more
2529 cycheng@ca.ibm.c... ok
2530 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
2531 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2532 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2533 weiv ok
2534 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2535 weiv thanks
2536 cycheng@ca.ibm.c... the 4 strings we sent are just samples
2537 */
/*
 * Disabled test (excluded from the build by #if 0): compares the same two
 * strings through a UTF-16BE iterator and through a UTF-8 iterator and
 * reports an error when the two collation results differ.
 */
#if 0
static void Alexis(void) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open("", &status);


    /* the two strings as UTF-16BE bytes: U+D800 U+0021 and U+FFFC U+0062 */
    const char utf16be[2][4] = {
        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
    };

    /* the corresponding byte sequences used for the UTF-8 iterators */
    const char utf8[2][4] = {
        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
    };

    UCharIterator iterU161, iterU162;
    UCharIterator iterU81, iterU82;

    UCollationResult resU16, resU8;

    uiter_setUTF16BE(&iterU161, utf16be[0], 4);
    uiter_setUTF16BE(&iterU162, utf16be[1], 4);

    uiter_setUTF8(&iterU81, utf8[0], 4);
    uiter_setUTF8(&iterU82, utf8[1], 4);

    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
    resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


    if(resU16 != resU8) {
        log_err("different results\n");
    }

    ucol_close(coll);
}
#endif
2578
2579 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
Alexis2(void)2580 static void Alexis2(void) {
2581 UErrorCode status = U_ZERO_ERROR;
2582 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2583 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2584 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2586
2587 UConverter *conv = NULL;
2588
2589 UCharIterator U16BEItS, U16BEItT;
2590 UCharIterator U8ItS, U8ItT;
2591
2592 UCollationResult resU16, resU16BE, resU8;
2593
2594 static const char* const pairs[][2] = {
2595 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2596 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2597 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2598 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2599 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2600 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2601 { "\\u0020", "\\u0020\\u0000"}
2602 /*
2603 5F20 (my result here)
2604 5F204E008E3F
2605 5F20 (your result here)
2606 */
2607 };
2608
2609 int32_t i = 0;
2610
2611 UCollator *coll = ucol_open("", &status);
2612 if(status == U_FILE_ACCESS_ERROR) {
2613 log_data_err("Is your data around?\n");
2614 return;
2615 } else if(U_FAILURE(status)) {
2616 log_err("Error opening collator\n");
2617 return;
2618 }
2619 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2620 conv = ucnv_open("UTF16BE", &status);
2621 for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2622 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2623 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624
2625 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2626
2627 log_verbose("Result of strcoll is %i\n", resU16);
2628
2629 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2630 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2631 (void)U16BELenS; /* Suppress set but not used warnings. */
2632 (void)U16BELenT;
2633
2634 /* use the original sizes, as the result from converter is in bytes */
2635 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2636 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2637
2638 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2639
2640 log_verbose("Result of U16BE is %i\n", resU16BE);
2641
2642 if(resU16 != resU16BE) {
2643 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2644 }
2645
2646 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2647 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2648
2649 uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2650 uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2651
2652 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2653
2654 if(resU16 != resU8) {
2655 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2656 }
2657
2658 }
2659
2660 ucol_close(coll);
2661 ucnv_close(conv);
2662 }
2663
TestHebrewUCA(void)2664 static void TestHebrewUCA(void) {
2665 UErrorCode status = U_ZERO_ERROR;
2666 static const char *first[] = {
2667 "d790d6b8d79cd795d6bcd7a9",
2668 "d790d79cd79ed7a7d799d799d7a1",
2669 "d790d6b4d79ed795d6bcd7a9",
2670 };
2671
2672 char utf8String[3][256];
2673 UChar utf16String[3][256];
2674
2675 int32_t i = 0, j = 0;
2676 int32_t sizeUTF8[3];
2677 int32_t sizeUTF16[3];
2678
2679 UCollator *coll = ucol_open("", &status);
2680 if (U_FAILURE(status)) {
2681 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2682 return;
2683 }
2684 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2685
2686 for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2687 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2688 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2689 log_verbose("%i: ");
2690 for(j = 0; j < sizeUTF16[i]; j++) {
2691 /*log_verbose("\\u%04X", utf16String[i][j]);*/
2692 log_verbose("%04X", utf16String[i][j]);
2693 }
2694 log_verbose("\n");
2695 }
2696 for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2697 for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2698 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2699 }
2700 }
2701
2702 ucol_close(coll);
2703
2704 }
2705
TestPartialSortKeyTermination(void)2706 static void TestPartialSortKeyTermination(void) {
2707 static const char* cases[] = {
2708 "\\u1234\\u1234\\udc00",
2709 "\\udc00\\ud800\\ud800"
2710 };
2711
2712 int32_t i;
2713
2714 UErrorCode status = U_ZERO_ERROR;
2715
2716 UCollator *coll = ucol_open("", &status);
2717
2718 UCharIterator iter;
2719
2720 UChar currCase[256];
2721 int32_t length = 0;
2722 int32_t pKeyLen = 0;
2723
2724 uint8_t key[256];
2725
2726 for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2727 uint32_t state[2] = {0, 0};
2728 length = u_unescape(cases[i], currCase, 256);
2729 uiter_setString(&iter, currCase, length);
2730 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2731 (void)pKeyLen; /* Suppress set but not used warning. */
2732
2733 log_verbose("Done\n");
2734
2735 }
2736 ucol_close(coll);
2737 }
2738
TestSettings(void)2739 static void TestSettings(void) {
2740 static const char* cases[] = {
2741 "apple",
2742 "Apple"
2743 };
2744
2745 static const char* locales[] = {
2746 "",
2747 "en"
2748 };
2749
2750 UErrorCode status = U_ZERO_ERROR;
2751
2752 int32_t i = 0, j = 0;
2753
2754 UChar source[256], target[256];
2755 int32_t sLen = 0, tLen = 0;
2756
2757 UCollator *collateObject = NULL;
2758 for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2759 collateObject = ucol_open(locales[i], &status);
2760 ucol_setStrength(collateObject, UCOL_PRIMARY);
2761 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2762 for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2763 sLen = u_unescape(cases[j-1], source, 256);
2764 source[sLen] = 0;
2765 tLen = u_unescape(cases[j], target, 256);
2766 source[tLen] = 0;
2767 doTest(collateObject, source, target, UCOL_EQUAL);
2768 }
2769 ucol_close(collateObject);
2770 }
2771 }
2772
/*
 * Runs the ucol_equals() checks for one locale and returns the number of
 * errors that were logged.
 *
 * locName - name of the locale both collators were opened for
 * source  - collator to test; it is not closed here
 * target  - second collator opened for the same locale; this function closes it
 *
 * When the actual locale of source equals locName, three more checks follow:
 * a clone must be equal to source, a clone with the French collation
 * attribute flipped must not be equal, and a collator built from the rules
 * of source must be equal (unless source has no rule string).
 *
 * Every collator created here is closed on all paths, including the error
 * returns.
 */
static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
    UErrorCode status = U_ZERO_ERROR;
    int32_t errorNo = 0;
    const UChar *sourceRules = NULL;
    int32_t sourceRulesLen = 0;
    UParseError parseError;
    UColAttributeValue french = UCOL_OFF;
    const char *actualLocale = NULL;

    if(!ucol_equals(source, target)) {
        log_err("Same collators, different address not equal\n");
        errorNo++;
    }
    ucol_close(target);
    target = NULL;

    actualLocale = ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status);
    if(U_FAILURE(status) || actualLocale == NULL) {
        /* do not hand a missing locale name to uprv_strcmp() */
        log_err("Error getting the actual locale - %s\n", u_errorName(status));
        errorNo++;
        return errorNo;
    }
    if(uprv_strcmp(locName, actualLocale) == 0) {
        target = ucol_safeClone(source, NULL, NULL, &status);
        if(U_FAILURE(status)) {
            log_err("Error creating clone\n");
            ucol_close(target);
            errorNo++;
            return errorNo;
        }
        if(!ucol_equals(source, target)) {
            log_err("Collator different from it's clone\n");
            errorNo++;
        }
        /* flip the French collation attribute so that the clone differs from source */
        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
        if(french == UCOL_ON) {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
        } else {
            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
        }
        if(U_FAILURE(status)) {
            log_err("Error setting attributes\n");
            /* the clone used to be leaked on this path */
            ucol_close(target);
            errorNo++;
            return errorNo;
        }
        if(ucol_equals(source, target)) {
            log_err("Collators same even when options changed\n");
            errorNo++;
        }
        ucol_close(target);
        target = NULL;

        sourceRules = ucol_getRules(source, &sourceRulesLen);
        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
        if(U_FAILURE(status)) {
            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
            ucol_close(target);
            errorNo++;
            return errorNo;
        }
        /* Note: The tailoring rule string is an optional data item. */
        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
            log_err("Collator different from collator that was created from the same rules\n");
            errorNo++;
        }
        ucol_close(target);
    }
    return errorNo;
}
2830
2831
TestEquals(void)2832 static void TestEquals(void) {
2833 /* ucol_equals is not currently a public API. There is a chance that it will become
2834 * something like this.
2835 */
2836 /* test whether the two collators instantiated from the same locale are equal */
2837 UErrorCode status = U_ZERO_ERROR;
2838 UParseError parseError;
2839 int32_t noOfLoc = uloc_countAvailable();
2840 const char *locName = NULL;
2841 UCollator *source = NULL, *target = NULL;
2842 int32_t i = 0;
2843
2844 const char* rules[] = {
2845 "&l < lj <<< Lj <<< LJ",
2846 "&n < nj <<< Nj <<< NJ",
2847 "&ae <<< \\u00e4",
2848 "&AE <<< \\u00c4"
2849 };
2850 /*
2851 const char* badRules[] = {
2852 "&l <<< Lj",
2853 "&n < nj <<< nJ <<< NJ",
2854 "&a <<< \\u00e4",
2855 "&AE <<< \\u00c4 <<< x"
2856 };
2857 */
2858
2859 UChar sourceRules[1024], targetRules[1024];
2860 int32_t sourceRulesSize = 0, targetRulesSize = 0;
2861 int32_t rulesSize = UPRV_LENGTHOF(rules);
2862
2863 for(i = 0; i < rulesSize; i++) {
2864 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2865 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2866 }
2867
2868 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2869 if(status == U_FILE_ACCESS_ERROR) {
2870 log_data_err("Is your data around?\n");
2871 return;
2872 } else if(U_FAILURE(status)) {
2873 log_err("Error opening collator\n");
2874 return;
2875 }
2876 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2877 if(!ucol_equals(source, target)) {
2878 log_err("Equivalent collators not equal!\n");
2879 }
2880 ucol_close(source);
2881 ucol_close(target);
2882
2883 source = ucol_open("root", &status);
2884 target = ucol_open("root", &status);
2885 log_verbose("Testing root\n");
2886 if(!ucol_equals(source, source)) {
2887 log_err("Same collator not equal\n");
2888 }
2889 if(TestEqualsForCollator("root", source, target)) {
2890 log_err("Errors for root\n");
2891 }
2892 ucol_close(source);
2893
2894 for(i = 0; i<noOfLoc; i++) {
2895 status = U_ZERO_ERROR;
2896 locName = uloc_getAvailable(i);
2897 /*if(hasCollationElements(locName)) {*/
2898 log_verbose("Testing equality for locale %s\n", locName);
2899 source = ucol_open(locName, &status);
2900 target = ucol_open(locName, &status);
2901 if (U_FAILURE(status)) {
2902 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status));
2903 continue;
2904 }
2905 if(TestEqualsForCollator(locName, source, target)) {
2906 log_err("Errors for locale %s\n", locName);
2907 }
2908 ucol_close(source);
2909 /*}*/
2910 }
2911 }
2912
TestJ2726(void)2913 static void TestJ2726(void) {
2914 UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2915 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2916 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2917 UErrorCode status = U_ZERO_ERROR;
2918 UCollator *coll = ucol_open("en", &status);
2919 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2920 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2921 doTest(coll, a, aSpace, UCOL_EQUAL);
2922 doTest(coll, aSpace, a, UCOL_EQUAL);
2923 doTest(coll, a, spaceA, UCOL_EQUAL);
2924 doTest(coll, spaceA, a, UCOL_EQUAL);
2925 doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2926 doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2927 ucol_close(coll);
2928 }
2929
NullRule(void)2930 static void NullRule(void) {
2931 UChar r[3] = {0};
2932 UErrorCode status = U_ZERO_ERROR;
2933 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2934 if(U_SUCCESS(status)) {
2935 log_err("This should have been an error!\n");
2936 ucol_close(coll);
2937 } else {
2938 status = U_ZERO_ERROR;
2939 }
2940 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2941 if(U_FAILURE(status)) {
2942 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2943 } else {
2944 ucol_close(coll);
2945 }
2946 }
2947
2948 /**
2949 * Test for CollationElementIterator previous and next for the whole set of
2950 * unicode characters with normalization on.
2951 */
TestNumericCollation(void)2952 static void TestNumericCollation(void)
2953 {
2954 UErrorCode status = U_ZERO_ERROR;
2955
2956 const static char *basicTestStrings[]={
2957 "hello1",
2958 "hello2",
2959 "hello2002",
2960 "hello2003",
2961 "hello123456",
2962 "hello1234567",
2963 "hello10000000",
2964 "hello100000000",
2965 "hello1000000000",
2966 "hello10000000000",
2967 };
2968
2969 const static char *preZeroTestStrings[]={
2970 "avery10000",
2971 "avery010000",
2972 "avery0010000",
2973 "avery00010000",
2974 "avery000010000",
2975 "avery0000010000",
2976 "avery00000010000",
2977 "avery000000010000",
2978 };
2979
2980 const static char *thirtyTwoBitNumericStrings[]={
2981 "avery42949672960",
2982 "avery42949672961",
2983 "avery42949672962",
2984 "avery429496729610"
2985 };
2986
2987 const static char *longNumericStrings[]={
2988 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2989 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2990 are treated as multiple collation elements. */
2991 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2992 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2993 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2994 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2995 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2996 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2997 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2998 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
2999 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3000 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3001 };
3002
3003 const static char *supplementaryDigits[] = {
3004 "\\uD835\\uDFCE", /* 0 */
3005 "\\uD835\\uDFCF", /* 1 */
3006 "\\uD835\\uDFD0", /* 2 */
3007 "\\uD835\\uDFD1", /* 3 */
3008 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3009 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3010 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3011 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3012 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3013 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3014 };
3015
3016 const static char *foreignDigits[] = {
3017 "\\u0661",
3018 "\\u0662",
3019 "\\u0663",
3020 "\\u0661\\u0660",
3021 "\\u0661\\u0662",
3022 "\\u0661\\u0663",
3023 "\\u0662\\u0660",
3024 "\\u0662\\u0662",
3025 "\\u0662\\u0663",
3026 "\\u0663\\u0660",
3027 "\\u0663\\u0662",
3028 "\\u0663\\u0663"
3029 };
3030
3031 const static char *evenZeroes[] = {
3032 "2000",
3033 "2001",
3034 "2002",
3035 "2003"
3036 };
3037
3038 UColAttribute att = UCOL_NUMERIC_COLLATION;
3039 UColAttributeValue val = UCOL_ON;
3040
3041 /* Open our collator. */
3042 UCollator* coll = ucol_open("root", &status);
3043 if (U_FAILURE(status)){
3044 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3045 myErrorName(status));
3046 return;
3047 }
3048 genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3049 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3050 genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3051 genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3052 genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3053 genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3054
3055 /* Setting up our collator to do digits. */
3056 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3057 if (U_FAILURE(status)){
3058 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3059 myErrorName(status));
3060 return;
3061 }
3062
3063 /*
3064 Testing that prepended zeroes still yield the correct collation behavior.
3065 We expect that every element in our strings array will be equal.
3066 */
3067 genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3068
3069 ucol_close(coll);
3070 }
3071
TestTibetanConformance(void)3072 static void TestTibetanConformance(void)
3073 {
3074 const char* test[] = {
3075 "\\u0FB2\\u0591\\u0F71\\u0061",
3076 "\\u0FB2\\u0F71\\u0061"
3077 };
3078
3079 UErrorCode status = U_ZERO_ERROR;
3080 UCollator *coll = ucol_open("", &status);
3081 UChar source[100];
3082 UChar target[100];
3083 int result;
3084 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3085 if (U_SUCCESS(status)) {
3086 u_unescape(test[0], source, 100);
3087 u_unescape(test[1], target, 100);
3088 doTest(coll, source, target, UCOL_EQUAL);
3089 result = ucol_strcoll(coll, source, -1, target, -1);
3090 log_verbose("result %d\n", result);
3091 if (UCOL_EQUAL != result) {
3092 log_err("Tibetan comparison error\n");
3093 }
3094 }
3095 ucol_close(coll);
3096
3097 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3098 }
3099
TestPinyinProblem(void)3100 static void TestPinyinProblem(void) {
3101 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
3102 genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
3103 }
3104
3105 /**
3106 * Iterate through the given iterator, checking to see that all the strings
3107 * in the expected array are present.
3108 * @param expected array of strings we expect to see, or NULL
3109 * @param expectedCount number of elements of expected, or 0
3110 */
checkUEnumeration(const char * msg,UEnumeration * iter,const char ** expected,int32_t expectedCount)3111 static int32_t checkUEnumeration(const char* msg,
3112 UEnumeration* iter,
3113 const char** expected,
3114 int32_t expectedCount) {
3115 UErrorCode ec = U_ZERO_ERROR;
3116 int32_t i = 0, n, j, bit;
3117 int32_t seenMask = 0;
3118
3119 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3120 n = uenum_count(iter, &ec);
3121 if (!assertSuccess("count", &ec)) return -1;
3122 log_verbose("%s = [", msg);
3123 for (;; ++i) {
3124 const char* s = uenum_next(iter, NULL, &ec);
3125 if (!assertSuccess("snext", &ec) || s == NULL) break;
3126 if (i != 0) log_verbose(",");
3127 log_verbose("%s", s);
3128 /* check expected list */
3129 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3130 if ((seenMask&bit) == 0 &&
3131 uprv_strcmp(s, expected[j]) == 0) {
3132 seenMask |= bit;
3133 break;
3134 }
3135 }
3136 }
3137 log_verbose("] (%d)\n", i);
3138 assertTrue("count verified", i==n);
3139 /* did we see all expected strings? */
3140 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3141 if ((seenMask&bit)!=0) {
3142 log_verbose("Ok: \"%s\" seen\n", expected[j]);
3143 } else {
3144 log_err("FAIL: \"%s\" not seen\n", expected[j]);
3145 }
3146 }
3147 return n;
3148 }
3149
3150 /**
3151 * Test new API added for separate collation tree.
3152 */
TestSeparateTrees(void)3153 static void TestSeparateTrees(void) {
3154 UErrorCode ec = U_ZERO_ERROR;
3155 UEnumeration *e = NULL;
3156 int32_t n = -1;
3157 UBool isAvailable;
3158 char loc[256];
3159
3160 static const char* AVAIL[] = { "en", "de" };
3161
3162 static const char* KW[] = { "collation" };
3163
3164 static const char* KWVAL[] = { "phonebook", "stroke" };
3165
3166 #if !UCONFIG_NO_SERVICE
3167 e = ucol_openAvailableLocales(&ec);
3168 if (e != NULL) {
3169 assertSuccess("ucol_openAvailableLocales", &ec);
3170 assertTrue("ucol_openAvailableLocales!=0", e!=0);
3171 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3172 (void)n; /* Suppress set but not used warnings. */
3173 /* Don't need to check n because we check list */
3174 uenum_close(e);
3175 } else {
3176 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3177 }
3178 #endif
3179
3180 e = ucol_getKeywords(&ec);
3181 if (e != NULL) {
3182 assertSuccess("ucol_getKeywords", &ec);
3183 assertTrue("ucol_getKeywords!=0", e!=0);
3184 n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3185 /* Don't need to check n because we check list */
3186 uenum_close(e);
3187 } else {
3188 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3189 }
3190
3191 e = ucol_getKeywordValues(KW[0], &ec);
3192 if (e != NULL) {
3193 assertSuccess("ucol_getKeywordValues", &ec);
3194 assertTrue("ucol_getKeywordValues!=0", e!=0);
3195 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3196 /* Don't need to check n because we check list */
3197 uenum_close(e);
3198 } else {
3199 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3200 }
3201
3202 /* Try setting a warning before calling ucol_getKeywordValues */
3203 ec = U_USING_FALLBACK_WARNING;
3204 e = ucol_getKeywordValues(KW[0], &ec);
3205 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3206 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3207 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3208 /* Don't need to check n because we check list */
3209 uenum_close(e);
3210 }
3211
3212 /*
3213 U_DRAFT int32_t U_EXPORT2
3214 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3215 const char* locale, UBool* isAvailable,
3216 UErrorCode* status);
3217 }
3218 */
3219 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3220 &isAvailable, &ec);
3221 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3222 assertEquals("getFunctionalEquivalent(de)", "root", loc);
3223 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3224 isAvailable == TRUE);
3225 }
3226
3227 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3228 &isAvailable, &ec);
3229 if (assertSuccess("getFunctionalEquivalent", &ec)) {
3230 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3231 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3232 isAvailable == FALSE);
3233 }
3234 }
3235
3236 /* supercedes TestJ784 */
TestBeforePinyin(void)3237 static void TestBeforePinyin(void) {
3238 const static char rules[] = {
3239 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
3240 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
3241 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
3242 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
3243 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
3244 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
3245 };
3246
3247 const static char *test[] = {
3248 "l\\u0101",
3249 "la",
3250 "l\\u0101n",
3251 "lan ",
3252 "l\\u0113",
3253 "le",
3254 "l\\u0113n",
3255 "len"
3256 };
3257
3258 const static char *test2[] = {
3259 "x\\u0101",
3260 "x\\u0100",
3261 "X\\u0101",
3262 "X\\u0100",
3263 "x\\u00E1",
3264 "x\\u00C1",
3265 "X\\u00E1",
3266 "X\\u00C1",
3267 "x\\u01CE",
3268 "x\\u01CD",
3269 "X\\u01CE",
3270 "X\\u01CD",
3271 "x\\u00E0",
3272 "x\\u00C0",
3273 "X\\u00E0",
3274 "X\\u00C0",
3275 "xa",
3276 "xA",
3277 "Xa",
3278 "XA",
3279 "x\\u0101x",
3280 "x\\u0100x",
3281 "x\\u00E1x",
3282 "x\\u00C1x",
3283 "x\\u01CEx",
3284 "x\\u01CDx",
3285 "x\\u00E0x",
3286 "x\\u00C0x",
3287 "xax",
3288 "xAx"
3289 };
3290
3291 genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
3292 genericLocaleStarter("zh", test, UPRV_LENGTHOF(test));
3293 genericRulesStarter(rules, test2, UPRV_LENGTHOF(test2));
3294 genericLocaleStarter("zh", test2, UPRV_LENGTHOF(test2));
3295 }
3296
TestBeforeTightening(void)3297 static void TestBeforeTightening(void) {
3298 static const struct {
3299 const char *rules;
3300 UErrorCode expectedStatus;
3301 } tests[] = {
3302 { "&[before 1]a<x", U_ZERO_ERROR },
3303 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3304 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3305 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3306 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3307 { "&[before 2]a<<x",U_ZERO_ERROR },
3308 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3309 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3310 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },
3311 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },
3312 { "&[before 3]a<<<x",U_ZERO_ERROR },
3313 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },
3314 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3315 };
3316
3317 int32_t i = 0;
3318
3319 UErrorCode status = U_ZERO_ERROR;
3320 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3321 uint32_t rlen = 0;
3322
3323 UCollator *coll = NULL;
3324
3325
3326 for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3327 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3328 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3329 if(status != tests[i].expectedStatus) {
3330 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3331 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3332 }
3333 ucol_close(coll);
3334 status = U_ZERO_ERROR;
3335 }
3336
3337 }
3338
3339 /*
3340 &m < a
3341 &[before 1] a < x <<< X << q <<< Q < z
3342 assert: m <<< M < x <<< X << q <<< Q < z < a < n
3343
3344 &m < a
3345 &[before 2] a << x <<< X << q <<< Q < z
3346 assert: m <<< M < x <<< X << q <<< Q << a < z < n
3347
3348 &m < a
3349 &[before 3] a <<< x <<< X << q <<< Q < z
3350 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3351
3352
3353 &m << a
3354 &[before 1] a < x <<< X << q <<< Q < z
3355 assert: x <<< X << q <<< Q < z < m <<< M << a < n
3356
3357 &m << a
3358 &[before 2] a << x <<< X << q <<< Q < z
3359 assert: m <<< M << x <<< X << q <<< Q << a < z < n
3360
3361 &m << a
3362 &[before 3] a <<< x <<< X << q <<< Q < z
3363 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3364
3365
3366 &m <<< a
3367 &[before 1] a < x <<< X << q <<< Q < z
3368 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3369
3370 &m <<< a
3371 &[before 2] a << x <<< X << q <<< Q < z
3372 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n
3373
3374 &m <<< a
3375 &[before 3] a <<< x <<< X << q <<< Q < z
3376 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n
3377
3378
3379 &[before 1] s < x <<< X << q <<< Q < z
3380 assert: r <<< R < x <<< X << q <<< Q < z < s < n
3381
3382 &[before 2] s << x <<< X << q <<< Q < z
3383 assert: r <<< R < x <<< X << q <<< Q << s < z < n
3384
3385 &[before 3] s <<< x <<< X << q <<< Q < z
3386 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3387
3388
3389 &[before 1] \u24DC < x <<< X << q <<< Q < z
3390 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3391
3392 &[before 2] \u24DC << x <<< X << q <<< Q < z
3393 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3394
3395 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
3396 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
3397 */
3398
3399
#if 0
/* requires features not yet supported */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each table entry pairs a tailoring that uses [before N] with the order
 * in which the listed strings are expected to sort (at most 16 strings,
 * `size` of them in use). Every entry is run through genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/order pair through the generic rule-based ordering check. */
    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
#endif
3447
TestTailorNULL(void)3448 static void TestTailorNULL( void ) {
3449 const static char* rule = "&a <<< '\\u0000'";
3450 UErrorCode status = U_ZERO_ERROR;
3451 UChar rlz[RULE_BUFFER_LEN] = { 0 };
3452 uint32_t rlen = 0;
3453 UChar a = 1, null = 0;
3454 UCollationResult res = UCOL_EQUAL;
3455
3456 UCollator *coll = NULL;
3457
3458
3459 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3460 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3461
3462 if(U_FAILURE(status)) {
3463 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3464 } else {
3465 res = ucol_strcoll(coll, &a, 1, &null, 1);
3466
3467 if(res != UCOL_LESS) {
3468 log_err("NULL was not tailored properly!\n");
3469 }
3470 }
3471
3472 ucol_close(coll);
3473 }
3474
3475 static void
TestUpperFirstQuaternary(void)3476 TestUpperFirstQuaternary(void)
3477 {
3478 const char* tests[] = { "B", "b", "Bb", "bB" };
3479 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3480 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3481 genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3482 }
3483
3484 static void
TestJ4960(void)3485 TestJ4960(void)
3486 {
3487 const char* tests[] = { "\\u00e2T", "aT" };
3488 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3489 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3490 const char* tests2[] = { "a", "A" };
3491 const char* rule = "&[first tertiary ignorable]=A=a";
3492 UColAttribute att2[] = { UCOL_CASE_LEVEL };
3493 UColAttributeValue attVals2[] = { UCOL_ON };
3494 /* Test whether we correctly ignore primary ignorables on case level when */
3495 /* we have only primary & case level */
3496 genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3497 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3498 /* and case level */
3499 genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3500 /* Test whether completely ignorable letters have case level info (they shouldn't) */
3501 genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3502 }
3503
3504 static void
TestJ5223(void)3505 TestJ5223(void)
3506 {
3507 static const char *test = "this is a test string";
3508 UChar ustr[256];
3509 int32_t ustr_length = u_unescape(test, ustr, 256);
3510 unsigned char sortkey[256];
3511 int32_t sortkey_length;
3512 UErrorCode status = U_ZERO_ERROR;
3513 static UCollator *coll = NULL;
3514 coll = ucol_open("root", &status);
3515 if(U_FAILURE(status)) {
3516 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3517 return;
3518 }
3519 ucol_setStrength(coll, UCOL_PRIMARY);
3520 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3521 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3522 if (U_FAILURE(status)) {
3523 log_err("Failed setting atributes\n");
3524 return;
3525 }
3526 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3527 if (sortkey_length > 256) return;
3528
3529 /* we mark the position where the null byte should be written in advance */
3530 sortkey[sortkey_length-1] = 0xAA;
3531
3532 /* we set the buffer size one byte higher than needed */
3533 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3534 sortkey_length+1);
3535
3536 /* no error occurs (for me) */
3537 if (sortkey[sortkey_length-1] == 0xAA) {
3538 log_err("Hit bug at first try\n");
3539 }
3540
3541 /* we mark the position where the null byte should be written again */
3542 sortkey[sortkey_length-1] = 0xAA;
3543
3544 /* this time we set the buffer size to the exact amount needed */
3545 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3546 sortkey_length);
3547
3548 /* now the trailing null byte is not written */
3549 if (sortkey[sortkey_length-1] == 0xAA) {
3550 log_err("Hit bug at second try\n");
3551 }
3552
3553 ucol_close(coll);
3554 }
3555
3556 /* Regression test for Thai partial sort key problem */
3557 static void
TestJ5232(void)3558 TestJ5232(void)
3559 {
3560 const static char *test[] = {
3561 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3562 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3563 };
3564
3565 genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
3566 }
3567
3568 static void
TestJ5367(void)3569 TestJ5367(void)
3570 {
3571 const static char *test[] = { "a", "y" };
3572 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
3573 genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
3574 }
3575
3576 static void
TestVI5913(void)3577 TestVI5913(void)
3578 {
3579 UErrorCode status = U_ZERO_ERROR;
3580 int32_t i, j;
3581 UCollator *coll =NULL;
3582 uint8_t resColl[100], expColl[100];
3583 int32_t rLen, tLen, ruleLen, sLen, kLen;
3584 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni*/
3585 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
3586 /*
3587 * Note: Just tailoring &z<ae^ does not work as expected:
3588 * The UCA spec requires for discontiguous contractions that they
3589 * extend an *existing match* by one combining mark at a time.
3590 * Therefore, ae must be a contraction so that the builder finds
3591 * discontiguous contractions for ae^, for example with an intervening underdot.
3592 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3593 */
3594 UChar rule3[256]={
3595 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */
3596 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/
3597 0};
3598 static const UChar tData[][20]={
3599 {0x1EAC, 0},
3600 {0x0041, 0x0323, 0x0302, 0},
3601 {0x1EA0, 0x0302, 0},
3602 {0x00C2, 0x0323, 0},
3603 {0x1ED8, 0}, /* O with dot and circumflex */
3604 {0x1ECC, 0x0302, 0},
3605 {0x1EB7, 0},
3606 {0x1EA1, 0x0306, 0},
3607 };
3608 static const UChar tailorData[][20]={
3609 {0x1FA2, 0}, /* Omega with 3 combining marks */
3610 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3611 {0x1FF3, 0x0313, 0x0300, 0},
3612 {0x1F60, 0x0300, 0x0345, 0},
3613 {0x1F62, 0x0345, 0},
3614 {0x1FA0, 0x0300, 0},
3615 };
3616 static const UChar tailorData2[][20]={
3617 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
3618 {0x0073, 0x0323, 0x030C, 0},
3619 {0x0073, 0x030C, 0x0323, 0},
3620 };
3621 static const UChar tailorData3[][20]={
3622 {0x007a, 0}, /* z */
3623 {0x0061, 0x0065, 0}, /* a + e */
3624 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3625 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
3626 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3627 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
3628 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
3629 {0x00EA, 0}, /* e with circumflex */
3630 };
3631
3632 /* Test Vietnamese sort. */
3633 coll = ucol_open("vi", &status);
3634 if(U_FAILURE(status)) {
3635 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3636 return;
3637 }
3638 log_verbose("\n\nVI collation:");
3639 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3640 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3641 }
3642 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3643 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3644 }
3645 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3646 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3647 }
3648 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3649 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3650 }
3651
3652 for (j=0; j<8; j++) {
3653 tLen = u_strlen(tData[j]);
3654 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3655 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3656 for(i = 0; i<rLen; i++) {
3657 log_verbose(" %02X", resColl[i]);
3658 }
3659 }
3660
3661 ucol_close(coll);
3662
3663 /* Test Romanian sort. */
3664 coll = ucol_open("ro", &status);
3665 log_verbose("\n\nRO collation:");
3666 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3667 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3668 }
3669 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3670 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3671 }
3672 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3673 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3674 }
3675
3676 for (j=4; j<8; j++) {
3677 tLen = u_strlen(tData[j]);
3678 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);
3679 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3680 for(i = 0; i<rLen; i++) {
3681 log_verbose(" %02X", resColl[i]);
3682 }
3683 }
3684 ucol_close(coll);
3685
3686 /* Test the precomposed Greek character with 3 combining marks. */
3687 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3688 ruleLen = u_strlen(rule);
3689 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3690 if (U_FAILURE(status)) {
3691 log_err("ucol_openRules failed with %s\n", u_errorName(status));
3692 return;
3693 }
3694 sLen = u_strlen(tailorData[0]);
3695 for (j=1; j<6; j++) {
3696 tLen = u_strlen(tailorData[j]);
3697 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {
3698 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3699 }
3700 }
3701 /* Test getSortKey. */
3702 tLen = u_strlen(tailorData[0]);
3703 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3704 for (j=0; j<6; j++) {
3705 tLen = u_strlen(tailorData[j]);
3706 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3707 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3708 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3709 for(i = 0; i<rLen; i++) {
3710 log_err(" %02X", resColl[i]);
3711 }
3712 }
3713 }
3714 ucol_close(coll);
3715
3716 log_verbose("\n\nTailoring test for s with caron:");
3717 ruleLen = u_strlen(rule2);
3718 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3719 tLen = u_strlen(tailorData2[0]);
3720 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3721 for (j=1; j<3; j++) {
3722 tLen = u_strlen(tailorData2[j]);
3723 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3724 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3725 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);
3726 for(i = 0; i<rLen; i++) {
3727 log_err(" %02X", resColl[i]);
3728 }
3729 }
3730 }
3731 ucol_close(coll);
3732
3733 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3734 ruleLen = u_strlen(rule3);
3735 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3736 tLen = u_strlen(tailorData3[3]);
3737 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3738 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3739 for(i = 0; i<kLen; i++) {
3740 log_verbose(" %02X", expColl[i]);
3741 }
3742 for (j=4; j<6; j++) {
3743 tLen = u_strlen(tailorData3[j]);
3744 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3745
3746 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3747 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3748 for(i = 0; i<rLen; i++) {
3749 log_err(" %02X", resColl[i]);
3750 }
3751 }
3752
3753 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3754 for(i = 0; i<rLen; i++) {
3755 log_verbose(" %02X", resColl[i]);
3756 }
3757 }
3758 ucol_close(coll);
3759 }
3760
3761 static void
TestTailor6179(void)3762 TestTailor6179(void)
3763 {
3764 UErrorCode status = U_ZERO_ERROR;
3765 int32_t i;
3766 UCollator *coll =NULL;
3767 uint8_t resColl[100];
3768 int32_t rLen, tLen, ruleLen;
3769 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
3770 static const UChar rule1[]={
3771 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3772 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3773 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3774 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3775 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3776 static const UChar rule2[]={
3777 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3778 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3779 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3780 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3781 0x3C,0x3C,0x20,0x62,0};
3782
3783 static const UChar tData1[][4]={
3784 {0x61, 0},
3785 {0x62, 0},
3786 { 0xFDD0,0x009E, 0}
3787 };
3788 static const UChar tData2[][4]={
3789 {0x61, 0},
3790 {0x62, 0},
3791 { 0xFDD0,0x009E, 0}
3792 };
3793
3794 /*
3795 * These values from FractionalUCA.txt will change,
3796 * and need to be updated here.
3797 * TODO: Make this not check for particular sort keys.
3798 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3799 */
3800 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3801 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3802 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3803 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3804
3805 UParseError parseError;
3806
3807 /* Test [Last Primary ignorable] */
3808
3809 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
3810 ruleLen = u_strlen(rule1);
3811 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3812 if (U_FAILURE(status)) {
3813 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3814 return;
3815 }
3816 tLen = u_strlen(tData1[0]);
3817 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3818 if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3819 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);
3820 for(i = 0; i<rLen; i++) {
3821 log_err(" %02X", resColl[i]);
3822 }
3823 log_err("\n");
3824 }
3825 tLen = u_strlen(tData1[1]);
3826 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3827 if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3828 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);
3829 for(i = 0; i<rLen; i++) {
3830 log_err(" %02X", resColl[i]);
3831 }
3832 log_err("\n");
3833 }
3834 ucol_close(coll);
3835
3836
3837 /* Test [Last Secondary ignorable] */
3838 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
3839 ruleLen = u_strlen(rule2);
3840 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3841 if (U_FAILURE(status)) {
3842 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3843 log_info(" offset=%d \"%s\" | \"%s\"\n",
3844 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3845 return;
3846 }
3847 tLen = u_strlen(tData2[0]);
3848 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3849 if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3850 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);
3851 for(i = 0; i<rLen; i++) {
3852 log_err(" %02X", resColl[i]);
3853 }
3854 log_err("\n");
3855 }
3856 tLen = u_strlen(tData2[1]);
3857 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3858 if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3859 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);
3860 for(i = 0; i<rLen; i++) {
3861 log_err(" %02X", resColl[i]);
3862 }
3863 log_err("\n");
3864 }
3865 ucol_close(coll);
3866 }
3867
3868 static void
TestUCAPrecontext(void)3869 TestUCAPrecontext(void)
3870 {
3871 UErrorCode status = U_ZERO_ERROR;
3872 int32_t i, j;
3873 UCollator *coll =NULL;
3874 uint8_t resColl[100], prevColl[100];
3875 int32_t rLen, tLen, ruleLen;
3876 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3877 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3878 /* & l middle-dot << a a is an expansion. */
3879
3880 UChar tData1[][20]={
3881 { 0xb7, 0}, /* standalone middle dot(0xb7) */
3882 { 0x387, 0}, /* standalone middle dot(0x387) */
3883 { 0x61, 0}, /* a */
3884 { 0x6C, 0}, /* l */
3885 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */
3886 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
3887 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3888 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
3889 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3890 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
3891 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
3892 };
3893
3894 log_verbose("\n\nEN collation:");
3895 coll = ucol_open("en", &status);
3896 if (U_FAILURE(status)) {
3897 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3898 return;
3899 }
3900 for (j=0; j<11; j++) {
3901 tLen = u_strlen(tData1[j]);
3902 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3903 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3904 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3905 j, tData1[j]);
3906 }
3907 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3908 for(i = 0; i<rLen; i++) {
3909 log_verbose(" %02X", resColl[i]);
3910 }
3911 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3912 }
3913 ucol_close(coll);
3914
3915
3916 log_verbose("\n\nJA collation:");
3917 coll = ucol_open("ja", &status);
3918 if (U_FAILURE(status)) {
3919 log_err("Tailoring test: &z <<a|- failed!");
3920 return;
3921 }
3922 for (j=0; j<11; j++) {
3923 tLen = u_strlen(tData1[j]);
3924 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3925 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3926 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3927 j, tData1[j]);
3928 }
3929 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3930 for(i = 0; i<rLen; i++) {
3931 log_verbose(" %02X", resColl[i]);
3932 }
3933 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3934 }
3935 ucol_close(coll);
3936
3937
3938 log_verbose("\n\nTailoring test: & middle dot < a ");
3939 ruleLen = u_strlen(rule1);
3940 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3941 if (U_FAILURE(status)) {
3942 log_err("Tailoring test: & middle dot < a failed!");
3943 return;
3944 }
3945 for (j=0; j<11; j++) {
3946 tLen = u_strlen(tData1[j]);
3947 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3948 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3949 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3950 j, tData1[j]);
3951 }
3952 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3953 for(i = 0; i<rLen; i++) {
3954 log_verbose(" %02X", resColl[i]);
3955 }
3956 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3957 }
3958 ucol_close(coll);
3959
3960
3961 log_verbose("\n\nTailoring test: & l middle-dot << a ");
3962 ruleLen = u_strlen(rule2);
3963 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3964 if (U_FAILURE(status)) {
3965 log_err("Tailoring test: & l middle-dot << a failed!");
3966 return;
3967 }
3968 for (j=0; j<11; j++) {
3969 tLen = u_strlen(tData1[j]);
3970 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3971 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3972 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3973 j, tData1[j]);
3974 }
3975 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3976 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3977 j, tData1[j]);
3978 }
3979 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);
3980 for(i = 0; i<rLen; i++) {
3981 log_verbose(" %02X", resColl[i]);
3982 }
3983 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3984 }
3985 ucol_close(coll);
3986 }
3987
3988 static void
TestOutOfBuffer5468(void)3989 TestOutOfBuffer5468(void)
3990 {
3991 static const char *test = "\\u4e00";
3992 UChar ustr[256];
3993 int32_t ustr_length = u_unescape(test, ustr, 256);
3994 unsigned char shortKeyBuf[1];
3995 int32_t sortkey_length;
3996 UErrorCode status = U_ZERO_ERROR;
3997 static UCollator *coll = NULL;
3998
3999 coll = ucol_open("root", &status);
4000 if(U_FAILURE(status)) {
4001 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4002 return;
4003 }
4004 ucol_setStrength(coll, UCOL_PRIMARY);
4005 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4006 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4007 if (U_FAILURE(status)) {
4008 log_err("Failed setting atributes\n");
4009 return;
4010 }
4011
4012 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4013 if (sortkey_length != 4) {
4014 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);
4015 }
4016 log_verbose("length of sortKey is %d", sortkey_length);
4017 ucol_close(coll);
4018 }
4019
4020 #define TSKC_DATA_SIZE 5
4021 #define TSKC_BUF_SIZE 50
4022 static void
TestSortKeyConsistency(void)4023 TestSortKeyConsistency(void)
4024 {
4025 UErrorCode icuRC = U_ZERO_ERROR;
4026 UCollator* ucol;
4027 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4028
4029 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4030 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4031 int32_t i, j, i2;
4032
4033 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4034 if (U_FAILURE(icuRC))
4035 {
4036 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4037 return;
4038 }
4039
4040 for (i = 0; i < TSKC_DATA_SIZE; i++)
4041 {
4042 UCharIterator uiter;
4043 uint32_t state[2] = { 0, 0 };
4044 int32_t dataLen = i+1;
4045 for (j=0; j<TSKC_BUF_SIZE; j++)
4046 bufFull[i][j] = bufPart[i][j] = 0;
4047
4048 /* Full sort key */
4049 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4050
4051 /* Partial sort key */
4052 uiter_setString(&uiter, data, dataLen);
4053 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4054 if (U_FAILURE(icuRC))
4055 {
4056 log_err("ucol_nextSortKeyPart failed\n");
4057 ucol_close(ucol);
4058 return;
4059 }
4060
4061 for (i2=0; i2<i; i2++)
4062 {
4063 UBool fullMatch = TRUE;
4064 UBool partMatch = TRUE;
4065 for (j=0; j<TSKC_BUF_SIZE; j++)
4066 {
4067 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4068 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4069 }
4070 if (fullMatch != partMatch) {
4071 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4072 : "partial key was consistent, but full key changed\n");
4073 ucol_close(ucol);
4074 return;
4075 }
4076 }
4077 }
4078
4079 /*=============================================*/
4080 ucol_close(ucol);
4081 }
4082
4083 /* ticket: 6101 */
TestCroatianSortKey(void)4084 static void TestCroatianSortKey(void) {
4085 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4086 UErrorCode status = U_ZERO_ERROR;
4087 UCollator *ucol;
4088 UCharIterator iter;
4089
4090 static const UChar text[] = { 0x0044, 0xD81A };
4091
4092 size_t length = UPRV_LENGTHOF(text);
4093
4094 uint8_t textSortKey[32];
4095 size_t lenSortKey = 32;
4096 size_t actualSortKeyLen;
4097 uint32_t uStateInfo[2] = { 0, 0 };
4098
4099 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4100 if (U_FAILURE(status)) {
4101 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4102 return;
4103 }
4104
4105 uiter_setString(&iter, text, length);
4106
4107 actualSortKeyLen = ucol_nextSortKeyPart(
4108 ucol, &iter, (uint32_t*)uStateInfo,
4109 textSortKey, lenSortKey, &status
4110 );
4111
4112 if (actualSortKeyLen == lenSortKey) {
4113 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4114 }
4115
4116 ucol_close(ucol);
4117 }
4118
4119 /* ticket: 6140 */
4120 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4121 * they are both Hiragana and Katakana
4122 */
4123 #define SORTKEYLEN 50
TestHiragana(void)4124 static void TestHiragana(void) {
4125 UErrorCode status = U_ZERO_ERROR;
4126 UCollator* ucol;
4127 UCollationResult strcollresult;
4128 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4129 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4130 int32_t data1Len = UPRV_LENGTHOF(data1);
4131 int32_t data2Len = UPRV_LENGTHOF(data2);
4132 int32_t i, j;
4133 uint8_t sortKey1[SORTKEYLEN];
4134 uint8_t sortKey2[SORTKEYLEN];
4135
4136 UCharIterator uiter1;
4137 UCharIterator uiter2;
4138 uint32_t state1[2] = { 0, 0 };
4139 uint32_t state2[2] = { 0, 0 };
4140 int32_t keySize1;
4141 int32_t keySize2;
4142
4143 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4144 &status);
4145 if (U_FAILURE(status)) {
4146 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4147 return;
4148 }
4149
4150 /* Start of full sort keys */
4151 /* Full sort key1 */
4152 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4153 /* Full sort key2 */
4154 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4155 if (keySize1 == keySize2) {
4156 for (i = 0; i < keySize1; i++) {
4157 if (sortKey1[i] != sortKey2[i]) {
4158 log_err("Full sort keys are different. Should be equal.");
4159 }
4160 }
4161 } else {
4162 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4163 }
4164 /* End of full sort keys */
4165
4166 /* Start of partial sort keys */
4167 /* Partial sort key1 */
4168 uiter_setString(&uiter1, data1, data1Len);
4169 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4170 /* Partial sort key2 */
4171 uiter_setString(&uiter2, data2, data2Len);
4172 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4173 if (U_SUCCESS(status) && keySize1 == keySize2) {
4174 for (j = 0; j < keySize1; j++) {
4175 if (sortKey1[j] != sortKey2[j]) {
4176 log_err("Partial sort keys are different. Should be equal");
4177 }
4178 }
4179 } else {
4180 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4181 }
4182 /* End of partial sort keys */
4183
4184 /* Start of strcoll */
4185 /* Use ucol_strcoll() to determine ordering */
4186 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4187 if (strcollresult != UCOL_EQUAL) {
4188 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4189 }
4190
4191 ucol_close(ucol);
4192 }
4193
/*
 * Convenient struct for running collation tests.
 * One entry states the expected result of comparing `source` with `target`.
 * Both strings are fixed-size arrays of MAX_TOKEN_LEN code units; the tables
 * in this file initialize only the leading elements, so the rest are zero.
 */
typedef struct {
    const UChar source[MAX_TOKEN_LEN];  /* String on left */
    const UChar target[MAX_TOKEN_LEN];  /* String on right */
    UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
4200
4201 /*
4202 * Utility function to test one collation test case.
4203 * @param testcases Array of test cases.
4204 * @param n_testcases Size of the array testcases.
4205 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
4206 * @param n_rules Size of the array str_rules.
4207 */
doTestOneTestCase(const OneTestCase testcases[],int n_testcases,const char * str_rules[],int n_rules)4208 static void doTestOneTestCase(const OneTestCase testcases[],
4209 int n_testcases,
4210 const char* str_rules[],
4211 int n_rules)
4212 {
4213 int rule_no, testcase_no;
4214 UChar rule[500];
4215 int32_t length = 0;
4216 UErrorCode status = U_ZERO_ERROR;
4217 UParseError parse_error;
4218 UCollator *myCollation;
4219
4220 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4221
4222 length = u_unescape(str_rules[rule_no], rule, 500);
4223 if (length == 0) {
4224 log_err("ERROR: The rule cannot be unescaped: %s\n");
4225 return;
4226 }
4227 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4228 if(U_FAILURE(status)){
4229 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4230 log_info(" offset=%d \"%s\" | \"%s\"\n",
4231 parse_error.offset,
4232 aescstrdup(parse_error.preContext, -1),
4233 aescstrdup(parse_error.postContext, -1));
4234 return;
4235 }
4236 log_verbose("Testing the <<* syntax\n");
4237 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4238 ucol_setStrength(myCollation, UCOL_TERTIARY);
4239 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4240 doTest(myCollation,
4241 testcases[testcase_no].source,
4242 testcases[testcase_no].target,
4243 testcases[testcase_no].result
4244 );
4245 }
4246 ucol_close(myCollation);
4247 }
4248 }
4249
/*
 * Expected comparison results for the tailoring
 *   &a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3
 * Each entry is { source, target, expected result }.
 * The comment on each row spells out the two strings of that row.
 */
const static OneTestCase rangeTestcases[] = {
    { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
    { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
    { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

    { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
    { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
    { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
    { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
    { {0x0061},                            {0x006d},                          UCOL_LESS }, /* "a" < "m" */

    { {0x0079},                            {0x006d},                          UCOL_LESS }, /* "y" < "m" */
    { {0x0079},                            {0x0067},                          UCOL_LESS }, /* "y" < "g" */
    { {0x0061},                            {0x0068},                          UCOL_LESS }, /* "a" < "h" */
    { {0x0061},                            {0x0065},                          UCOL_LESS }, /* "a" < "e" */

    { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
    { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
    { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
    { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
    { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS }, /* "la" < "kb" */
    { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
    { {0x0062},                            {0x007a},                          UCOL_LESS }, /* "b" < "z" */
    { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4277
/*
 * Expected comparison results for tailorings that place U+FFFB and the
 * supplementary code points U+10000..U+10002 (written here as surrogate
 * pairs) after U+4E00. Each entry is { source, target, expected result }.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
    { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
    { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
    { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
    { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
    { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
    { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same as the previous entry) */
    { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4289
/*
 * Expected comparison results for the tailoring
 *   &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d
 * Each entry is { source, target, expected result }. In the row comments
 * "<", "<<" and "<<<" name the strength of the relation as written in that
 * rule; the table itself only records UCOL_LESS.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
    { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
    { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

    { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
    { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

    { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
    { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

    { {0x0079},                            {0x0065},                          UCOL_LESS }, /* "y" < "e" */
    { {0x0069},                            {0x0075},                          UCOL_LESS }, /* "i" << "u" */

    { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
      {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
    { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
      {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4310
TestSameStrengthList(void)4311 static void TestSameStrengthList(void)
4312 {
4313 const char* strRules[] = {
4314 /* Normal */
4315 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
4316
4317 /* Lists */
4318 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4319 };
4320 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4321 }
4322
TestSameStrengthListQuoted(void)4323 static void TestSameStrengthListQuoted(void)
4324 {
4325 const char* strRules[] = {
4326 /* Lists with quoted characters */
4327 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4328 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4329
4330 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4331 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4332
4333 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
4334 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4335 };
4336 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4337 }
4338
TestSameStrengthListSupplemental(void)4339 static void TestSameStrengthListSupplemental(void)
4340 {
4341 const char* strRules[] = {
4342 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4343 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4344 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4345 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4346 };
4347 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4348 }
4349
TestSameStrengthListQwerty(void)4350 static void TestSameStrengthListQwerty(void)
4351 {
4352 const char* strRules[] = {
4353 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4354 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4355 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4356 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4357 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4358
4359 /* Quoted characters also will work if two quoted characters are not consecutive. */
4360 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4361
4362 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4363 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4364
4365 };
4366 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4367 }
4368
TestSameStrengthListQuotedQwerty(void)4369 static void TestSameStrengthListQuotedQwerty(void)
4370 {
4371 const char* strRules[] = {
4372 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
4373 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
4374 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
4375
4376 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4377 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4378 };
4379 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4380 }
4381
TestSameStrengthListRanges(void)4382 static void TestSameStrengthListRanges(void)
4383 {
4384 const char* strRules[] = {
4385 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4386 };
4387 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4388 }
4389
TestSameStrengthListSupplementalRanges(void)4390 static void TestSameStrengthListSupplementalRanges(void)
4391 {
4392 const char* strRules[] = {
4393 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4394 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4395 };
4396 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4397 }
4398
TestSpecialCharacters(void)4399 static void TestSpecialCharacters(void)
4400 {
4401 const char* strRules[] = {
4402 /* Normal */
4403 "&';'<'+'<','<'-'<'&'<'*'",
4404
4405 /* List */
4406 "&';'<*'+,-&*'",
4407
4408 /* Range */
4409 "&';'<*'+'-'-&*'",
4410 };
4411
4412 const static OneTestCase specialCharacterStrings[] = {
4413 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */
4414 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */
4415 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */
4416 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */
4417 };
4418 doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4419 }
4420
TestPrivateUseCharacters(void)4421 static void TestPrivateUseCharacters(void)
4422 {
4423 const char* strRules[] = {
4424 /* Normal */
4425 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4426 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4427 };
4428
4429 const static OneTestCase privateUseCharacterStrings[] = {
4430 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4431 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4432 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4433 { {0xe2da}, {0xe2db}, UCOL_LESS },
4434 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4435 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4436 };
4437 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4438 }
4439
TestPrivateUseCharactersInList(void)4440 static void TestPrivateUseCharactersInList(void)
4441 {
4442 const char* strRules[] = {
4443 /* List */
4444 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4445 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4446 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4447 };
4448
4449 const static OneTestCase privateUseCharacterStrings[] = {
4450 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4451 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4452 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4453 { {0xe2da}, {0xe2db}, UCOL_LESS },
4454 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4455 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4456 };
4457 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4458 }
4459
TestPrivateUseCharactersInRange(void)4460 static void TestPrivateUseCharactersInRange(void)
4461 {
4462 const char* strRules[] = {
4463 /* Range */
4464 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4465 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4466 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4467 };
4468
4469 const static OneTestCase privateUseCharacterStrings[] = {
4470 { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4471 { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4472 { {0xe2d9}, {0xe2da}, UCOL_LESS },
4473 { {0xe2da}, {0xe2db}, UCOL_LESS },
4474 { {0xe2db}, {0xe2dc}, UCOL_LESS },
4475 { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4476 };
4477 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4478 }
4479
TestInvalidListsAndRanges(void)4480 static void TestInvalidListsAndRanges(void)
4481 {
4482 const char* invalidRules[] = {
4483 /* Range not in starred expression */
4484 "&\\ufffe<\\uffff-\\U00010002",
4485
4486 /* Range without start */
4487 "&a<*-c",
4488
4489 /* Range without end */
4490 "&a<*b-",
4491
4492 /* More than one hyphen */
4493 "&a<*b-g-l",
4494
4495 /* Range in the wrong order */
4496 "&a<*k-b",
4497
4498 };
4499
4500 UChar rule[500];
4501 UErrorCode status = U_ZERO_ERROR;
4502 UParseError parse_error;
4503 int n_rules = UPRV_LENGTHOF(invalidRules);
4504 int rule_no;
4505 int length;
4506 UCollator *myCollation;
4507
4508 for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4509
4510 length = u_unescape(invalidRules[rule_no], rule, 500);
4511 if (length == 0) {
4512 log_err("ERROR: The rule cannot be unescaped: %s\n");
4513 return;
4514 }
4515 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4516 (void)myCollation; /* Suppress set but not used warning. */
4517 if(!U_FAILURE(status)){
4518 log_err("ERROR: Could not cause a failure as expected: \n");
4519 }
4520 status = U_ZERO_ERROR;
4521 }
4522 }
4523
4524 /*
4525 * This test ensures that characters placed before a character in a different script have the same lead byte
4526 * in their collation key before and after script reordering.
4527 */
TestBeforeRuleWithScriptReordering(void)4528 static void TestBeforeRuleWithScriptReordering(void)
4529 {
4530 UParseError error;
4531 UErrorCode status = U_ZERO_ERROR;
4532 UCollator *myCollation;
4533 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4534 UChar rules[500];
4535 uint32_t rulesLength = 0;
4536 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4537 UCollationResult collResult;
4538
4539 uint8_t baseKey[256];
4540 uint32_t baseKeyLength;
4541 uint8_t beforeKey[256];
4542 uint32_t beforeKeyLength;
4543
4544 UChar base[] = { 0x03b1 }; /* base */
4545 int32_t baseLen = UPRV_LENGTHOF(base);
4546
4547 UChar before[] = { 0x0e01 }; /* ko kai */
4548 int32_t beforeLen = UPRV_LENGTHOF(before);
4549
4550 /*UChar *data[] = { before, base };
4551 genericRulesStarter(srules, data, 2);*/
4552
4553 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4554
4555 (void)beforeKeyLength; /* Suppress set but not used warnings. */
4556 (void)baseKeyLength;
4557
4558 /* build collator */
4559 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4560
4561 rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4562 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4563 if(U_FAILURE(status)) {
4564 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4565 return;
4566 }
4567
4568 /* check collation results - before rule applied but not script reordering */
4569 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4570 if (collResult != UCOL_GREATER) {
4571 log_err("Collation result not correct before script reordering = %d\n", collResult);
4572 }
4573
4574 /* check the lead byte of the collation keys before script reordering */
4575 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4576 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4577 if (baseKey[0] != beforeKey[0]) {
4578 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4579 }
4580
4581 /* reorder the scripts */
4582 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4583 if(U_FAILURE(status)) {
4584 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4585 return;
4586 }
4587
4588 /* check collation results - before rule applied and after script reordering */
4589 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4590 if (collResult != UCOL_GREATER) {
4591 log_err("Collation result not correct after script reordering = %d\n", collResult);
4592 }
4593
4594 /* check the lead byte of the collation keys after script reordering */
4595 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4596 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4597 if (baseKey[0] != beforeKey[0]) {
4598 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4599 }
4600
4601 ucol_close(myCollation);
4602 }
4603
4604 /*
4605 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4606 */
TestNonLeadBytesDuringCollationReordering(void)4607 static void TestNonLeadBytesDuringCollationReordering(void)
4608 {
4609 UErrorCode status = U_ZERO_ERROR;
4610 UCollator *myCollation;
4611 int32_t reorderCodes[1] = {USCRIPT_GREEK};
4612
4613 uint8_t baseKey[256];
4614 uint32_t baseKeyLength;
4615 uint8_t reorderKey[256];
4616 uint32_t reorderKeyLength;
4617
4618 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4619
4620 uint32_t i;
4621
4622
4623 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4624
4625 /* build collator tertiary */
4626 myCollation = ucol_open("", &status);
4627 ucol_setStrength(myCollation, UCOL_TERTIARY);
4628 if(U_FAILURE(status)) {
4629 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4630 return;
4631 }
4632 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4633
4634 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4635 if(U_FAILURE(status)) {
4636 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4637 return;
4638 }
4639 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4640
4641 if (baseKeyLength != reorderKeyLength) {
4642 log_err("Key lengths not the same during reordering.\n");
4643 return;
4644 }
4645
4646 for (i = 1; i < baseKeyLength; i++) {
4647 if (baseKey[i] != reorderKey[i]) {
4648 log_err("Collation key bytes not the same at position %d.\n", i);
4649 return;
4650 }
4651 }
4652 ucol_close(myCollation);
4653
4654 /* build collator quaternary */
4655 myCollation = ucol_open("", &status);
4656 ucol_setStrength(myCollation, UCOL_QUATERNARY);
4657 if(U_FAILURE(status)) {
4658 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4659 return;
4660 }
4661 baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4662
4663 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4664 if(U_FAILURE(status)) {
4665 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4666 return;
4667 }
4668 reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4669
4670 if (baseKeyLength != reorderKeyLength) {
4671 log_err("Key lengths not the same during reordering.\n");
4672 return;
4673 }
4674
4675 for (i = 1; i < baseKeyLength; i++) {
4676 if (baseKey[i] != reorderKey[i]) {
4677 log_err("Collation key bytes not the same at position %d.\n", i);
4678 return;
4679 }
4680 }
4681 ucol_close(myCollation);
4682 }
4683
4684 /*
4685 * Test reordering API.
4686 */
TestReorderingAPI(void)4687 static void TestReorderingAPI(void)
4688 {
4689 UErrorCode status = U_ZERO_ERROR;
4690 UCollator *myCollation;
4691 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4692 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4693 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4695 UCollationResult collResult;
4696 int32_t retrievedReorderCodesLength;
4697 int32_t retrievedReorderCodes[10];
4698 UChar greekString[] = { 0x03b1 };
4699 UChar punctuationString[] = { 0x203e };
4700 int loopIndex;
4701
4702 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4703
4704 /* build collator tertiary */
4705 myCollation = ucol_open("", &status);
4706 ucol_setStrength(myCollation, UCOL_TERTIARY);
4707 if(U_FAILURE(status)) {
4708 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4709 return;
4710 }
4711
4712 /* set the reorderding */
4713 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4714 if (U_FAILURE(status)) {
4715 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4716 return;
4717 }
4718
4719 /* get the reordering */
4720 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4721 if (status != U_BUFFER_OVERFLOW_ERROR) {
4722 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4723 return;
4724 }
4725 status = U_ZERO_ERROR;
4726 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4727 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4728 return;
4729 }
4730 /* now let's really get it */
4731 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4732 if (U_FAILURE(status)) {
4733 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4734 return;
4735 }
4736 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4737 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4738 return;
4739 }
4740 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4741 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4742 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4743 return;
4744 }
4745 }
4746 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4747 if (collResult != UCOL_LESS) {
4748 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4749 return;
4750 }
4751
4752 /* clear the reordering */
4753 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4754 if (U_FAILURE(status)) {
4755 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4756 return;
4757 }
4758
4759 /* get the reordering again */
4760 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4761 if (retrievedReorderCodesLength != 0) {
4762 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4763 return;
4764 }
4765
4766 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4767 if (collResult != UCOL_GREATER) {
4768 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4769 return;
4770 }
4771
4772 /* clear the reordering using [NONE] */
4773 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4774 if (U_FAILURE(status)) {
4775 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4776 return;
4777 }
4778
4779 /* get the reordering again */
4780 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4781 if (retrievedReorderCodesLength != 0) {
4782 log_err_status(status,
4783 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4784 retrievedReorderCodesLength);
4785 return;
4786 }
4787
4788 /* test for error condition on duplicate reorder codes */
4789 ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4790 if (!U_FAILURE(status)) {
4791 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4792 return;
4793 }
4794
4795 status = U_ZERO_ERROR;
4796 /* test for reorder codes after a reset code */
4797 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4798 if (!U_FAILURE(status)) {
4799 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4800 return;
4801 }
4802
4803 ucol_close(myCollation);
4804 }
4805
4806 /*
4807 * Test reordering API.
4808 */
TestReorderingAPIWithRuleCreatedCollator(void)4809 static void TestReorderingAPIWithRuleCreatedCollator(void)
4810 {
4811 UErrorCode status = U_ZERO_ERROR;
4812 UCollator *myCollation;
4813 UChar rules[90];
4814 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4815 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4816 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4817 UCollationResult collResult;
4818 int32_t retrievedReorderCodesLength;
4819 int32_t retrievedReorderCodes[10];
4820 static const UChar greekString[] = { 0x03b1 };
4821 static const UChar punctuationString[] = { 0x203e };
4822 static const UChar hanString[] = { 0x65E5, 0x672C };
4823 int loopIndex;
4824
4825 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4826
4827 /* build collator from rules */
4828 u_uastrcpy(rules, "[reorder Hani Grek]");
4829 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4830 if(U_FAILURE(status)) {
4831 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4832 return;
4833 }
4834
4835 /* get the reordering */
4836 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4837 if (U_FAILURE(status)) {
4838 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4839 return;
4840 }
4841 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4842 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4843 return;
4844 }
4845 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4846 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4847 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4848 return;
4849 }
4850 }
4851 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4852 if (collResult != UCOL_GREATER) {
4853 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4854 return;
4855 }
4856
4857 /* set the reordering */
4858 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4859 if (U_FAILURE(status)) {
4860 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4861 return;
4862 }
4863
4864 /* get the reordering */
4865 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4866 if (status != U_BUFFER_OVERFLOW_ERROR) {
4867 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4868 return;
4869 }
4870 status = U_ZERO_ERROR;
4871 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4872 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4873 return;
4874 }
4875 /* now let's really get it */
4876 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4877 if (U_FAILURE(status)) {
4878 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4879 return;
4880 }
4881 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4882 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4883 return;
4884 }
4885 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4886 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4887 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4888 return;
4889 }
4890 }
4891 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4892 if (collResult != UCOL_LESS) {
4893 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4894 return;
4895 }
4896
4897 /* clear the reordering */
4898 ucol_setReorderCodes(myCollation, NULL, 0, &status);
4899 if (U_FAILURE(status)) {
4900 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4901 return;
4902 }
4903
4904 /* get the reordering again */
4905 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4906 if (retrievedReorderCodesLength != 0) {
4907 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4908 return;
4909 }
4910
4911 collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4912 if (collResult != UCOL_GREATER) {
4913 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4914 return;
4915 }
4916
4917 /* reset the reordering */
4918 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4919 if (U_FAILURE(status)) {
4920 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4921 return;
4922 }
4923 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4924 if (U_FAILURE(status)) {
4925 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4926 return;
4927 }
4928 if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4929 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4930 return;
4931 }
4932 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4933 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4934 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4935 return;
4936 }
4937 }
4938
4939 ucol_close(myCollation);
4940 }
4941
containsExpectedScript(const int32_t scripts[],int32_t length,int32_t expectedScript)4942 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4943 int32_t i;
4944 for (i = 0; i < length; ++i) {
4945 if (expectedScript == scripts[i]) { return TRUE; }
4946 }
4947 return FALSE;
4948 }
4949
TestEquivalentReorderingScripts(void)4950 static void TestEquivalentReorderingScripts(void) {
4951 // Beginning with ICU 55, collation reordering moves single scripts
4952 // rather than groups of scripts,
4953 // except where scripts share a range and sort primary-equal.
4954 UErrorCode status = U_ZERO_ERROR;
4955 int32_t equivalentScripts[100];
4956 int32_t length;
4957 int i;
4958 int32_t prevScript;
4959 /* These scripts are expected to be equivalent. */
4960 static const int32_t expectedScripts[] = {
4961 USCRIPT_HIRAGANA,
4962 USCRIPT_KATAKANA,
4963 USCRIPT_KATAKANA_OR_HIRAGANA
4964 };
4965
4966 equivalentScripts[0] = 0;
4967 length = ucol_getEquivalentReorderCodes(
4968 USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4969 if (U_FAILURE(status)) {
4970 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4971 return;
4972 }
4973 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4974 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4975 "length expected 1, was = %d; expected [%d] was [%d]\n",
4976 length, USCRIPT_GOTHIC, equivalentScripts[0]);
4977 }
4978
4979 length = ucol_getEquivalentReorderCodes(
4980 USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4981 if (U_FAILURE(status)) {
4982 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4983 return;
4984 }
4985 if (length != UPRV_LENGTHOF(expectedScripts)) {
4986 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4987 "expected %d, was = %d\n",
4988 UPRV_LENGTHOF(expectedScripts), length);
4989 }
4990 prevScript = -1;
4991 for (i = 0; i < length; ++i) {
4992 int32_t script = equivalentScripts[i];
4993 if (script <= prevScript) {
4994 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4995 }
4996 prevScript = script;
4997 }
4998 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
4999 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5000 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5001 expectedScripts[i]);
5002 }
5003 }
5004
5005 length = ucol_getEquivalentReorderCodes(
5006 USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5007 if (U_FAILURE(status)) {
5008 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5009 return;
5010 }
5011 if (length != UPRV_LENGTHOF(expectedScripts)) {
5012 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5013 "expected %d, was = %d\n",
5014 UPRV_LENGTHOF(expectedScripts), length);
5015 }
5016 for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5017 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5018 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5019 expectedScripts[i]);
5020 }
5021 }
5022
5023 length = ucol_getEquivalentReorderCodes(
5024 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5025 if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5026 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5027 "expected %d, was = %d\n",
5028 UPRV_LENGTHOF(expectedScripts), length);
5029 }
5030
5031 length = ucol_getEquivalentReorderCodes(
5032 USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5033 if (U_FAILURE(status) || length != 3) {
5034 log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5035 "expected 3, was = %d\n", length);
5036 }
5037 length = ucol_getEquivalentReorderCodes(
5038 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5039 if (U_FAILURE(status) || length != 3) {
5040 log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5041 "expected 3, was = %d\n", length);
5042 }
5043 length = ucol_getEquivalentReorderCodes(
5044 USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5045 if (U_FAILURE(status) || length != 3) {
5046 log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5047 "expected 3, was = %d\n", length);
5048 }
5049
5050 length = ucol_getEquivalentReorderCodes(
5051 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5052 if (U_FAILURE(status) || length != 2) {
5053 log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5054 "expected 2, was = %d\n", length);
5055 }
5056 length = ucol_getEquivalentReorderCodes(
5057 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5058 if (U_FAILURE(status) || length != 2) {
5059 log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5060 "expected 2, was = %d\n", length);
5061 }
5062 }
5063
TestReorderingAcrossCloning(void)5064 static void TestReorderingAcrossCloning(void)
5065 {
5066 UErrorCode status = U_ZERO_ERROR;
5067 UCollator *myCollation;
5068 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5069 UCollator *clonedCollation;
5070 int32_t retrievedReorderCodesLength;
5071 int32_t retrievedReorderCodes[10];
5072 int loopIndex;
5073
5074 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5075
5076 /* build collator tertiary */
5077 myCollation = ucol_open("", &status);
5078 ucol_setStrength(myCollation, UCOL_TERTIARY);
5079 if(U_FAILURE(status)) {
5080 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5081 return;
5082 }
5083
5084 /* set the reorderding */
5085 ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5086 if (U_FAILURE(status)) {
5087 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5088 return;
5089 }
5090
5091 /* clone the collator */
5092 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5093 if (U_FAILURE(status)) {
5094 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5095 return;
5096 }
5097
5098 /* get the reordering */
5099 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5100 if (U_FAILURE(status)) {
5101 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5102 return;
5103 }
5104 if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5105 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5106 return;
5107 }
5108 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5109 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5110 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5111 return;
5112 }
5113 }
5114
5115 /*uprv_free(buffer);*/
5116 ucol_close(myCollation);
5117 ucol_close(clonedCollation);
5118 }
5119
5120 /*
5121 * Utility function to test one collation reordering test case set.
5122 * @param testcases Array of test cases.
5123 * @param n_testcases Size of the array testcases.
5124 * @param reorderTokens Array of reordering codes.
5125 * @param reorderTokensLen Size of the array reorderTokens.
5126 */
doTestOneReorderingAPITestCase(const OneTestCase testCases[],uint32_t testCasesLen,const int32_t reorderTokens[],int32_t reorderTokensLen)5127 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5128 {
5129 uint32_t testCaseNum;
5130 UErrorCode status = U_ZERO_ERROR;
5131 UCollator *myCollation;
5132
5133 myCollation = ucol_open("", &status);
5134 if (U_FAILURE(status)) {
5135 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5136 return;
5137 }
5138 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5139 if(U_FAILURE(status)) {
5140 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5141 return;
5142 }
5143
5144 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5145 doTest(myCollation,
5146 testCases[testCaseNum].source,
5147 testCases[testCaseNum].target,
5148 testCases[testCaseNum].result
5149 );
5150 }
5151 ucol_close(myCollation);
5152 }
5153
TestGreekFirstReorder(void)5154 static void TestGreekFirstReorder(void)
5155 {
5156 const char* strRules[] = {
5157 "[reorder Grek]"
5158 };
5159
5160 const int32_t apiRules[] = {
5161 USCRIPT_GREEK
5162 };
5163
5164 const static OneTestCase privateUseCharacterStrings[] = {
5165 { {0x0391}, {0x0391}, UCOL_EQUAL },
5166 { {0x0041}, {0x0391}, UCOL_GREATER },
5167 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5168 { {0x0060}, {0x0391}, UCOL_LESS },
5169 { {0x0391}, {0xe2dc}, UCOL_LESS },
5170 { {0x0391}, {0x0060}, UCOL_GREATER },
5171 };
5172
5173 /* Test rules creation */
5174 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5175
5176 /* Test collation reordering API */
5177 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5178 }
5179
TestGreekLastReorder(void)5180 static void TestGreekLastReorder(void)
5181 {
5182 const char* strRules[] = {
5183 "[reorder Zzzz Grek]"
5184 };
5185
5186 const int32_t apiRules[] = {
5187 USCRIPT_UNKNOWN, USCRIPT_GREEK
5188 };
5189
5190 const static OneTestCase privateUseCharacterStrings[] = {
5191 { {0x0391}, {0x0391}, UCOL_EQUAL },
5192 { {0x0041}, {0x0391}, UCOL_LESS },
5193 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5194 { {0x0060}, {0x0391}, UCOL_LESS },
5195 { {0x0391}, {0xe2dc}, UCOL_GREATER },
5196 };
5197
5198 /* Test rules creation */
5199 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5200
5201 /* Test collation reordering API */
5202 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5203 }
5204
TestNonScriptReorder(void)5205 static void TestNonScriptReorder(void)
5206 {
5207 const char* strRules[] = {
5208 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5209 };
5210
5211 const int32_t apiRules[] = {
5212 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5213 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5214 UCOL_REORDER_CODE_CURRENCY
5215 };
5216
5217 const static OneTestCase privateUseCharacterStrings[] = {
5218 { {0x0391}, {0x0041}, UCOL_LESS },
5219 { {0x0041}, {0x0391}, UCOL_GREATER },
5220 { {0x0060}, {0x0041}, UCOL_LESS },
5221 { {0x0060}, {0x0391}, UCOL_GREATER },
5222 { {0x0024}, {0x0041}, UCOL_GREATER },
5223 };
5224
5225 /* Test rules creation */
5226 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5227
5228 /* Test collation reordering API */
5229 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5230 }
5231
TestHaniReorder(void)5232 static void TestHaniReorder(void)
5233 {
5234 const char* strRules[] = {
5235 "[reorder Hani]"
5236 };
5237 const int32_t apiRules[] = {
5238 USCRIPT_HAN
5239 };
5240
5241 const static OneTestCase privateUseCharacterStrings[] = {
5242 { {0x4e00}, {0x0041}, UCOL_LESS },
5243 { {0x4e00}, {0x0060}, UCOL_GREATER },
5244 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5245 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5246 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5247 { {0xfa27}, {0x0041}, UCOL_LESS },
5248 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5249 };
5250
5251 /* Test rules creation */
5252 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5253
5254 /* Test collation reordering API */
5255 doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5256 }
5257
TestHaniReorderWithOtherRules(void)5258 static void TestHaniReorderWithOtherRules(void)
5259 {
5260 const char* strRules[] = {
5261 "[reorder Hani] &b<a"
5262 };
5263 /*const int32_t apiRules[] = {
5264 USCRIPT_HAN
5265 };*/
5266
5267 const static OneTestCase privateUseCharacterStrings[] = {
5268 { {0x4e00}, {0x0041}, UCOL_LESS },
5269 { {0x4e00}, {0x0060}, UCOL_GREATER },
5270 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5271 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5272 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5273 { {0xfa27}, {0x0041}, UCOL_LESS },
5274 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5275 { {0x0062}, {0x0061}, UCOL_LESS },
5276 };
5277
5278 /* Test rules creation */
5279 doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5280 }
5281
TestMultipleReorder(void)5282 static void TestMultipleReorder(void)
5283 {
5284 const char* strRules[] = {
5285 "[reorder Grek Zzzz DIGIT Latn Hani]"
5286 };
5287
5288 const int32_t apiRules[] = {
5289 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5290 };
5291
5292 const static OneTestCase collationTestCases[] = {
5293 { {0x0391}, {0x0041}, UCOL_LESS},
5294 { {0x0031}, {0x0041}, UCOL_LESS},
5295 { {0x0041}, {0x4e00}, UCOL_LESS},
5296 };
5297
5298 /* Test rules creation */
5299 doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5300
5301 /* Test collation reordering API */
5302 doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5303 }
5304
5305 /*
5306 * Test that covers issue reported in ticket 8814
5307 */
TestReorderWithNumericCollation(void)5308 static void TestReorderWithNumericCollation(void)
5309 {
5310 UErrorCode status = U_ZERO_ERROR;
5311 UCollator *myCollation;
5312 UCollator *myReorderCollation;
5313 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5314 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5315 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5316 UChar fortyS[] = { 0x0053 };
5317 UChar fortyThreeP[] = { 0x0050 };
5318 uint8_t fortyS_sortKey[128];
5319 int32_t fortyS_sortKey_Length;
5320 uint8_t fortyThreeP_sortKey[128];
5321 int32_t fortyThreeP_sortKey_Length;
5322 uint8_t fortyS_sortKey_reorder[128];
5323 int32_t fortyS_sortKey_reorder_Length;
5324 uint8_t fortyThreeP_sortKey_reorder[128];
5325 int32_t fortyThreeP_sortKey_reorder_Length;
5326 UCollationResult collResult;
5327 UCollationResult collResultReorder;
5328
5329 log_verbose("Testing reordering with and without numeric collation\n");
5330
5331 /* build collator tertiary with numeric */
5332 myCollation = ucol_open("", &status);
5333 /*
5334 ucol_setStrength(myCollation, UCOL_TERTIARY);
5335 */
5336 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5337 if(U_FAILURE(status)) {
5338 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5339 return;
5340 }
5341
5342 /* build collator tertiary with numeric and reordering */
5343 myReorderCollation = ucol_open("", &status);
5344 /*
5345 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5346 */
5347 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5348 ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5349 if(U_FAILURE(status)) {
5350 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5351 return;
5352 }
5353
5354 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5355 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5356 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5357 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5358
5359 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5360 log_err_status(status, "ERROR: couldn't generate sort keys\n");
5361 return;
5362 }
5363 collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5364 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5365 /*
5366 fprintf(stderr, "\tcollResult = %x\n", collResult);
5367 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5368 fprintf(stderr, "\nfortyS\n");
5369 for (i = 0; i < fortyS_sortKey_Length; i++) {
5370 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5371 }
5372 fprintf(stderr, "\nfortyThreeP\n");
5373 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5374 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5375 }
5376 */
5377 if (collResult != collResultReorder) {
5378 log_err_status(status, "ERROR: collation results should have been the same.\n");
5379 return;
5380 }
5381
5382 ucol_close(myCollation);
5383 ucol_close(myReorderCollation);
5384 }
5385
/*
 * Compares two zero-terminated byte sequences as unsigned bytes.
 *
 * Returns 0 when both sequences are identical up to and including the
 * terminating zero byte, -1 when the first differing byte of a is smaller
 * than that of b, and 1 otherwise. Both arguments must be zero-terminated;
 * no length limit is applied.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    size_t pos = 0;

    while (a[pos] == b[pos]) {
        if (a[pos] == 0) {
            /* Reached the terminator of both sequences without a difference. */
            return 0;
        }
        ++pos;
    }

    if (a[pos] < b[pos]) {
        return -1;
    }
    return 1;
}
5395
TestImportRulesDeWithPhonebook(void)5396 static void TestImportRulesDeWithPhonebook(void)
5397 {
5398 const char* normalRules[] = {
5399 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5400 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5401 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5402 };
5403 const OneTestCase normalTests[] = {
5404 { {0x00e6}, {0x00c6}, UCOL_LESS},
5405 { {0x00fc}, {0x00dc}, UCOL_GREATER},
5406 };
5407
5408 const char* importRules[] = {
5409 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5410 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5411 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5412 };
5413 const OneTestCase importTests[] = {
5414 { {0x00e6}, {0x00c6}, UCOL_LESS},
5415 { {0x00fc}, {0x00dc}, UCOL_LESS},
5416 };
5417
5418 doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5419 doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5420 }
5421
#if 0
/*
 * Disabled test: compares default ordering, the imported "eor" (European
 * Ordering Rules) tailoring, the imported Finnish standard tailoring, and
 * the combination of both imports, using the same three string pairs:
 *   U+0110 vs U+00F0,
 *   U+00A3 vs U+00A5,
 *   "a"    vs "a" followed by U+00A3.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET, reached through a dummy rule. */
    const char *ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetExpectations[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS },
        { {0x00a3}, {0x00a5}, UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* European Ordering rules: ignore currency characters. */
    const char *europeanRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase europeanExpectations[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS },
        { {0x00a3}, {0x00a5}, UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    /* Finnish standard rules only. */
    const char *finnishStdRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase finnishStdExpectations[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER },
        { {0x00a3}, {0x00a5}, UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* European Ordering rules followed by the Finnish standard rules. */
    const char *europeanThenFinnishRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* Essentially the same as the combination above once fi.txt is updated with the import. */
    const char *finnishEorRules[] = {
        "[import fi-u-co-eor]",
    };
    const OneTestCase finnishEorExpectations[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER },
        { {0x00a3}, {0x00a5}, UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    doTestOneTestCase(ducetExpectations, UPRV_LENGTHOF(ducetExpectations),
                      ducetRules, UPRV_LENGTHOF(ducetRules));
    doTestOneTestCase(europeanExpectations, UPRV_LENGTHOF(europeanExpectations),
                      europeanRules, UPRV_LENGTHOF(europeanRules));
    doTestOneTestCase(finnishStdExpectations, UPRV_LENGTHOF(finnishStdExpectations),
                      finnishStdRules, UPRV_LENGTHOF(finnishStdRules));
    doTestOneTestCase(finnishEorExpectations, UPRV_LENGTHOF(finnishEorExpectations),
                      europeanThenFinnishRules, UPRV_LENGTHOF(europeanThenFinnishRules));

    log_knownIssue("8962", NULL);
    /*
     * TODO: Fix ICU ticket #8962 by enabling the call below after fi.txt is
     * updated with the following rule:
     *   eor{
     *       Sequence{
     *           "[import root-u-co-eor][import fi-u-co-standard]"
     *       }
     *       Version{"21.0"}
     *   }
     */
    /* doTestOneTestCase(finnishEorExpectations, UPRV_LENGTHOF(finnishEorExpectations), finnishEorRules, UPRV_LENGTHOF(finnishEorRules)); */
}
#endif
5491
#if 0
/*
 * Disabled test: exercises rule import with the unihan rules. It cannot be
 * enabled unless the resource files are built with the -includeUnihanColl
 * option.
 * TODO: Enable this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET, reached through a dummy rule. */
    const char *ducetRules[] = {
        "&a<b",
    };
    const OneTestCase ducetExpectations[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER },
    };

    /* Unihan tailoring imported from the Korean locale. */
    const char *importedUnihanRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanExpectations[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS },
    };

    doTestOneTestCase(ducetExpectations, UPRV_LENGTHOF(ducetExpectations),
                      ducetRules, UPRV_LENGTHOF(ducetRules));
    doTestOneTestCase(unihanExpectations, UPRV_LENGTHOF(unihanExpectations),
                      importedUnihanRules, UPRV_LENGTHOF(importedUnihanRules));
}
#endif
5523
TestImport(void)5524 static void TestImport(void)
5525 {
5526 UCollator* vicoll;
5527 UCollator* escoll;
5528 UCollator* viescoll;
5529 UCollator* importviescoll;
5530 UParseError error;
5531 UErrorCode status = U_ZERO_ERROR;
5532 UChar* virules;
5533 int32_t viruleslength;
5534 UChar* esrules;
5535 int32_t esruleslength;
5536 UChar* viesrules;
5537 int32_t viesruleslength;
5538 char srules[500] = "[import vi][import es]";
5539 UChar rules[500];
5540 uint32_t length = 0;
5541 int32_t itemCount;
5542 int32_t i, k;
5543 UChar32 start;
5544 UChar32 end;
5545 UChar str[500];
5546 int32_t strLength;
5547
5548 uint8_t sk1[500];
5549 uint8_t sk2[500];
5550
5551 UBool b;
5552 USet* tailoredSet;
5553 USet* importTailoredSet;
5554
5555
5556 vicoll = ucol_open("vi", &status);
5557 if(U_FAILURE(status)){
5558 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5559 return;
5560 }
5561
5562 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5563 if(viruleslength == 0) {
5564 log_data_err("missing vi tailoring rule string\n");
5565 ucol_close(vicoll);
5566 return;
5567 }
5568 escoll = ucol_open("es", &status);
5569 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5570 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5571 viesrules[0] = 0;
5572 u_strcat(viesrules, virules);
5573 u_strcat(viesrules, esrules);
5574 viesruleslength = viruleslength + esruleslength;
5575 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5576
5577 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5578 length = u_unescape(srules, rules, 500);
5579 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5580 if(U_FAILURE(status)){
5581 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5582 return;
5583 }
5584
5585 tailoredSet = ucol_getTailoredSet(viescoll, &status);
5586 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5587
5588 if(!uset_equals(tailoredSet, importTailoredSet)){
5589 log_err("Tailored sets not equal");
5590 }
5591
5592 uset_close(importTailoredSet);
5593
5594 itemCount = uset_getItemCount(tailoredSet);
5595
5596 for( i = 0; i < itemCount; i++){
5597 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5598 if(strLength < 2){
5599 for (; start <= end; start++){
5600 k = 0;
5601 U16_APPEND(str, k, 500, start, b);
5602 (void)b; /* Suppress set but not used warning. */
5603 ucol_getSortKey(viescoll, str, 1, sk1, 500);
5604 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5605 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5606 log_err("Sort key for %s not equal\n", str);
5607 break;
5608 }
5609 }
5610 }else{
5611 ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5612 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5613 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5614 log_err("ZZSort key for %s not equal\n", str);
5615 break;
5616 }
5617
5618 }
5619 }
5620
5621 uset_close(tailoredSet);
5622
5623 uprv_free(viesrules);
5624
5625 ucol_close(vicoll);
5626 ucol_close(escoll);
5627 ucol_close(viescoll);
5628 ucol_close(importviescoll);
5629 }
5630
TestImportWithType(void)5631 static void TestImportWithType(void)
5632 {
5633 UCollator* vicoll;
5634 UCollator* decoll;
5635 UCollator* videcoll;
5636 UCollator* importvidecoll;
5637 UParseError error;
5638 UErrorCode status = U_ZERO_ERROR;
5639 const UChar* virules;
5640 int32_t viruleslength;
5641 const UChar* derules;
5642 int32_t deruleslength;
5643 UChar* viderules;
5644 int32_t videruleslength;
5645 const char srules[500] = "[import vi][import de-u-co-phonebk]";
5646 UChar rules[500];
5647 uint32_t length = 0;
5648 int32_t itemCount;
5649 int32_t i, k;
5650 UChar32 start;
5651 UChar32 end;
5652 UChar str[500];
5653 int32_t strLength;
5654
5655 uint8_t sk1[500];
5656 uint8_t sk2[500];
5657
5658 USet* tailoredSet;
5659 USet* importTailoredSet;
5660
5661 vicoll = ucol_open("vi", &status);
5662 if(U_FAILURE(status)){
5663 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5664 return;
5665 }
5666 virules = ucol_getRules(vicoll, &viruleslength);
5667 if(viruleslength == 0) {
5668 log_data_err("missing vi tailoring rule string\n");
5669 ucol_close(vicoll);
5670 return;
5671 }
5672 /* decoll = ucol_open("de@collation=phonebook", &status); */
5673 decoll = ucol_open("de-u-co-phonebk", &status);
5674 if(U_FAILURE(status)){
5675 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5676 return;
5677 }
5678
5679
5680 derules = ucol_getRules(decoll, &deruleslength);
5681 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5682 viderules[0] = 0;
5683 u_strcat(viderules, virules);
5684 u_strcat(viderules, derules);
5685 videruleslength = viruleslength + deruleslength;
5686 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5687
5688 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5689 length = u_unescape(srules, rules, 500);
5690 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5691 if(U_FAILURE(status)){
5692 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5693 return;
5694 }
5695
5696 tailoredSet = ucol_getTailoredSet(videcoll, &status);
5697 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5698
5699 if(!uset_equals(tailoredSet, importTailoredSet)){
5700 log_err("Tailored sets not equal");
5701 }
5702
5703 uset_close(importTailoredSet);
5704
5705 itemCount = uset_getItemCount(tailoredSet);
5706
5707 for( i = 0; i < itemCount; i++){
5708 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5709 if(strLength < 2){
5710 for (; start <= end; start++){
5711 k = 0;
5712 U16_APPEND_UNSAFE(str, k, start);
5713 ucol_getSortKey(videcoll, str, 1, sk1, 500);
5714 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5715 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5716 log_err("Sort key for %s not equal\n", str);
5717 break;
5718 }
5719 }
5720 }else{
5721 ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5722 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5723 if(compare_uint8_t_arrays(sk1, sk2) != 0){
5724 log_err("Sort key for %s not equal\n", str);
5725 break;
5726 }
5727
5728 }
5729 }
5730
5731 uset_close(tailoredSet);
5732
5733 uprv_free(viderules);
5734
5735 ucol_close(videcoll);
5736 ucol_close(importvidecoll);
5737 ucol_close(vicoll);
5738 ucol_close(decoll);
5739 }
5740
5741 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5742 static const UChar longUpperStr1[]= { /* 155 chars */
5743 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5744 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5745 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5746 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5747 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5748 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5749 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5750 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5751 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5752 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5753 };
5754
5755 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5756 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5757 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5758 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5759 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5760 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5761 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5762 };
5763
5764 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5765 static const UChar longUpperStr3[]= { /* 324 chars */
5766 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5767 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5768 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5769 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5770 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5771 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5772 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5773 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5774 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5778 };
5779
5780 typedef struct {
5781 const UChar * longUpperStrPtr;
5782 int32_t longUpperStrLen;
5783 } LongUpperStrItem;
5784
5785 /* String pointers must be in reverse collation order of the corresponding strings */
5786 static const LongUpperStrItem longUpperStrItems[] = {
5787 { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
5788 { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
5789 { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
5790 { NULL, 0 }
5791 };
5792
5793 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5794
5795 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
TestCaseLevelBufferOverflow(void)5796 static void TestCaseLevelBufferOverflow(void)
5797 {
5798 UErrorCode status = U_ZERO_ERROR;
5799 UCollator * ucol = ucol_open("root", &status);
5800 if ( U_SUCCESS(status) ) {
5801 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5802 if ( U_SUCCESS(status) ) {
5803 const LongUpperStrItem * itemPtr;
5804 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5805 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5806 int32_t sortKeyLen;
5807 if (itemPtr > longUpperStrItems) {
5808 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5809 }
5810 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5811 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5812 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5813 break;
5814 }
5815 if ( itemPtr > longUpperStrItems ) {
5816 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5817 if (compareResult >= 0) {
5818 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5819 }
5820 }
5821 }
5822 } else {
5823 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5824 }
5825 ucol_close(ucol);
5826 } else {
5827 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5828 }
5829 }
5830
5831 /* Test for #10595 */
5832 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5833 #define KEY_PART_SIZE 16
5834
TestNextSortKeyPartJaIdentical(void)5835 static void TestNextSortKeyPartJaIdentical(void)
5836 {
5837 UErrorCode status = U_ZERO_ERROR;
5838 UCollator *coll;
5839 uint8_t keyPart[KEY_PART_SIZE];
5840 UCharIterator iter;
5841 uint32_t state[2] = {0, 0};
5842 int32_t keyPartLen;
5843
5844 coll = ucol_open("ja", &status);
5845 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5846 if (U_FAILURE(status)) {
5847 log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5848 return;
5849 }
5850
5851 uiter_setString(&iter, testJapaneseName, 5);
5852 keyPartLen = KEY_PART_SIZE;
5853 while (keyPartLen == KEY_PART_SIZE) {
5854 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5855 if (U_FAILURE(status)) {
5856 log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5857 break;
5858 }
5859 }
5860
5861 ucol_close(coll);
5862 }
5863
5864 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5865
addMiscCollTest(TestNode ** root)5866 void addMiscCollTest(TestNode** root)
5867 {
5868 TEST(TestRuleOptions);
5869 TEST(TestBeforePrefixFailure);
5870 TEST(TestContractionClosure);
5871 TEST(TestPrefixCompose);
5872 TEST(TestStrCollIdenticalPrefix);
5873 TEST(TestPrefix);
5874 TEST(TestNewJapanese);
5875 /*TEST(TestLimitations);*/
5876 TEST(TestNonChars);
5877 TEST(TestExtremeCompression);
5878 TEST(TestSurrogates);
5879 TEST(TestVariableTopSetting);
5880 TEST(TestMaxVariable);
5881 TEST(TestBocsuCoverage);
5882 TEST(TestCyrillicTailoring);
5883 TEST(TestCase);
5884 TEST(IncompleteCntTest);
5885 TEST(BlackBirdTest);
5886 TEST(FunkyATest);
5887 TEST(BillFairmanTest);
5888 TEST(TestChMove);
5889 TEST(TestImplicitTailoring);
5890 TEST(TestFCDProblem);
5891 TEST(TestEmptyRule);
5892 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5893 TEST(TestJ815);
5894 TEST(TestUpperCaseFirst);
5895 TEST(TestBefore);
5896 TEST(TestHangulTailoring);
5897 TEST(TestUCARules);
5898 TEST(TestIncrementalNormalize);
5899 TEST(TestComposeDecompose);
5900 TEST(TestCompressOverlap);
5901 TEST(TestContraction);
5902 TEST(TestExpansion);
5903 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5904 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5905 TEST(TestOptimize);
5906 TEST(TestSuppressContractions);
5907 TEST(Alexis2);
5908 TEST(TestHebrewUCA);
5909 TEST(TestPartialSortKeyTermination);
5910 TEST(TestSettings);
5911 TEST(TestEquals);
5912 TEST(TestJ2726);
5913 TEST(NullRule);
5914 TEST(TestNumericCollation);
5915 TEST(TestTibetanConformance);
5916 TEST(TestPinyinProblem);
5917 TEST(TestSeparateTrees);
5918 TEST(TestBeforePinyin);
5919 TEST(TestBeforeTightening);
5920 /*TEST(TestMoreBefore);*/
5921 TEST(TestTailorNULL);
5922 TEST(TestUpperFirstQuaternary);
5923 TEST(TestJ4960);
5924 TEST(TestJ5223);
5925 TEST(TestJ5232);
5926 TEST(TestJ5367);
5927 TEST(TestHiragana);
5928 TEST(TestSortKeyConsistency);
5929 TEST(TestVI5913); /* VI, RO tailored rules */
5930 TEST(TestCroatianSortKey);
5931 TEST(TestTailor6179);
5932 TEST(TestUCAPrecontext);
5933 TEST(TestOutOfBuffer5468);
5934 TEST(TestSameStrengthList);
5935
5936 TEST(TestSameStrengthListQuoted);
5937 TEST(TestSameStrengthListSupplemental);
5938 TEST(TestSameStrengthListQwerty);
5939 TEST(TestSameStrengthListQuotedQwerty);
5940 TEST(TestSameStrengthListRanges);
5941 TEST(TestSameStrengthListSupplementalRanges);
5942 TEST(TestSpecialCharacters);
5943 TEST(TestPrivateUseCharacters);
5944 TEST(TestPrivateUseCharactersInList);
5945 TEST(TestPrivateUseCharactersInRange);
5946 TEST(TestInvalidListsAndRanges);
5947 TEST(TestImportRulesDeWithPhonebook);
5948 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5949 /* TEST(TestImportRulesCJKWithUnihan); */
5950 TEST(TestImport);
5951 TEST(TestImportWithType);
5952
5953 TEST(TestBeforeRuleWithScriptReordering);
5954 TEST(TestNonLeadBytesDuringCollationReordering);
5955 TEST(TestReorderingAPI);
5956 TEST(TestReorderingAPIWithRuleCreatedCollator);
5957 TEST(TestEquivalentReorderingScripts);
5958 TEST(TestGreekFirstReorder);
5959 TEST(TestGreekLastReorder);
5960 TEST(TestNonScriptReorder);
5961 TEST(TestHaniReorder);
5962 TEST(TestHaniReorderWithOtherRules);
5963 TEST(TestMultipleReorder);
5964 TEST(TestReorderingAcrossCloning);
5965 TEST(TestReorderWithNumericCollation);
5966
5967 TEST(TestCaseLevelBufferOverflow);
5968 TEST(TestNextSortKeyPartJaIdentical);
5969 }
5970
5971 #endif /* #if !UCONFIG_NO_COLLATION */
5972