1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CITERTST.C
9 *
10 * Modification History:
11 * Date Name Description
12 * Madhu Katragadda Ported for C API
13 * 02/19/01 synwee Modified test case for new collation iterator
14 *********************************************************************************/
15 /*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/ucol.h"
25 #include "unicode/uloc.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "unicode/putil.h"
29 #include "callcoll.h"
30 #include "cmemory.h"
31 #include "cintltst.h"
32 #include "citertst.h"
33 #include "ccolltst.h"
34 #include "filestrm.h"
35 #include "cstring.h"
36 #include "ucol_imp.h"
37 #include "ucol_tok.h"
38 #include "uparse.h"
39 #include <stdio.h>
40
41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
42
/**
 * Registers every collation element iterator test defined in this file
 * under the "tscoll/citertst/" branch of the test tree.
 *
 * @param root root of the test tree the tests are added to
 */
void addCollIterTest(TestNode** root)
{
    /* basic iteration */
    addTest(root, TestPrevious,     "tscoll/citertst/TestPrevious");
    addTest(root, TestOffset,       "tscoll/citertst/TestOffset");
    addTest(root, TestSetText,      "tscoll/citertst/TestSetText");
    addTest(root, TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");

    /* whole-BMP sweeps and normalization */
    addTest(root, TestUnicodeChar,  "tscoll/citertst/TestUnicodeChar");
    addTest(root, TestNormalizedUnicodeChar,
            "tscoll/citertst/TestNormalizedUnicodeChar");
    addTest(root, TestNormalization, "tscoll/citertst/TestNormalization");

    /* regression tests */
    addTest(root, TestBug672,          "tscoll/citertst/TestBug672");
    addTest(root, TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");

    /* buffers, collation elements and sort keys */
    addTest(root, TestSmallBuffer,      "tscoll/citertst/TestSmallBuffer");
    addTest(root, TestCEs,              "tscoll/citertst/TestCEs");
    addTest(root, TestDiscontiguos,     "tscoll/citertst/TestDiscontiguos");
    addTest(root, TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
    addTest(root, TestCEValidity,       "tscoll/citertst/TestCEValidity");
    addTest(root, TestSortKeyValidity,  "tscoll/citertst/TestSortKeyValidity");
}
62
/* The locales we support.
   TestBug672() and TestBug672Normalize() open one collator per entry and
   hard-code the number of entries (3) in their loops and result arrays. */

static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
66
TestBug672()67 static void TestBug672() {
68 UErrorCode status = U_ZERO_ERROR;
69 UChar pattern[20];
70 UChar text[50];
71 int i;
72 int result[3][3];
73
74 u_uastrcpy(pattern, "resume");
75 u_uastrcpy(text, "Time to resume updating my resume.");
76
77 for (i = 0; i < 3; ++ i) {
78 UCollator *coll = ucol_open(LOCALES[i], &status);
79 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
80 &status);
81 UCollationElements *titer = ucol_openElements(coll, text, -1,
82 &status);
83 if (U_FAILURE(status)) {
84 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
85 myErrorName(status));
86 return;
87 }
88
89 log_verbose("locale tested %s\n", LOCALES[i]);
90
91 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
92 U_SUCCESS(status)) {
93 }
94 if (U_FAILURE(status)) {
95 log_err("ERROR: reversing collation iterator :%s\n",
96 myErrorName(status));
97 return;
98 }
99 ucol_reset(pitr);
100
101 ucol_setOffset(titer, u_strlen(pattern), &status);
102 if (U_FAILURE(status)) {
103 log_err("ERROR: setting offset in collator :%s\n",
104 myErrorName(status));
105 return;
106 }
107 result[i][0] = ucol_getOffset(titer);
108 log_verbose("Text iterator set to offset %d\n", result[i][0]);
109
110 /* Use previous() */
111 ucol_previous(titer, &status);
112 result[i][1] = ucol_getOffset(titer);
113 log_verbose("Current offset %d after previous\n", result[i][1]);
114
115 /* Add one to index */
116 log_verbose("Adding one to current offset...\n");
117 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
118 if (U_FAILURE(status)) {
119 log_err("ERROR: setting offset in collator :%s\n",
120 myErrorName(status));
121 return;
122 }
123 result[i][2] = ucol_getOffset(titer);
124 log_verbose("Current offset in text = %d\n", result[i][2]);
125 ucol_closeElements(pitr);
126 ucol_closeElements(titer);
127 ucol_close(coll);
128 }
129
130 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
131 uprv_memcmp(result[1], result[2], 3) != 0) {
132 log_err("ERROR: Different locales have different offsets at the same character\n");
133 }
134 }
135
136
137
138 /* Running this test with normalization enabled showed up a bug in the incremental
139 normalization code. */
TestBug672Normalize()140 static void TestBug672Normalize() {
141 UErrorCode status = U_ZERO_ERROR;
142 UChar pattern[20];
143 UChar text[50];
144 int i;
145 int result[3][3];
146
147 u_uastrcpy(pattern, "resume");
148 u_uastrcpy(text, "Time to resume updating my resume.");
149
150 for (i = 0; i < 3; ++ i) {
151 UCollator *coll = ucol_open(LOCALES[i], &status);
152 UCollationElements *pitr = NULL;
153 UCollationElements *titer = NULL;
154
155 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
156
157 pitr = ucol_openElements(coll, pattern, -1, &status);
158 titer = ucol_openElements(coll, text, -1, &status);
159 if (U_FAILURE(status)) {
160 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
161 myErrorName(status));
162 return;
163 }
164
165 log_verbose("locale tested %s\n", LOCALES[i]);
166
167 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
168 U_SUCCESS(status)) {
169 }
170 if (U_FAILURE(status)) {
171 log_err("ERROR: reversing collation iterator :%s\n",
172 myErrorName(status));
173 return;
174 }
175 ucol_reset(pitr);
176
177 ucol_setOffset(titer, u_strlen(pattern), &status);
178 if (U_FAILURE(status)) {
179 log_err("ERROR: setting offset in collator :%s\n",
180 myErrorName(status));
181 return;
182 }
183 result[i][0] = ucol_getOffset(titer);
184 log_verbose("Text iterator set to offset %d\n", result[i][0]);
185
186 /* Use previous() */
187 ucol_previous(titer, &status);
188 result[i][1] = ucol_getOffset(titer);
189 log_verbose("Current offset %d after previous\n", result[i][1]);
190
191 /* Add one to index */
192 log_verbose("Adding one to current offset...\n");
193 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
194 if (U_FAILURE(status)) {
195 log_err("ERROR: setting offset in collator :%s\n",
196 myErrorName(status));
197 return;
198 }
199 result[i][2] = ucol_getOffset(titer);
200 log_verbose("Current offset in text = %d\n", result[i][2]);
201 ucol_closeElements(pitr);
202 ucol_closeElements(titer);
203 ucol_close(coll);
204 }
205
206 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
207 uprv_memcmp(result[1], result[2], 3) != 0) {
208 log_err("ERROR: Different locales have different offsets at the same character\n");
209 }
210 }
211
212
213
214
215 /**
216 * Test for CollationElementIterator previous and next for the whole set of
217 * unicode characters.
218 */
TestUnicodeChar()219 static void TestUnicodeChar()
220 {
221 UChar source[0x100];
222 UCollator *en_us;
223 UCollationElements *iter;
224 UErrorCode status = U_ZERO_ERROR;
225 UChar codepoint;
226
227 UChar *test;
228 en_us = ucol_open("en_US", &status);
229 if (U_FAILURE(status)){
230 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
231 myErrorName(status));
232 return;
233 }
234
235 for (codepoint = 1; codepoint < 0xFFFE;)
236 {
237 test = source;
238
239 while (codepoint % 0xFF != 0)
240 {
241 if (u_isdefined(codepoint))
242 *(test ++) = codepoint;
243 codepoint ++;
244 }
245
246 if (u_isdefined(codepoint))
247 *(test ++) = codepoint;
248
249 if (codepoint != 0xFFFF)
250 codepoint ++;
251
252 *test = 0;
253 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
254 if(U_FAILURE(status)){
255 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
256 myErrorName(status));
257 ucol_close(en_us);
258 return;
259 }
260 /* A basic test to see if it's working at all */
261 log_verbose("codepoint testing %x\n", codepoint);
262 backAndForth(iter);
263 ucol_closeElements(iter);
264
265 /* null termination test */
266 iter=ucol_openElements(en_us, source, -1, &status);
267 if(U_FAILURE(status)){
268 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
269 myErrorName(status));
270 ucol_close(en_us);
271 return;
272 }
273 /* A basic test to see if it's working at all */
274 backAndForth(iter);
275 ucol_closeElements(iter);
276 }
277
278 ucol_close(en_us);
279 }
280
281 /**
282 * Test for CollationElementIterator previous and next for the whole set of
283 * unicode characters with normalization on.
284 */
TestNormalizedUnicodeChar()285 static void TestNormalizedUnicodeChar()
286 {
287 UChar source[0x100];
288 UCollator *th_th;
289 UCollationElements *iter;
290 UErrorCode status = U_ZERO_ERROR;
291 UChar codepoint;
292
293 UChar *test;
294 /* thai should have normalization on */
295 th_th = ucol_open("th_TH", &status);
296 if (U_FAILURE(status)){
297 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
298 myErrorName(status));
299 return;
300 }
301
302 for (codepoint = 1; codepoint < 0xFFFE;)
303 {
304 test = source;
305
306 while (codepoint % 0xFF != 0)
307 {
308 if (u_isdefined(codepoint))
309 *(test ++) = codepoint;
310 codepoint ++;
311 }
312
313 if (u_isdefined(codepoint))
314 *(test ++) = codepoint;
315
316 if (codepoint != 0xFFFF)
317 codepoint ++;
318
319 *test = 0;
320 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
321 if(U_FAILURE(status)){
322 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
323 myErrorName(status));
324 ucol_close(th_th);
325 return;
326 }
327
328 backAndForth(iter);
329 ucol_closeElements(iter);
330
331 iter=ucol_openElements(th_th, source, -1, &status);
332 if(U_FAILURE(status)){
333 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
334 myErrorName(status));
335 ucol_close(th_th);
336 return;
337 }
338
339 backAndForth(iter);
340 ucol_closeElements(iter);
341 }
342
343 ucol_close(th_th);
344 }
345
346 /**
347 * Test the incremental normalization
348 */
TestNormalization()349 static void TestNormalization()
350 {
351 UErrorCode status = U_ZERO_ERROR;
352 const char *str =
353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
354 UCollator *coll;
355 UChar rule[50];
356 int rulelen = u_unescape(str, rule, 50);
357 int count = 0;
358 const char *testdata[] =
359 {"\\u1ED9", "o\\u0323\\u0302",
360 "\\u0300\\u0315", "\\u0315\\u0300",
361 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
362 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
366 int32_t srclen;
367 UChar source[10];
368 UCollationElements *iter;
369
370 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
371 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
372 if (U_FAILURE(status)){
373 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
374 myErrorName(status));
375 return;
376 }
377
378 srclen = u_unescape(testdata[0], source, 10);
379 iter = ucol_openElements(coll, source, srclen, &status);
380 backAndForth(iter);
381 ucol_closeElements(iter);
382
383 srclen = u_unescape(testdata[1], source, 10);
384 iter = ucol_openElements(coll, source, srclen, &status);
385 backAndForth(iter);
386 ucol_closeElements(iter);
387
388 while (count < 12) {
389 srclen = u_unescape(testdata[count], source, 10);
390 iter = ucol_openElements(coll, source, srclen, &status);
391
392 if (U_FAILURE(status)){
393 log_err("ERROR: in creation of collator element iterator\n %s\n",
394 myErrorName(status));
395 return;
396 }
397 backAndForth(iter);
398 ucol_closeElements(iter);
399
400 iter = ucol_openElements(coll, source, -1, &status);
401
402 if (U_FAILURE(status)){
403 log_err("ERROR: in creation of collator element iterator\n %s\n",
404 myErrorName(status));
405 return;
406 }
407 backAndForth(iter);
408 ucol_closeElements(iter);
409 count ++;
410 }
411 ucol_close(coll);
412 }
413
414 /**
415 * Test for CollationElementIterator.previous()
416 *
417 * @bug 4108758 - Make sure it works with contracting characters
418 *
419 */
TestPrevious()420 static void TestPrevious()
421 {
422 UCollator *coll=NULL;
423 UChar rule[50];
424 UChar *source;
425 UCollator *c1, *c2, *c3;
426 UCollationElements *iter;
427 UErrorCode status = U_ZERO_ERROR;
428 UChar test1[50];
429 UChar test2[50];
430
431 u_uastrcpy(test1, "What subset of all possible test cases?");
432 u_uastrcpy(test2, "has the highest probability of detecting");
433 coll = ucol_open("en_US", &status);
434
435 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
436 log_verbose("English locale testing back and forth\n");
437 if(U_FAILURE(status)){
438 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
439 myErrorName(status));
440 ucol_close(coll);
441 return;
442 }
443 /* A basic test to see if it's working at all */
444 backAndForth(iter);
445 ucol_closeElements(iter);
446 ucol_close(coll);
447
448 /* Test with a contracting character sequence */
449 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
450 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
451
452 log_verbose("Contraction rule testing back and forth with no normalization\n");
453
454 if (c1 == NULL || U_FAILURE(status))
455 {
456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
457 myErrorName(status));
458 return;
459 }
460 source=(UChar*)malloc(sizeof(UChar) * 20);
461 u_uastrcpy(source, "abchdcba");
462 iter=ucol_openElements(c1, source, u_strlen(source), &status);
463 if(U_FAILURE(status)){
464 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
465 myErrorName(status));
466 return;
467 }
468 backAndForth(iter);
469 ucol_closeElements(iter);
470 ucol_close(c1);
471
472 /* Test with an expanding character sequence */
473 u_uastrcpy(rule, "&a < b < c/abd < d");
474 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
475 log_verbose("Expansion rule testing back and forth with no normalization\n");
476 if (c2 == NULL || U_FAILURE(status))
477 {
478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
479 myErrorName(status));
480 return;
481 }
482 u_uastrcpy(source, "abcd");
483 iter=ucol_openElements(c2, source, u_strlen(source), &status);
484 if(U_FAILURE(status)){
485 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
486 myErrorName(status));
487 return;
488 }
489 backAndForth(iter);
490 ucol_closeElements(iter);
491 ucol_close(c2);
492 /* Now try both */
493 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
494 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
495 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496
497 if (c3 == NULL || U_FAILURE(status))
498 {
499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
500 myErrorName(status));
501 return;
502 }
503 u_uastrcpy(source, "abcdbchdc");
504 iter=ucol_openElements(c3, source, u_strlen(source), &status);
505 if(U_FAILURE(status)){
506 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
507 myErrorName(status));
508 return;
509 }
510 backAndForth(iter);
511 ucol_closeElements(iter);
512 ucol_close(c3);
513 source[0] = 0x0e41;
514 source[1] = 0x0e02;
515 source[2] = 0x0e41;
516 source[3] = 0x0e02;
517 source[4] = 0x0e27;
518 source[5] = 0x61;
519 source[6] = 0x62;
520 source[7] = 0x63;
521 source[8] = 0;
522
523 coll = ucol_open("th_TH", &status);
524 log_verbose("Thai locale testing back and forth with normalization\n");
525 iter=ucol_openElements(coll, source, u_strlen(source), &status);
526 if(U_FAILURE(status)){
527 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
528 myErrorName(status));
529 return;
530 }
531 backAndForth(iter);
532 ucol_closeElements(iter);
533 ucol_close(coll);
534
535 /* prev test */
536 source[0] = 0x0061;
537 source[1] = 0x30CF;
538 source[2] = 0x3099;
539 source[3] = 0x30FC;
540 source[4] = 0;
541
542 coll = ucol_open("ja_JP", &status);
543 log_verbose("Japanese locale testing back and forth with normalization\n");
544 iter=ucol_openElements(coll, source, u_strlen(source), &status);
545 if(U_FAILURE(status)){
546 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
547 myErrorName(status));
548 return;
549 }
550 backAndForth(iter);
551 ucol_closeElements(iter);
552 ucol_close(coll);
553
554 free(source);
555 }
556
557 /**
558 * Test for getOffset() and setOffset()
559 */
TestOffset()560 static void TestOffset()
561 {
562 UErrorCode status= U_ZERO_ERROR;
563 UCollator *en_us=NULL;
564 UCollationElements *iter, *pristine;
565 int32_t offset;
566 OrderAndOffset *orders;
567 int32_t orderLength=0;
568 int count = 0;
569 UChar test1[50];
570 UChar test2[50];
571
572 u_uastrcpy(test1, "What subset of all possible test cases?");
573 u_uastrcpy(test2, "has the highest probability of detecting");
574 en_us = ucol_open("en_US", &status);
575 log_verbose("Testing getOffset and setOffset for collations\n");
576 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
577 if(U_FAILURE(status)){
578 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
579 myErrorName(status));
580 ucol_close(en_us);
581 return;
582 }
583
584 /* testing boundaries */
585 ucol_setOffset(iter, 0, &status);
586 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
587 log_err("Error: After setting offset to 0, we should be at the end "
588 "of the backwards iteration");
589 }
590 ucol_setOffset(iter, u_strlen(test1), &status);
591 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
592 log_err("Error: After setting offset to end of the string, we should "
593 "be at the end of the backwards iteration");
594 }
595
596 /* Run all the way through the iterator, then get the offset */
597
598 orders = getOrders(iter, &orderLength);
599
600 offset = ucol_getOffset(iter);
601
602 if (offset != u_strlen(test1))
603 {
604 log_err("offset at end != length %d vs %d\n", offset,
605 u_strlen(test1) );
606 }
607
608 /* Now set the offset back to the beginning and see if it works */
609 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
610 if(U_FAILURE(status)){
611 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
612 myErrorName(status));
613 ucol_close(en_us);
614 return;
615 }
616 status = U_ZERO_ERROR;
617
618 ucol_setOffset(iter, 0, &status);
619 if (U_FAILURE(status))
620 {
621 log_err("setOffset failed. %s\n", myErrorName(status));
622 }
623 else
624 {
625 assertEqual(iter, pristine);
626 }
627
628 ucol_closeElements(pristine);
629 ucol_closeElements(iter);
630 free(orders);
631
632 /* testing offsets in normalization buffer */
633 test1[0] = 0x61;
634 test1[1] = 0x300;
635 test1[2] = 0x316;
636 test1[3] = 0x62;
637 test1[4] = 0;
638 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
639 iter = ucol_openElements(en_us, test1, 4, &status);
640 if(U_FAILURE(status)){
641 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
642 myErrorName(status));
643 ucol_close(en_us);
644 return;
645 }
646
647 count = 0;
648 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
649 U_SUCCESS(status)) {
650 switch (count) {
651 case 0:
652 if (ucol_getOffset(iter) != 1) {
653 log_err("ERROR: Offset of iteration should be 1\n");
654 }
655 break;
656 case 3:
657 if (ucol_getOffset(iter) != 4) {
658 log_err("ERROR: Offset of iteration should be 4\n");
659 }
660 break;
661 default:
662 if (ucol_getOffset(iter) != 3) {
663 log_err("ERROR: Offset of iteration should be 3\n");
664 }
665 }
666 count ++;
667 }
668
669 ucol_reset(iter);
670 count = 0;
671 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
672 U_SUCCESS(status)) {
673 switch (count) {
674 case 0:
675 case 1:
676 if (ucol_getOffset(iter) != 3) {
677 log_err("ERROR: Offset of iteration should be 3\n");
678 }
679 break;
680 case 2:
681 if (ucol_getOffset(iter) != 1) {
682 log_err("ERROR: Offset of iteration should be 1\n");
683 }
684 break;
685 default:
686 if (ucol_getOffset(iter) != 0) {
687 log_err("ERROR: Offset of iteration should be 0\n");
688 }
689 }
690 count ++;
691 }
692
693 if(U_FAILURE(status)){
694 log_err("ERROR: in iterating collation elements %s\n",
695 myErrorName(status));
696 }
697
698 ucol_closeElements(iter);
699 ucol_close(en_us);
700 }
701
702 /**
703 * Test for setText()
704 */
TestSetText()705 static void TestSetText()
706 {
707 int32_t c,i;
708 UErrorCode status = U_ZERO_ERROR;
709 UCollator *en_us=NULL;
710 UCollationElements *iter1, *iter2;
711 UChar test1[50];
712 UChar test2[50];
713
714 u_uastrcpy(test1, "What subset of all possible test cases?");
715 u_uastrcpy(test2, "has the highest probability of detecting");
716 en_us = ucol_open("en_US", &status);
717 log_verbose("testing setText for Collation elements\n");
718 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
719 if(U_FAILURE(status)){
720 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
721 myErrorName(status));
722 ucol_close(en_us);
723 return;
724 }
725 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
726 if(U_FAILURE(status)){
727 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
728 myErrorName(status));
729 ucol_close(en_us);
730 return;
731 }
732
733 /* Run through the second iterator just to exercise it */
734 c = ucol_next(iter2, &status);
735 i = 0;
736
737 while ( ++i < 10 && (c != UCOL_NULLORDER))
738 {
739 if (U_FAILURE(status))
740 {
741 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
742 ucol_closeElements(iter2);
743 ucol_closeElements(iter1);
744 ucol_close(en_us);
745 return;
746 }
747
748 c = ucol_next(iter2, &status);
749 }
750
751 /* Now set it to point to the same string as the first iterator */
752 ucol_setText(iter2, test1, u_strlen(test1), &status);
753 if (U_FAILURE(status))
754 {
755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
756 }
757 else
758 {
759 assertEqual(iter1, iter2);
760 }
761
762 /* Now set it to point to a null string with fake length*/
763 ucol_setText(iter2, NULL, 2, &status);
764 if (U_FAILURE(status))
765 {
766 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
767 }
768 else
769 {
770 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
771 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
772 }
773 }
774
775 ucol_closeElements(iter2);
776 ucol_closeElements(iter1);
777 ucol_close(en_us);
778 }
779
780 /** @bug 4108762
781 * Test for getMaxExpansion()
782 */
TestMaxExpansion()783 static void TestMaxExpansion()
784 {
785 UErrorCode status = U_ZERO_ERROR;
786 UCollator *coll ;/*= ucol_open("en_US", &status);*/
787 UChar ch = 0;
788 UChar32 unassigned = 0xEFFFD;
789 UChar supplementary[2];
790 uint32_t stringOffset = 0;
791 UBool isError = FALSE;
792 uint32_t sorder = 0;
793 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
794 uint32_t temporder = 0;
795
796 UChar rule[256];
797 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
798 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
799 UCOL_DEFAULT_STRENGTH,NULL, &status);
800 if(U_SUCCESS(status) && coll) {
801 iter = ucol_openElements(coll, &ch, 1, &status);
802
803 while (ch < 0xFFFF && U_SUCCESS(status)) {
804 int count = 1;
805 uint32_t order;
806 int32_t size = 0;
807
808 ch ++;
809
810 ucol_setText(iter, &ch, 1, &status);
811 order = ucol_previous(iter, &status);
812
813 /* thai management */
814 if (order == 0)
815 order = ucol_previous(iter, &status);
816
817 while (U_SUCCESS(status) &&
818 ucol_previous(iter, &status) != UCOL_NULLORDER) {
819 count ++;
820 }
821
822 size = ucol_getMaxExpansion(iter, order);
823 if (U_FAILURE(status) || size < count) {
824 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
825 ch, count);
826 }
827 }
828
829 /* testing for exact max expansion */
830 ch = 0;
831 while (ch < 0x61) {
832 uint32_t order;
833 int32_t size;
834 ucol_setText(iter, &ch, 1, &status);
835 order = ucol_previous(iter, &status);
836 size = ucol_getMaxExpansion(iter, order);
837 if (U_FAILURE(status) || size != 1) {
838 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
839 ch, 1);
840 }
841 ch ++;
842 }
843
844 ch = 0x63;
845 ucol_setText(iter, &ch, 1, &status);
846 temporder = ucol_previous(iter, &status);
847
848 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
849 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
850 ch, 3);
851 }
852
853 ch = 0x64;
854 ucol_setText(iter, &ch, 1, &status);
855 temporder = ucol_previous(iter, &status);
856
857 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
858 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
859 ch, 3);
860 }
861
862 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
863 ucol_setText(iter, supplementary, 2, &status);
864 sorder = ucol_previous(iter, &status);
865
866 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
867 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
868 ch, 2);
869 }
870
871 /* testing jamo */
872 ch = 0x1165;
873
874 ucol_setText(iter, &ch, 1, &status);
875 temporder = ucol_previous(iter, &status);
876 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
877 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
878 ch, 3);
879 }
880
881 ucol_closeElements(iter);
882 ucol_close(coll);
883
884 /* testing special jamo &a<\u1160 */
885 rule[0] = 0x26;
886 rule[1] = 0x71;
887 rule[2] = 0x3c;
888 rule[3] = 0x1165;
889 rule[4] = 0x2f;
890 rule[5] = 0x71;
891 rule[6] = 0x71;
892 rule[7] = 0x71;
893 rule[8] = 0x71;
894 rule[9] = 0;
895
896 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
897 UCOL_DEFAULT_STRENGTH,NULL, &status);
898 iter = ucol_openElements(coll, &ch, 1, &status);
899
900 temporder = ucol_previous(iter, &status);
901 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
902 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
903 ch, 5);
904 }
905
906 ucol_closeElements(iter);
907 ucol_close(coll);
908 } else {
909 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
910 }
911
912 }
913
914
/**
 * Advances both iterators in lock step with ucol_next() and logs an error
 * at the first position where they return different collation elements.
 * Stops once the first iterator returns UCOL_NULLORDER.
 *
 * An iteration failure is reported explicitly instead of being mistaken
 * for "both iterators ended at the same time".
 *
 * @param i1 first iterator; advanced by this function
 * @param i2 second iterator; advanced by this function
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    int32_t c1, c2;
    int32_t count = 0;
    UErrorCode status = U_ZERO_ERROR;

    do
    {
        c1 = ucol_next(i1, &status);
        c2 = ucol_next(i2, &status);

        if (U_FAILURE(status))
        {
            log_err("Error in iteration %d assertEqual: ucol_next() failed. %s\n",
                    count, myErrorName(status));
            break;
        }

        if (c1 != c2)
        {
            log_err("Error in iteration %d assertEqual between\n %d and %d, they are not equal\n", count, c1, c2);
            break;
        }

        count += 1;
    }
    while (c1 != UCOL_NULLORDER);
}
936
937 /**
938 * Testing iterators with extremely small buffers
939 */
TestSmallBuffer()940 static void TestSmallBuffer()
941 {
942 UErrorCode status = U_ZERO_ERROR;
943 UCollator *coll;
944 UCollationElements *testiter,
945 *iter;
946 int32_t count = 0;
947 OrderAndOffset *testorders,
948 *orders;
949
950 UChar teststr[500];
951 UChar str[] = {0x300, 0x31A, 0};
952 /*
953 creating a long string of decomposable characters,
954 since by default the writable buffer is of size 256
955 */
956 while (count < 500) {
957 if ((count & 1) == 0) {
958 teststr[count ++] = 0x300;
959 }
960 else {
961 teststr[count ++] = 0x31A;
962 }
963 }
964
965 coll = ucol_open("th_TH", &status);
966 if(U_SUCCESS(status) && coll) {
967 testiter = ucol_openElements(coll, teststr, 500, &status);
968 iter = ucol_openElements(coll, str, 2, &status);
969
970 orders = getOrders(iter, &count);
971 if (count != 2) {
972 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
973 }
974
975 /*
976 this will rearrange the string data to 250 characters of 0x300 first then
977 250 characters of 0x031A
978 */
979 testorders = getOrders(testiter, &count);
980
981 if (count != 500) {
982 log_err("Error decomposition does not give the right sized collation elements\n");
983 }
984
985 while (count != 0) {
986 /* UCA collation element for 0x0F76 */
987 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
988 (count <= 250 && testorders[-- count].order != orders[0].order)) {
989 log_err("Error decomposition does not give the right collation element at %d count\n", count);
990 break;
991 }
992 }
993
994 free(testorders);
995 free(orders);
996
997 ucol_reset(testiter);
998
999 /* ensures closing of elements done properly to clear writable buffer */
1000 ucol_next(testiter, &status);
1001 ucol_next(testiter, &status);
1002 ucol_closeElements(testiter);
1003 ucol_closeElements(iter);
1004 ucol_close(coll);
1005 } else {
1006 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1007 }
1008 }
1009
/**
 * Sniplets of code from genuca
 *
 * Converts one hexadecimal digit character to its numeric value.
 * @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are digits
 * @return the value 0..15 of the digit, or 0 for any other character
 */
static int32_t hex2num(char hex) {
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    if ('a' <= hex && hex <= 'f') {
        return 10 + (hex - 'a');
    }
    if ('A' <= hex && hex <= 'F') {
        return 10 + (hex - 'A');
    }
    /* not a hexadecimal digit */
    return 0;
}
1024
1025 /**
1026 * Getting codepoints from a string
1027 * @param str character string contain codepoints seperated by space and ended
1028 * by a semicolon
1029 * @param codepoints array for storage, assuming size > 5
1030 * @return position at the end of the codepoint section
1031 */
getCodePoints(char * str,UChar * codepoints,UChar * contextCPs)1032 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1033 UErrorCode errorCode = U_ZERO_ERROR;
1034 char *semi = uprv_strchr(str, ';');
1035 char *pipe = uprv_strchr(str, '|');
1036 char *s;
1037 *codepoints = 0;
1038 *contextCPs = 0;
1039 if(semi == NULL) {
1040 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1041 return str;
1042 }
1043 if(pipe != NULL) {
1044 int32_t contextLength;
1045 *pipe = 0;
1046 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1047 *pipe = '|';
1048 if(U_FAILURE(errorCode)) {
1049 log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1050 return str;
1051 }
1052 /* prepend the precontext string to the codepoints */
1053 u_memcpy(codepoints, contextCPs, contextLength);
1054 codepoints += contextLength;
1055 /* start of the code point string */
1056 s = pipe + 1;
1057 } else {
1058 s = str;
1059 }
1060 u_parseString(s, codepoints, 99, NULL, &errorCode);
1061 if(U_FAILURE(errorCode)) {
1062 log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1063 return str;
1064 }
1065 return semi + 1;
1066 }
1067
1068 /**
1069 * Sniplets of code from genuca
1070 */
1071 static int32_t
readElement(char ** from,char * to,char separator,UErrorCode * status)1072 readElement(char **from, char *to, char separator, UErrorCode *status)
1073 {
1074 if (U_SUCCESS(*status)) {
1075 char buffer[1024];
1076 int32_t i = 0;
1077 while (**from != separator) {
1078 if (**from != ' ') {
1079 *(buffer+i++) = **from;
1080 }
1081 (*from)++;
1082 }
1083 (*from)++;
1084 *(buffer + i) = 0;
1085 strcpy(to, buffer);
1086 return i/2;
1087 }
1088
1089 return 0;
1090 }
1091
1092 /**
1093 * Sniplets of code from genuca
1094 */
1095 static uint32_t
getSingleCEValue(char * primary,char * secondary,char * tertiary,UErrorCode * status)1096 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1097 UErrorCode *status)
1098 {
1099 if (U_SUCCESS(*status)) {
1100 uint32_t value = 0;
1101 char primsave = '\0';
1102 char secsave = '\0';
1103 char tersave = '\0';
1104 char *primend = primary+4;
1105 char *secend = secondary+2;
1106 char *terend = tertiary+2;
1107 uint32_t primvalue;
1108 uint32_t secvalue;
1109 uint32_t tervalue;
1110
1111 if (uprv_strlen(primary) > 4) {
1112 primsave = *primend;
1113 *primend = '\0';
1114 }
1115
1116 if (uprv_strlen(secondary) > 2) {
1117 secsave = *secend;
1118 *secend = '\0';
1119 }
1120
1121 if (uprv_strlen(tertiary) > 2) {
1122 tersave = *terend;
1123 *terend = '\0';
1124 }
1125
1126 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1127 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1128 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1129 if(primvalue <= 0xFF) {
1130 primvalue <<= 8;
1131 }
1132
1133 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1134 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1135 | (tervalue & UCOL_TERTIARYORDERMASK);
1136
1137 if(primsave!='\0') {
1138 *primend = primsave;
1139 }
1140 if(secsave!='\0') {
1141 *secend = secsave;
1142 }
1143 if(tersave!='\0') {
1144 *terend = tersave;
1145 }
1146 return value;
1147 }
1148 return 0;
1149 }
1150
1151 /**
1152 * Getting collation elements generated from a string
1153 * @param str character string contain collation elements contained in [] and
1154 * seperated by space
1155 * @param ce array for storage, assuming size > 20
1156 * @param status error status
1157 * @return position at the end of the codepoint section
1158 */
getCEs(char * str,uint32_t * ces,UErrorCode * status)1159 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1160 char *pStartCP = uprv_strchr(str, '[');
1161 int count = 0;
1162 char *pEndCP;
1163 char primary[100];
1164 char secondary[100];
1165 char tertiary[100];
1166
1167 while (*pStartCP == '[') {
1168 uint32_t primarycount = 0;
1169 uint32_t secondarycount = 0;
1170 uint32_t tertiarycount = 0;
1171 uint32_t CEi = 1;
1172 pEndCP = strchr(pStartCP, ']');
1173 if(pEndCP == NULL) {
1174 break;
1175 }
1176 pStartCP ++;
1177
1178 primarycount = readElement(&pStartCP, primary, ',', status);
1179 secondarycount = readElement(&pStartCP, secondary, ',', status);
1180 tertiarycount = readElement(&pStartCP, tertiary, ']', status);
1181
1182 /* I want to get the CEs entered right here, including continuation */
1183 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1184 if (U_FAILURE(*status)) {
1185 break;
1186 }
1187
1188 while (2 * CEi < primarycount || CEi < secondarycount ||
1189 CEi < tertiarycount) {
1190 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1191 if (2 * CEi < primarycount) {
1192 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1193 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1194 }
1195
1196 if (2 * CEi + 1 < primarycount) {
1197 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1198 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1199 }
1200
1201 if (CEi < secondarycount) {
1202 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1203 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1204 }
1205
1206 if (CEi < tertiarycount) {
1207 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1208 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1209 }
1210
1211 CEi ++;
1212 ces[count ++] = value;
1213 }
1214
1215 pStartCP = pEndCP + 1;
1216 }
1217 ces[count] = 0;
1218 return pStartCP;
1219 }
1220
1221 /**
1222 * Getting the FractionalUCA.txt file stream
1223 */
getFractionalUCA(void)1224 static FileStream * getFractionalUCA(void)
1225 {
1226 char newPath[256];
1227 char backupPath[256];
1228 FileStream *result = NULL;
1229
1230 /* Look inside ICU_DATA first */
1231 uprv_strcpy(newPath, ctest_dataSrcDir());
1232 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1233 uprv_strcat(newPath, "FractionalUCA.txt");
1234
1235 /* As a fallback, try to guess where the source data was located
1236 * at the time ICU was built, and look there.
1237 */
1238 #if defined (U_TOPSRCDIR)
1239 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
1240 #else
1241 {
1242 UErrorCode errorCode = U_ZERO_ERROR;
1243 strcpy(backupPath, loadTestData(&errorCode));
1244 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1245 }
1246 #endif
1247 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1248
1249 result = T_FileStream_open(newPath, "rb");
1250
1251 if (result == NULL) {
1252 result = T_FileStream_open(backupPath, "rb");
1253 if (result == NULL) {
1254 log_err("Failed to open either %s or %s\n", newPath, backupPath);
1255 }
1256 }
1257 return result;
1258 }
1259
1260 /**
1261 * Testing the CEs returned by the iterator
1262 */
TestCEs()1263 static void TestCEs() {
1264 FileStream *file = NULL;
1265 char line[2048];
1266 char *str;
1267 UChar codepoints[10];
1268 uint32_t ces[20];
1269 UErrorCode status = U_ZERO_ERROR;
1270 UCollator *coll = ucol_open("", &status);
1271 uint32_t lineNo = 0;
1272 UChar contextCPs[5];
1273
1274 if (U_FAILURE(status)) {
1275 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1276 return;
1277 }
1278
1279 file = getFractionalUCA();
1280
1281 if (file == NULL) {
1282 log_err("*** unable to open input FractionalUCA.txt file ***\n");
1283 return;
1284 }
1285
1286
1287 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1288 int count = 0;
1289 UCollationElements *iter;
1290 int32_t preContextCeLen=0;
1291 lineNo++;
1292 /* skip this line if it is empty or a comment or is a return value
1293 or start of some variable section */
1294 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1295 line[0] == 0x000D || line[0] == '[') {
1296 continue;
1297 }
1298
1299 str = getCodePoints(line, codepoints, contextCPs);
1300
1301 /* these are 'fake' codepoints in the fractional UCA, and are used just
1302 * for positioning of indirect values. They should not go through this
1303 * test.
1304 */
1305 if(*codepoints == 0xFDD0) {
1306 continue;
1307 }
1308 if (*contextCPs != 0) {
1309 iter = ucol_openElements(coll, contextCPs, -1, &status);
1310 if (U_FAILURE(status)) {
1311 log_err("Error in opening collation elements\n");
1312 break;
1313 }
1314 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1315 preContextCeLen++;
1316 }
1317 ucol_closeElements(iter);
1318 }
1319
1320 getCEs(str, ces+preContextCeLen, &status);
1321 if (U_FAILURE(status)) {
1322 log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1323 break;
1324 }
1325 iter = ucol_openElements(coll, codepoints, -1, &status);
1326 if (U_FAILURE(status)) {
1327 log_err("Error in opening collation elements\n");
1328 break;
1329 }
1330 for (;;) {
1331 uint32_t ce = (uint32_t)ucol_next(iter, &status);
1332 if (ce == 0xFFFFFFFF) {
1333 ce = 0;
1334 }
1335 /* we now unconditionally reorder Thai/Lao prevowels, so this
1336 * test would fail if we don't skip here.
1337 */
1338 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1339 continue;
1340 }
1341 if (ce != ces[count] || U_FAILURE(status)) {
1342 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1343 break;
1344 }
1345 if (ces[count] == 0) {
1346 break;
1347 }
1348 count ++;
1349 }
1350 ucol_closeElements(iter);
1351 }
1352
1353 T_FileStream_close(file);
1354 ucol_close(coll);
1355 }
1356
1357 /**
1358 * Testing the discontigous contractions
1359 */
TestDiscontiguos()1360 static void TestDiscontiguos() {
1361 const char *rulestr =
1362 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1363 UChar rule[50];
1364 int rulelen = u_unescape(rulestr, rule, 50);
1365 const char *src[] = {
1366 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1367 /* base character blocked */
1368 "XD\\u0300", "XD\\u0300\\u0315",
1369 /* non blocking combining character */
1370 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1371 /* blocking combining character */
1372 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1373 /* contraction prefix */
1374 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1375 "X\\u0300\\u031A\\u0315",
1376 /* ends not with a contraction character */
1377 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1378 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1379 };
1380 const char *tgt[] = {
1381 /* non blocking combining character */
1382 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1383 /* base character blocked */
1384 "X D \\u0300", "X D \\u0300\\u0315",
1385 /* non blocking combining character */
1386 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1387 /* blocking combining character */
1388 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1389 /* contraction prefix */
1390 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1391 "X\\u0300 \\u031A \\u0315",
1392 /* ends not with a contraction character */
1393 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1394 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1395 };
1396 int size = 20;
1397 UCollator *coll;
1398 UErrorCode status = U_ZERO_ERROR;
1399 int count = 0;
1400 UCollationElements *iter;
1401 UCollationElements *resultiter;
1402
1403 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1404 iter = ucol_openElements(coll, rule, 1, &status);
1405 resultiter = ucol_openElements(coll, rule, 1, &status);
1406
1407 if (U_FAILURE(status)) {
1408 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1409 return;
1410 }
1411
1412 while (count < size) {
1413 UChar str[20];
1414 UChar tstr[20];
1415 int strLen = u_unescape(src[count], str, 20);
1416 UChar *s;
1417
1418 ucol_setText(iter, str, strLen, &status);
1419 if (U_FAILURE(status)) {
1420 log_err("Error opening collation iterator\n");
1421 return;
1422 }
1423
1424 u_unescape(tgt[count], tstr, 20);
1425 s = tstr;
1426
1427 log_verbose("count %d\n", count);
1428
1429 for (;;) {
1430 uint32_t ce;
1431 UChar *e = u_strchr(s, 0x20);
1432 if (e == 0) {
1433 e = u_strchr(s, 0);
1434 }
1435 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1436 ce = ucol_next(resultiter, &status);
1437 if (U_FAILURE(status)) {
1438 log_err("Error manipulating collation iterator\n");
1439 return;
1440 }
1441 while (ce != UCOL_NULLORDER) {
1442 if (ce != (uint32_t)ucol_next(iter, &status) ||
1443 U_FAILURE(status)) {
1444 log_err("Discontiguos contraction test mismatch\n");
1445 return;
1446 }
1447 ce = ucol_next(resultiter, &status);
1448 if (U_FAILURE(status)) {
1449 log_err("Error getting next collation element\n");
1450 return;
1451 }
1452 }
1453 s = e + 1;
1454 if (*e == 0) {
1455 break;
1456 }
1457 }
1458 ucol_reset(iter);
1459 backAndForth(iter);
1460 count ++;
1461 }
1462 ucol_closeElements(resultiter);
1463 ucol_closeElements(iter);
1464 ucol_close(coll);
1465 }
1466
TestCEBufferOverflow()1467 static void TestCEBufferOverflow()
1468 {
1469 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1470 UErrorCode status = U_ZERO_ERROR;
1471 UChar rule[10];
1472 UCollator *coll;
1473 UCollationElements *iter;
1474
1475 u_uastrcpy(rule, "&z < AB");
1476 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1477 if (U_FAILURE(status)) {
1478 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1479 return;
1480 }
1481
1482 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1483 test. this will cause an overflow in getPrev */
1484 str[0] = 0x0041; /* 'A' */
1485 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1486 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1487 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
1488 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1489 &status);
1490 if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1491 status == U_BUFFER_OVERFLOW_ERROR) {
1492 log_err("CE buffer should not overflow with long string of trail surrogates\n");
1493 }
1494 ucol_closeElements(iter);
1495 ucol_close(coll);
1496 }
1497
1498 /**
1499 * Checking collation element validity.
1500 */
1501 #define MAX_CODEPOINTS_TO_SHOW 10
showCodepoints(const UChar * codepoints,int length,char * codepointText)1502 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1503 int i, lengthToUse = length;
1504 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1505 lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1506 }
1507 for (i = 0; i < lengthToUse; ++i) {
1508 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1509 if (bytesWritten <= 0) {
1510 break;
1511 }
1512 codepointText += bytesWritten;
1513 }
1514 if (i < length) {
1515 sprintf(codepointText, " ...");
1516 }
1517 }
1518
checkCEValidity(const UCollator * coll,const UChar * codepoints,int length)1519 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1520 int length)
1521 {
1522 UErrorCode status = U_ZERO_ERROR;
1523 UCollationElements *iter = ucol_openElements(coll, codepoints, length,
1524 &status);
1525 UBool result = FALSE;
1526 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1527 const char * collLocale;
1528
1529 if (U_FAILURE(status)) {
1530 log_err("Error creating iterator for testing validity\n");
1531 return FALSE;
1532 }
1533 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1534 if (U_FAILURE(status) || collLocale==NULL) {
1535 status = U_ZERO_ERROR;
1536 collLocale = "?";
1537 }
1538
1539 for (;;) {
1540 uint32_t ce = ucol_next(iter, &status);
1541 uint32_t primary, p1, p2, secondary, tertiary;
1542 if (ce == UCOL_NULLORDER) {
1543 result = TRUE;
1544 break;
1545 }
1546 if (ce == 0) {
1547 continue;
1548 }
1549 if (ce == 0x02000202) {
1550 /* special CE for merge-sort character */
1551 if (*codepoints == 0xFFFE /* && length == 1 */) {
1552 /*
1553 * Note: We should check for length==1 but the token parser appears
1554 * to give us trailing NUL characters.
1555 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1556 * rather than the internal collation rule parser
1557 */
1558 continue;
1559 } else {
1560 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1561 (int)*codepoints, (int)length);
1562 break;
1563 }
1564 }
1565 primary = UCOL_PRIMARYORDER(ce);
1566 p1 = primary >> 8;
1567 p2 = primary & 0xFF;
1568 secondary = UCOL_SECONDARYORDER(ce);
1569 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1570
1571 if (!isContinuation(ce)) {
1572 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1573 log_err("Empty CE %08lX except for case bits\n", (long)ce);
1574 break;
1575 }
1576 if (p1 == 0) {
1577 if (p2 != 0) {
1578 log_err("Primary 00 xx in %08lX\n", (long)ce);
1579 break;
1580 }
1581 primaryDone = TRUE;
1582 } else {
1583 if (p1 <= 2 || p1 >= 0xF0) {
1584 /* Primary first bytes F0..FF are specials. */
1585 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1586 break;
1587 }
1588 if (p2 == 0) {
1589 primaryDone = TRUE;
1590 } else {
1591 if (p2 <= 3 || p2 >= 0xFF) {
1592 /* Primary second bytes 03 and FF are sort key compression terminators. */
1593 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1594 break;
1595 }
1596 primaryDone = FALSE;
1597 }
1598 }
1599 if (secondary == 0) {
1600 if (primary != 0) {
1601 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1602 break;
1603 }
1604 secondaryDone = TRUE;
1605 } else {
1606 if (secondary <= 2 ||
1607 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
1608 ) {
1609 /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
1610 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1611 break;
1612 }
1613 secondaryDone = FALSE;
1614 }
1615 if (tertiary == 0) {
1616 /* We know that ce != 0. */
1617 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1618 break;
1619 }
1620 if (tertiary <= 2) {
1621 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1622 break;
1623 }
1624 tertiaryDone = FALSE;
1625 } else {
1626 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1627 log_err("Empty continuation %08lX\n", (long)ce);
1628 break;
1629 }
1630 if (primaryDone && primary != 0) {
1631 log_err("Primary was done but continues in %08lX\n", (long)ce);
1632 break;
1633 }
1634 if (p1 == 0) {
1635 if (p2 != 0) {
1636 log_err("Primary 00 xx in %08lX\n", (long)ce);
1637 break;
1638 }
1639 primaryDone = TRUE;
1640 } else {
1641 if (p1 <= 2) {
1642 log_err("Primary first byte of %08lX out of range\n", (long)ce);
1643 break;
1644 }
1645 if (p2 == 0) {
1646 primaryDone = TRUE;
1647 } else {
1648 if (p2 <= 3) {
1649 log_err("Primary second byte of %08lX out of range\n", (long)ce);
1650 break;
1651 }
1652 }
1653 }
1654 if (secondaryDone && secondary != 0) {
1655 log_err("Secondary was done but continues in %08lX\n", (long)ce);
1656 break;
1657 }
1658 if (secondary == 0) {
1659 secondaryDone = TRUE;
1660 } else {
1661 if (secondary <= 2) {
1662 log_err("Secondary byte of %08lX out of range\n", (long)ce);
1663 break;
1664 }
1665 }
1666 if (tertiaryDone && tertiary != 0) {
1667 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1668 break;
1669 }
1670 if (tertiary == 0) {
1671 tertiaryDone = TRUE;
1672 } else if (tertiary <= 2) {
1673 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1674 break;
1675 }
1676 }
1677 }
1678 if (!result) {
1679 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1680 showCodepoints(codepoints, length, codepointText);
1681 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText);
1682 }
1683 ucol_closeElements(iter);
1684 return result;
1685 }
1686
TestCEValidity()1687 static void TestCEValidity()
1688 {
1689 /* testing UCA collation elements */
1690 UErrorCode status = U_ZERO_ERROR;
1691 /* en_US has no tailorings */
1692 UCollator *coll = ucol_open("root", &status);
1693 /* tailored locales */
1694 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1695 const char *loc;
1696 FileStream *file = NULL;
1697 char line[2048];
1698 UChar codepoints[11];
1699 int count = 0;
1700 int maxCount = 0;
1701 UChar contextCPs[3];
1702 UChar32 c;
1703 UParseError parseError;
1704 if (U_FAILURE(status)) {
1705 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1706 return;
1707 }
1708 log_verbose("Testing UCA elements\n");
1709 file = getFractionalUCA();
1710 if (file == NULL) {
1711 log_err("Fractional UCA data can not be opened\n");
1712 return;
1713 }
1714
1715 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1716 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1717 line[0] == 0x000D || line[0] == '[') {
1718 continue;
1719 }
1720
1721 getCodePoints(line, codepoints, contextCPs);
1722 checkCEValidity(coll, codepoints, u_strlen(codepoints));
1723 }
1724
1725 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1726 for (c = 0; c <= 0xffff; ++c) {
1727 if (u_isdefined(c)) {
1728 codepoints[0] = (UChar)c;
1729 checkCEValidity(coll, codepoints, 1);
1730 }
1731 }
1732 for (; c <= 0x10ffff; ++c) {
1733 if (u_isdefined(c)) {
1734 int32_t i = 0;
1735 U16_APPEND_UNSAFE(codepoints, i, c);
1736 checkCEValidity(coll, codepoints, i);
1737 }
1738 }
1739
1740 ucol_close(coll);
1741
1742 /* testing tailored collation elements */
1743 log_verbose("Testing tailored elements\n");
1744 if(getTestOption(QUICK_OPTION)) {
1745 maxCount = sizeof(locale)/sizeof(locale[0]);
1746 } else {
1747 maxCount = uloc_countAvailable();
1748 }
1749 while (count < maxCount) {
1750 const UChar *rules = NULL,
1751 *current = NULL;
1752 UChar *rulesCopy = NULL;
1753 int32_t ruleLen = 0;
1754
1755 uint32_t chOffset = 0;
1756 uint32_t chLen = 0;
1757 uint32_t exOffset = 0;
1758 uint32_t exLen = 0;
1759 uint32_t prefixOffset = 0;
1760 uint32_t prefixLen = 0;
1761 UBool startOfRules = TRUE;
1762 UColOptionSet opts;
1763
1764 UColTokenParser src;
1765 uint32_t strength = 0;
1766 uint16_t specs = 0;
1767 if(getTestOption(QUICK_OPTION)) {
1768 loc = locale[count];
1769 } else {
1770 loc = uloc_getAvailable(count);
1771 if(!hasCollationElements(loc)) {
1772 count++;
1773 continue;
1774 }
1775 }
1776
1777 uprv_memset(&src, 0, sizeof(UColTokenParser));
1778
1779 log_verbose("Testing CEs for %s\n", loc);
1780
1781 coll = ucol_open(loc, &status);
1782 if (U_FAILURE(status)) {
1783 log_err("%s collator creation failed\n", loc);
1784 return;
1785 }
1786
1787 src.opts = &opts;
1788 rules = ucol_getRules(coll, &ruleLen);
1789
1790 if (ruleLen > 0) {
1791 rulesCopy = (UChar *)uprv_malloc((ruleLen +
1792 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1793 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1794 src.current = src.source = rulesCopy;
1795 src.end = rulesCopy + ruleLen;
1796 src.extraCurrent = src.end;
1797 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1798
1799 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1800 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1801 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1802 strength = src.parsedToken.strength;
1803 chOffset = src.parsedToken.charsOffset;
1804 chLen = src.parsedToken.charsLen;
1805 exOffset = src.parsedToken.extensionOffset;
1806 exLen = src.parsedToken.extensionLen;
1807 prefixOffset = src.parsedToken.prefixOffset;
1808 prefixLen = src.parsedToken.prefixLen;
1809 specs = src.parsedToken.flags;
1810
1811 startOfRules = FALSE;
1812 uprv_memcpy(codepoints, src.source + chOffset,
1813 chLen * sizeof(UChar));
1814 codepoints[chLen] = 0;
1815 checkCEValidity(coll, codepoints, chLen);
1816 }
1817 uprv_free(src.source);
1818 }
1819
1820 ucol_close(coll);
1821 count ++;
1822 }
1823 T_FileStream_close(file);
1824 }
1825
/**
 * Logs an invalid sort key as an error: first the code units it was made
 * from, then the bytes of the key itself.
 * @param codepoints code units of the source string
 * @param length number of code units in codepoints
 * @param sortkey bytes of the sort key
 * @param sklen number of bytes in sortkey
 */
static void printSortKeyError(const UChar *codepoints, int length,
                              uint8_t *sortkey, int sklen)
{
    int i;

    log_err("Sortkey not valid for ");
    for (i = 0; i < length; i++) {
        log_err("0x%04x ", codepoints[i]);
    }

    log_err("\nSortkey : ");
    for (i = 0; i < sklen; i++) {
        log_err("0x%02x ", sortkey[i]);
    }
    log_err("\n");
}
1843
1844 /**
1845 * Checking sort key validity for all levels
1846 */
checkSortKeyValidity(UCollator * coll,const UChar * codepoints,int length)1847 static UBool checkSortKeyValidity(UCollator *coll,
1848 const UChar *codepoints,
1849 int length)
1850 {
1851 UErrorCode status = U_ZERO_ERROR;
1852 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1853 UCOL_TERTIARY, UCOL_QUATERNARY,
1854 UCOL_IDENTICAL};
1855 int strengthlen = 5;
1856 int strengthIndex = 0;
1857 int caselevel = 0;
1858
1859 while (caselevel < 1) {
1860 if (caselevel == 0) {
1861 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1862 }
1863 else {
1864 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1865 }
1866
1867 while (strengthIndex < strengthlen) {
1868 int count01 = 0;
1869 uint32_t count = 0;
1870 uint8_t sortkey[128];
1871 uint32_t sklen;
1872
1873 ucol_setStrength(coll, strength[strengthIndex]);
1874 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1875 while (sortkey[count] != 0) {
1876 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
1877 printSortKeyError(codepoints, length, sortkey, sklen);
1878 return FALSE;
1879 }
1880 if (sortkey[count] == 1) {
1881 count01 ++;
1882 }
1883 count ++;
1884 }
1885
1886 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1887 printSortKeyError(codepoints, length, sortkey, sklen);
1888 return FALSE;
1889 }
1890 strengthIndex ++;
1891 }
1892 caselevel ++;
1893 }
1894 return TRUE;
1895 }
1896
TestSortKeyValidity(void)1897 static void TestSortKeyValidity(void)
1898 {
1899 /* testing UCA collation elements */
1900 UErrorCode status = U_ZERO_ERROR;
1901 /* en_US has no tailorings */
1902 UCollator *coll = ucol_open("en_US", &status);
1903 /* tailored locales */
1904 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1905 FileStream *file = NULL;
1906 char line[2048];
1907 UChar codepoints[10];
1908 int count = 0;
1909 UChar contextCPs[5];
1910 UParseError parseError;
1911 if (U_FAILURE(status)) {
1912 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1913 return;
1914 }
1915 log_verbose("Testing UCA elements\n");
1916 file = getFractionalUCA();
1917 if (file == NULL) {
1918 log_err("Fractional UCA data can not be opened\n");
1919 return;
1920 }
1921
1922 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1923 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1924 line[0] == 0x000D || line[0] == '[') {
1925 continue;
1926 }
1927
1928 getCodePoints(line, codepoints, contextCPs);
1929 if(codepoints[0] == 0xFFFE) {
1930 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1931 continue;
1932 }
1933 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1934 }
1935
1936 log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1937 codepoints[0] = 0;
1938
1939 while (codepoints[0] < 0xFFFF) {
1940 if (u_isdefined((UChar32)codepoints[0])) {
1941 checkSortKeyValidity(coll, codepoints, 1);
1942 }
1943 codepoints[0] ++;
1944 }
1945
1946 ucol_close(coll);
1947
1948 /* testing tailored collation elements */
1949 log_verbose("Testing tailored elements\n");
1950 while (count < 5) {
1951 const UChar *rules = NULL,
1952 *current = NULL;
1953 UChar *rulesCopy = NULL;
1954 int32_t ruleLen = 0;
1955
1956 uint32_t chOffset = 0;
1957 uint32_t chLen = 0;
1958 uint32_t exOffset = 0;
1959 uint32_t exLen = 0;
1960 uint32_t prefixOffset = 0;
1961 uint32_t prefixLen = 0;
1962 UBool startOfRules = TRUE;
1963 UColOptionSet opts;
1964
1965 UColTokenParser src;
1966 uint32_t strength = 0;
1967 uint16_t specs = 0;
1968
1969 uprv_memset(&src, 0, sizeof(UColTokenParser));
1970
1971 coll = ucol_open(locale[count], &status);
1972 if (U_FAILURE(status)) {
1973 log_err("%s collator creation failed\n", locale[count]);
1974 return;
1975 }
1976
1977 src.opts = &opts;
1978 rules = ucol_getRules(coll, &ruleLen);
1979
1980 if (ruleLen > 0) {
1981 rulesCopy = (UChar *)uprv_malloc((ruleLen +
1982 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1983 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1984 src.current = src.source = rulesCopy;
1985 src.end = rulesCopy + ruleLen;
1986 src.extraCurrent = src.end;
1987 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1988
1989 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1990 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1991 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1992 strength = src.parsedToken.strength;
1993 chOffset = src.parsedToken.charsOffset;
1994 chLen = src.parsedToken.charsLen;
1995 exOffset = src.parsedToken.extensionOffset;
1996 exLen = src.parsedToken.extensionLen;
1997 prefixOffset = src.parsedToken.prefixOffset;
1998 prefixLen = src.parsedToken.prefixLen;
1999 specs = src.parsedToken.flags;
2000
2001 startOfRules = FALSE;
2002 uprv_memcpy(codepoints, src.source + chOffset,
2003 chLen * sizeof(UChar));
2004 codepoints[chLen] = 0;
2005 if(codepoints[0] == 0xFFFE) {
2006 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2007 continue;
2008 }
2009 checkSortKeyValidity(coll, codepoints, chLen);
2010 }
2011 uprv_free(src.source);
2012 }
2013
2014 ucol_close(coll);
2015 count ++;
2016 }
2017 T_FileStream_close(file);
2018 }
2019
2020 #endif /* #if !UCONFIG_NO_COLLATION */
2021