1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CITERTST.C
11 *
12 * Modification History:
13 * Date Name Description
14 * Madhu Katragadda Ported for C API
15 * 02/19/01 synwee Modified test case for new collation iterator
16 *********************************************************************************/
17 /*
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_COLLATION
25
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf16.h"
32 #include "unicode/putil.h"
33 #include "callcoll.h"
34 #include "cmemory.h"
35 #include "cintltst.h"
36 #include "citertst.h"
37 #include "ccolltst.h"
38 #include "filestrm.h"
39 #include "cstring.h"
40 #include "ucol_imp.h"
41 #include "uparse.h"
42 #include <stdio.h>
43
44 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
45
/**
 * Registers every collation element iterator test of this file in the test
 * tree rooted at *root, each under a "tscoll/citertst/" path.
 *
 * @param root  root of the test tree the tests are added to
 */
void addCollIterTest(TestNode** root)
{
    /* Iteration direction, offsets and text replacement */
    addTest(root, TestPrevious, "tscoll/citertst/TestPrevious");
    addTest(root, TestOffset, "tscoll/citertst/TestOffset");
    addTest(root, TestSetText, "tscoll/citertst/TestSetText");
    addTest(root, TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");

    /* Whole-BMP sweeps and normalization */
    addTest(root, TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
    addTest(root, TestNormalizedUnicodeChar,
            "tscoll/citertst/TestNormalizedUnicodeChar");
    addTest(root, TestNormalization, "tscoll/citertst/TestNormalization");

    /* Regression tests and special cases */
    addTest(root, TestBug672, "tscoll/citertst/TestBug672");
    addTest(root, TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
    addTest(root, TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
    addTest(root, TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
    addTest(root, TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
}
62
/*
 * Locale IDs opened by the Bug672 tests, which expect the iterator offsets
 * they record to be the same for every locale listed here.
 * The table is read-only, so both the strings and the pointers are const.
 */

static const char * const LOCALES[] = {"en_AU", "en_BE", "en_CA"};
66
TestBug672()67 static void TestBug672() {
68 UErrorCode status = U_ZERO_ERROR;
69 UChar pattern[20];
70 UChar text[50];
71 int i;
72 int result[3][3];
73
74 u_uastrcpy(pattern, "resume");
75 u_uastrcpy(text, "Time to resume updating my resume.");
76
77 for (i = 0; i < 3; ++ i) {
78 UCollator *coll = ucol_open(LOCALES[i], &status);
79 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
80 &status);
81 UCollationElements *titer = ucol_openElements(coll, text, -1,
82 &status);
83 if (U_FAILURE(status)) {
84 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
85 myErrorName(status));
86 return;
87 }
88
89 log_verbose("locale tested %s\n", LOCALES[i]);
90
91 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
92 U_SUCCESS(status)) {
93 }
94 if (U_FAILURE(status)) {
95 log_err("ERROR: reversing collation iterator :%s\n",
96 myErrorName(status));
97 return;
98 }
99 ucol_reset(pitr);
100
101 ucol_setOffset(titer, u_strlen(pattern), &status);
102 if (U_FAILURE(status)) {
103 log_err("ERROR: setting offset in collator :%s\n",
104 myErrorName(status));
105 return;
106 }
107 result[i][0] = ucol_getOffset(titer);
108 log_verbose("Text iterator set to offset %d\n", result[i][0]);
109
110 /* Use previous() */
111 ucol_previous(titer, &status);
112 result[i][1] = ucol_getOffset(titer);
113 log_verbose("Current offset %d after previous\n", result[i][1]);
114
115 /* Add one to index */
116 log_verbose("Adding one to current offset...\n");
117 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
118 if (U_FAILURE(status)) {
119 log_err("ERROR: setting offset in collator :%s\n",
120 myErrorName(status));
121 return;
122 }
123 result[i][2] = ucol_getOffset(titer);
124 log_verbose("Current offset in text = %d\n", result[i][2]);
125 ucol_closeElements(pitr);
126 ucol_closeElements(titer);
127 ucol_close(coll);
128 }
129
130 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
131 uprv_memcmp(result[1], result[2], 3) != 0) {
132 log_err("ERROR: Different locales have different offsets at the same character\n");
133 }
134 }
135
136
137
138 /* Running this test with normalization enabled showed up a bug in the incremental
139 normalization code. */
TestBug672Normalize()140 static void TestBug672Normalize() {
141 UErrorCode status = U_ZERO_ERROR;
142 UChar pattern[20];
143 UChar text[50];
144 int i;
145 int result[3][3];
146
147 u_uastrcpy(pattern, "resume");
148 u_uastrcpy(text, "Time to resume updating my resume.");
149
150 for (i = 0; i < 3; ++ i) {
151 UCollator *coll = ucol_open(LOCALES[i], &status);
152 UCollationElements *pitr = NULL;
153 UCollationElements *titer = NULL;
154
155 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
156
157 pitr = ucol_openElements(coll, pattern, -1, &status);
158 titer = ucol_openElements(coll, text, -1, &status);
159 if (U_FAILURE(status)) {
160 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
161 myErrorName(status));
162 return;
163 }
164
165 log_verbose("locale tested %s\n", LOCALES[i]);
166
167 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
168 U_SUCCESS(status)) {
169 }
170 if (U_FAILURE(status)) {
171 log_err("ERROR: reversing collation iterator :%s\n",
172 myErrorName(status));
173 return;
174 }
175 ucol_reset(pitr);
176
177 ucol_setOffset(titer, u_strlen(pattern), &status);
178 if (U_FAILURE(status)) {
179 log_err("ERROR: setting offset in collator :%s\n",
180 myErrorName(status));
181 return;
182 }
183 result[i][0] = ucol_getOffset(titer);
184 log_verbose("Text iterator set to offset %d\n", result[i][0]);
185
186 /* Use previous() */
187 ucol_previous(titer, &status);
188 result[i][1] = ucol_getOffset(titer);
189 log_verbose("Current offset %d after previous\n", result[i][1]);
190
191 /* Add one to index */
192 log_verbose("Adding one to current offset...\n");
193 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
194 if (U_FAILURE(status)) {
195 log_err("ERROR: setting offset in collator :%s\n",
196 myErrorName(status));
197 return;
198 }
199 result[i][2] = ucol_getOffset(titer);
200 log_verbose("Current offset in text = %d\n", result[i][2]);
201 ucol_closeElements(pitr);
202 ucol_closeElements(titer);
203 ucol_close(coll);
204 }
205
206 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
207 uprv_memcmp(result[1], result[2], 3) != 0) {
208 log_err("ERROR: Different locales have different offsets at the same character\n");
209 }
210 }
211
212
213
214
215 /**
216 * Test for CollationElementIterator previous and next for the whole set of
217 * unicode characters.
218 */
TestUnicodeChar()219 static void TestUnicodeChar()
220 {
221 UChar source[0x100];
222 UCollator *en_us;
223 UCollationElements *iter;
224 UErrorCode status = U_ZERO_ERROR;
225 UChar codepoint;
226
227 UChar *test;
228 en_us = ucol_open("en_US", &status);
229 if (U_FAILURE(status)){
230 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
231 myErrorName(status));
232 return;
233 }
234
235 for (codepoint = 1; codepoint < 0xFFFE;)
236 {
237 test = source;
238
239 while (codepoint % 0xFF != 0)
240 {
241 if (u_isdefined(codepoint))
242 *(test ++) = codepoint;
243 codepoint ++;
244 }
245
246 if (u_isdefined(codepoint))
247 *(test ++) = codepoint;
248
249 if (codepoint != 0xFFFF)
250 codepoint ++;
251
252 *test = 0;
253 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
254 if(U_FAILURE(status)){
255 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
256 myErrorName(status));
257 ucol_close(en_us);
258 return;
259 }
260 /* A basic test to see if it's working at all */
261 log_verbose("codepoint testing %x\n", codepoint);
262 backAndForth(iter);
263 ucol_closeElements(iter);
264
265 /* null termination test */
266 iter=ucol_openElements(en_us, source, -1, &status);
267 if(U_FAILURE(status)){
268 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
269 myErrorName(status));
270 ucol_close(en_us);
271 return;
272 }
273 /* A basic test to see if it's working at all */
274 backAndForth(iter);
275 ucol_closeElements(iter);
276 }
277
278 ucol_close(en_us);
279 }
280
281 /**
282 * Test for CollationElementIterator previous and next for the whole set of
283 * unicode characters with normalization on.
284 */
TestNormalizedUnicodeChar()285 static void TestNormalizedUnicodeChar()
286 {
287 UChar source[0x100];
288 UCollator *th_th;
289 UCollationElements *iter;
290 UErrorCode status = U_ZERO_ERROR;
291 UChar codepoint;
292
293 UChar *test;
294 /* thai should have normalization on */
295 th_th = ucol_open("th_TH", &status);
296 if (U_FAILURE(status)){
297 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
298 myErrorName(status));
299 return;
300 }
301
302 for (codepoint = 1; codepoint < 0xFFFE;)
303 {
304 test = source;
305
306 while (codepoint % 0xFF != 0)
307 {
308 if (u_isdefined(codepoint))
309 *(test ++) = codepoint;
310 codepoint ++;
311 }
312
313 if (u_isdefined(codepoint))
314 *(test ++) = codepoint;
315
316 if (codepoint != 0xFFFF)
317 codepoint ++;
318
319 *test = 0;
320 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
321 if(U_FAILURE(status)){
322 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
323 myErrorName(status));
324 ucol_close(th_th);
325 return;
326 }
327
328 backAndForth(iter);
329 ucol_closeElements(iter);
330
331 iter=ucol_openElements(th_th, source, -1, &status);
332 if(U_FAILURE(status)){
333 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
334 myErrorName(status));
335 ucol_close(th_th);
336 return;
337 }
338
339 backAndForth(iter);
340 ucol_closeElements(iter);
341 }
342
343 ucol_close(th_th);
344 }
345
346 /**
347 * Test the incremental normalization
348 */
TestNormalization()349 static void TestNormalization()
350 {
351 UErrorCode status = U_ZERO_ERROR;
352 const char *str =
353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
354 UCollator *coll;
355 UChar rule[50];
356 int rulelen = u_unescape(str, rule, 50);
357 int count = 0;
358 const char *testdata[] =
359 {"\\u1ED9", "o\\u0323\\u0302",
360 "\\u0300\\u0315", "\\u0315\\u0300",
361 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
362 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
366 int32_t srclen;
367 UChar source[10];
368 UCollationElements *iter;
369
370 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
371 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
372 if (U_FAILURE(status)){
373 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
374 myErrorName(status));
375 return;
376 }
377
378 srclen = u_unescape(testdata[0], source, 10);
379 iter = ucol_openElements(coll, source, srclen, &status);
380 backAndForth(iter);
381 ucol_closeElements(iter);
382
383 srclen = u_unescape(testdata[1], source, 10);
384 iter = ucol_openElements(coll, source, srclen, &status);
385 backAndForth(iter);
386 ucol_closeElements(iter);
387
388 while (count < 12) {
389 srclen = u_unescape(testdata[count], source, 10);
390 iter = ucol_openElements(coll, source, srclen, &status);
391
392 if (U_FAILURE(status)){
393 log_err("ERROR: in creation of collator element iterator\n %s\n",
394 myErrorName(status));
395 return;
396 }
397 backAndForth(iter);
398 ucol_closeElements(iter);
399
400 iter = ucol_openElements(coll, source, -1, &status);
401
402 if (U_FAILURE(status)){
403 log_err("ERROR: in creation of collator element iterator\n %s\n",
404 myErrorName(status));
405 return;
406 }
407 backAndForth(iter);
408 ucol_closeElements(iter);
409 count ++;
410 }
411 ucol_close(coll);
412 }
413
414 /**
415 * Test for CollationElementIterator.previous()
416 *
417 * @bug 4108758 - Make sure it works with contracting characters
418 *
419 */
TestPrevious()420 static void TestPrevious()
421 {
422 UCollator *coll=NULL;
423 UChar rule[50];
424 UChar *source;
425 UCollator *c1, *c2, *c3;
426 UCollationElements *iter;
427 UErrorCode status = U_ZERO_ERROR;
428 UChar test1[50];
429 UChar test2[50];
430
431 u_uastrcpy(test1, "What subset of all possible test cases?");
432 u_uastrcpy(test2, "has the highest probability of detecting");
433 coll = ucol_open("en_US", &status);
434
435 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
436 log_verbose("English locale testing back and forth\n");
437 if(U_FAILURE(status)){
438 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
439 myErrorName(status));
440 ucol_close(coll);
441 return;
442 }
443 /* A basic test to see if it's working at all */
444 backAndForth(iter);
445 ucol_closeElements(iter);
446 ucol_close(coll);
447
448 /* Test with a contracting character sequence */
449 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
450 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
451
452 log_verbose("Contraction rule testing back and forth with no normalization\n");
453
454 if (c1 == NULL || U_FAILURE(status))
455 {
456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
457 myErrorName(status));
458 return;
459 }
460 source=(UChar*)malloc(sizeof(UChar) * 20);
461 u_uastrcpy(source, "abchdcba");
462 iter=ucol_openElements(c1, source, u_strlen(source), &status);
463 if(U_FAILURE(status)){
464 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
465 myErrorName(status));
466 return;
467 }
468 backAndForth(iter);
469 ucol_closeElements(iter);
470 ucol_close(c1);
471
472 /* Test with an expanding character sequence */
473 u_uastrcpy(rule, "&a < b < c/abd < d");
474 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
475 log_verbose("Expansion rule testing back and forth with no normalization\n");
476 if (c2 == NULL || U_FAILURE(status))
477 {
478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
479 myErrorName(status));
480 return;
481 }
482 u_uastrcpy(source, "abcd");
483 iter=ucol_openElements(c2, source, u_strlen(source), &status);
484 if(U_FAILURE(status)){
485 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
486 myErrorName(status));
487 return;
488 }
489 backAndForth(iter);
490 ucol_closeElements(iter);
491 ucol_close(c2);
492 /* Now try both */
493 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
494 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
495 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
496
497 if (c3 == NULL || U_FAILURE(status))
498 {
499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
500 myErrorName(status));
501 return;
502 }
503 u_uastrcpy(source, "abcdbchdc");
504 iter=ucol_openElements(c3, source, u_strlen(source), &status);
505 if(U_FAILURE(status)){
506 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
507 myErrorName(status));
508 return;
509 }
510 backAndForth(iter);
511 ucol_closeElements(iter);
512 ucol_close(c3);
513 source[0] = 0x0e41;
514 source[1] = 0x0e02;
515 source[2] = 0x0e41;
516 source[3] = 0x0e02;
517 source[4] = 0x0e27;
518 source[5] = 0x61;
519 source[6] = 0x62;
520 source[7] = 0x63;
521 source[8] = 0;
522
523 coll = ucol_open("th_TH", &status);
524 log_verbose("Thai locale testing back and forth with normalization\n");
525 iter=ucol_openElements(coll, source, u_strlen(source), &status);
526 if(U_FAILURE(status)){
527 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
528 myErrorName(status));
529 return;
530 }
531 backAndForth(iter);
532 ucol_closeElements(iter);
533 ucol_close(coll);
534
535 /* prev test */
536 source[0] = 0x0061;
537 source[1] = 0x30CF;
538 source[2] = 0x3099;
539 source[3] = 0x30FC;
540 source[4] = 0;
541
542 coll = ucol_open("ja_JP", &status);
543 log_verbose("Japanese locale testing back and forth with normalization\n");
544 iter=ucol_openElements(coll, source, u_strlen(source), &status);
545 if(U_FAILURE(status)){
546 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
547 myErrorName(status));
548 return;
549 }
550 backAndForth(iter);
551 ucol_closeElements(iter);
552 ucol_close(coll);
553
554 free(source);
555 }
556
557 /**
558 * Test for getOffset() and setOffset()
559 */
TestOffset()560 static void TestOffset()
561 {
562 UErrorCode status= U_ZERO_ERROR;
563 UCollator *en_us=NULL;
564 UCollationElements *iter, *pristine;
565 int32_t offset;
566 OrderAndOffset *orders;
567 int32_t orderLength=0;
568 int count = 0;
569 UChar test1[50];
570 UChar test2[50];
571
572 u_uastrcpy(test1, "What subset of all possible test cases?");
573 u_uastrcpy(test2, "has the highest probability of detecting");
574 en_us = ucol_open("en_US", &status);
575 log_verbose("Testing getOffset and setOffset for collations\n");
576 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
577 if(U_FAILURE(status)){
578 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
579 myErrorName(status));
580 ucol_close(en_us);
581 return;
582 }
583
584 /* testing boundaries */
585 ucol_setOffset(iter, 0, &status);
586 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
587 log_err("Error: After setting offset to 0, we should be at the end "
588 "of the backwards iteration");
589 }
590 ucol_setOffset(iter, u_strlen(test1), &status);
591 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
592 log_err("Error: After setting offset to end of the string, we should "
593 "be at the end of the backwards iteration");
594 }
595
596 /* Run all the way through the iterator, then get the offset */
597
598 orders = getOrders(iter, &orderLength);
599
600 offset = ucol_getOffset(iter);
601
602 if (offset != u_strlen(test1))
603 {
604 log_err("offset at end != length %d vs %d\n", offset,
605 u_strlen(test1) );
606 }
607
608 /* Now set the offset back to the beginning and see if it works */
609 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
610 if(U_FAILURE(status)){
611 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
612 myErrorName(status));
613 ucol_close(en_us);
614 return;
615 }
616 status = U_ZERO_ERROR;
617
618 ucol_setOffset(iter, 0, &status);
619 if (U_FAILURE(status))
620 {
621 log_err("setOffset failed. %s\n", myErrorName(status));
622 }
623 else
624 {
625 assertEqual(iter, pristine);
626 }
627
628 ucol_closeElements(pristine);
629 ucol_closeElements(iter);
630 free(orders);
631
632 /* testing offsets in normalization buffer */
633 test1[0] = 0x61;
634 test1[1] = 0x300;
635 test1[2] = 0x316;
636 test1[3] = 0x62;
637 test1[4] = 0;
638 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
639 iter = ucol_openElements(en_us, test1, 4, &status);
640 if(U_FAILURE(status)){
641 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
642 myErrorName(status));
643 ucol_close(en_us);
644 return;
645 }
646
647 count = 0;
648 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
649 U_SUCCESS(status)) {
650 switch (count) {
651 case 0:
652 if (ucol_getOffset(iter) != 1) {
653 log_err("ERROR: Offset of iteration should be 1\n");
654 }
655 break;
656 case 3:
657 if (ucol_getOffset(iter) != 4) {
658 log_err("ERROR: Offset of iteration should be 4\n");
659 }
660 break;
661 default:
662 if (ucol_getOffset(iter) != 3) {
663 log_err("ERROR: Offset of iteration should be 3\n");
664 }
665 }
666 count ++;
667 }
668
669 ucol_reset(iter);
670 count = 0;
671 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
672 U_SUCCESS(status)) {
673 switch (count) {
674 case 0:
675 case 1:
676 if (ucol_getOffset(iter) != 3) {
677 log_err("ERROR: Offset of iteration should be 3\n");
678 }
679 break;
680 case 2:
681 if (ucol_getOffset(iter) != 1) {
682 log_err("ERROR: Offset of iteration should be 1\n");
683 }
684 break;
685 default:
686 if (ucol_getOffset(iter) != 0) {
687 log_err("ERROR: Offset of iteration should be 0\n");
688 }
689 }
690 count ++;
691 }
692
693 if(U_FAILURE(status)){
694 log_err("ERROR: in iterating collation elements %s\n",
695 myErrorName(status));
696 }
697
698 ucol_closeElements(iter);
699 ucol_close(en_us);
700 }
701
702 /**
703 * Test for setText()
704 */
TestSetText()705 static void TestSetText()
706 {
707 int32_t c,i;
708 UErrorCode status = U_ZERO_ERROR;
709 UCollator *en_us=NULL;
710 UCollationElements *iter1, *iter2;
711 UChar test1[50];
712 UChar test2[50];
713
714 u_uastrcpy(test1, "What subset of all possible test cases?");
715 u_uastrcpy(test2, "has the highest probability of detecting");
716 en_us = ucol_open("en_US", &status);
717 log_verbose("testing setText for Collation elements\n");
718 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
719 if(U_FAILURE(status)){
720 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
721 myErrorName(status));
722 ucol_close(en_us);
723 return;
724 }
725 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
726 if(U_FAILURE(status)){
727 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
728 myErrorName(status));
729 ucol_close(en_us);
730 return;
731 }
732
733 /* Run through the second iterator just to exercise it */
734 c = ucol_next(iter2, &status);
735 i = 0;
736
737 while ( ++i < 10 && (c != UCOL_NULLORDER))
738 {
739 if (U_FAILURE(status))
740 {
741 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
742 ucol_closeElements(iter2);
743 ucol_closeElements(iter1);
744 ucol_close(en_us);
745 return;
746 }
747
748 c = ucol_next(iter2, &status);
749 }
750
751 /* Now set it to point to the same string as the first iterator */
752 ucol_setText(iter2, test1, u_strlen(test1), &status);
753 if (U_FAILURE(status))
754 {
755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
756 }
757 else
758 {
759 assertEqual(iter1, iter2);
760 }
761
762 /* Now set it to point to a null string with fake length*/
763 ucol_setText(iter2, NULL, 2, &status);
764 if (status != U_ILLEGAL_ARGUMENT_ERROR)
765 {
766 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
767 myErrorName(status));
768 }
769
770 ucol_closeElements(iter2);
771 ucol_closeElements(iter1);
772 ucol_close(en_us);
773 }
774
775 /** @bug 4108762
776 * Test for getMaxExpansion()
777 */
TestMaxExpansion()778 static void TestMaxExpansion()
779 {
780 UErrorCode status = U_ZERO_ERROR;
781 UCollator *coll ;/*= ucol_open("en_US", &status);*/
782 UChar ch = 0;
783 UChar32 unassigned = 0xEFFFD;
784 UChar supplementary[2];
785 uint32_t stringOffset = 0;
786 UBool isError = FALSE;
787 uint32_t sorder = 0;
788 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
789 uint32_t temporder = 0;
790
791 UChar rule[256];
792 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
793 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
794 UCOL_DEFAULT_STRENGTH,NULL, &status);
795 if(U_SUCCESS(status) && coll) {
796 iter = ucol_openElements(coll, &ch, 1, &status);
797
798 while (ch < 0xFFFF && U_SUCCESS(status)) {
799 int count = 1;
800 uint32_t order;
801 int32_t size = 0;
802
803 ch ++;
804
805 ucol_setText(iter, &ch, 1, &status);
806 order = ucol_previous(iter, &status);
807
808 /* thai management */
809 if (order == 0)
810 order = ucol_previous(iter, &status);
811
812 while (U_SUCCESS(status) &&
813 ucol_previous(iter, &status) != UCOL_NULLORDER) {
814 count ++;
815 }
816
817 size = ucol_getMaxExpansion(iter, order);
818 if (U_FAILURE(status) || size < count) {
819 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
820 ch, count);
821 }
822 }
823
824 /* testing for exact max expansion */
825 ch = 0;
826 while (ch < 0x61) {
827 uint32_t order;
828 int32_t size;
829 ucol_setText(iter, &ch, 1, &status);
830 order = ucol_previous(iter, &status);
831 size = ucol_getMaxExpansion(iter, order);
832 if (U_FAILURE(status) || size != 1) {
833 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
834 ch, 1);
835 }
836 ch ++;
837 }
838
839 ch = 0x63;
840 ucol_setText(iter, &ch, 1, &status);
841 temporder = ucol_previous(iter, &status);
842
843 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
844 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
845 ch, 3);
846 }
847
848 ch = 0x64;
849 ucol_setText(iter, &ch, 1, &status);
850 temporder = ucol_previous(iter, &status);
851
852 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
853 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
854 ch, 3);
855 }
856
857 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
858 (void)isError; /* Suppress set but not used warning. */
859 ucol_setText(iter, supplementary, 2, &status);
860 sorder = ucol_previous(iter, &status);
861
862 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
863 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
864 ch, 2);
865 }
866
867 /* testing jamo */
868 ch = 0x1165;
869
870 ucol_setText(iter, &ch, 1, &status);
871 temporder = ucol_previous(iter, &status);
872 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
873 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
874 ch, 3);
875 }
876
877 ucol_closeElements(iter);
878 ucol_close(coll);
879
880 /* testing special jamo &a<\u1160 */
881 rule[0] = 0x26;
882 rule[1] = 0x71;
883 rule[2] = 0x3c;
884 rule[3] = 0x1165;
885 rule[4] = 0x2f;
886 rule[5] = 0x71;
887 rule[6] = 0x71;
888 rule[7] = 0x71;
889 rule[8] = 0x71;
890 rule[9] = 0;
891
892 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
893 UCOL_DEFAULT_STRENGTH,NULL, &status);
894 iter = ucol_openElements(coll, &ch, 1, &status);
895
896 temporder = ucol_previous(iter, &status);
897 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
898 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
899 ch, 5);
900 }
901
902 ucol_closeElements(iter);
903 ucol_close(coll);
904 } else {
905 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
906 }
907
908 }
909
910
/**
 * Steps two iterators forward in lockstep and logs an error at the first
 * position where the elements they return differ. Stops at that mismatch, or
 * once both have returned UCOL_NULLORDER.
 *
 * @param i1  first iterator; advanced by this function
 * @param i2  second iterator; advanced by this function
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t step;

    for (step = 0; ; ++step) {
        int32_t left = ucol_next(i1, &status);
        int32_t right = ucol_next(i2, &status);

        if (left != right) {
            log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", step, left, right);
            return;
        }
        if (left == UCOL_NULLORDER) {
            /* both iterators are exhausted at the same step */
            return;
        }
    }
}
932
933 /**
934 * Testing iterators with extremely small buffers
935 */
TestSmallBuffer()936 static void TestSmallBuffer()
937 {
938 UErrorCode status = U_ZERO_ERROR;
939 UCollator *coll;
940 UCollationElements *testiter,
941 *iter;
942 int32_t count = 0;
943 OrderAndOffset *testorders,
944 *orders;
945
946 UChar teststr[500];
947 UChar str[] = {0x300, 0x31A, 0};
948 /*
949 creating a long string of decomposable characters,
950 since by default the writable buffer is of size 256
951 */
952 while (count < 500) {
953 if ((count & 1) == 0) {
954 teststr[count ++] = 0x300;
955 }
956 else {
957 teststr[count ++] = 0x31A;
958 }
959 }
960
961 coll = ucol_open("th_TH", &status);
962 if(U_SUCCESS(status) && coll) {
963 testiter = ucol_openElements(coll, teststr, 500, &status);
964 iter = ucol_openElements(coll, str, 2, &status);
965
966 orders = getOrders(iter, &count);
967 if (count != 2) {
968 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
969 }
970
971 /*
972 this will rearrange the string data to 250 characters of 0x300 first then
973 250 characters of 0x031A
974 */
975 testorders = getOrders(testiter, &count);
976
977 if (count != 500) {
978 log_err("Error decomposition does not give the right sized collation elements\n");
979 }
980
981 while (count != 0) {
982 /* UCA collation element for 0x0F76 */
983 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
984 (count <= 250 && testorders[-- count].order != orders[0].order)) {
985 log_err("Error decomposition does not give the right collation element at %d count\n", count);
986 break;
987 }
988 }
989
990 free(testorders);
991 free(orders);
992
993 ucol_reset(testiter);
994
995 /* ensures closing of elements done properly to clear writable buffer */
996 ucol_next(testiter, &status);
997 ucol_next(testiter, &status);
998 ucol_closeElements(testiter);
999 ucol_closeElements(iter);
1000 ucol_close(coll);
1001 } else {
1002 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1003 }
1004 }
1005
1006 /**
1007 * Testing the discontigous contractions
1008 */
TestDiscontiguos()1009 static void TestDiscontiguos() {
1010 const char *rulestr =
1011 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1012 UChar rule[50];
1013 int rulelen = u_unescape(rulestr, rule, 50);
1014 const char *src[] = {
1015 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1016 /* base character blocked */
1017 "XD\\u0300", "XD\\u0300\\u0315",
1018 /* non blocking combining character */
1019 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1020 /* blocking combining character */
1021 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1022 /* contraction prefix */
1023 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1024 "X\\u0300\\u031A\\u0315",
1025 /* ends not with a contraction character */
1026 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1027 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1028 };
1029 const char *tgt[] = {
1030 /* non blocking combining character */
1031 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1032 /* base character blocked */
1033 "X D \\u0300", "X D \\u0300\\u0315",
1034 /* non blocking combining character */
1035 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1036 /* blocking combining character */
1037 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1038 /* contraction prefix */
1039 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1040 "X\\u0300 \\u031A \\u0315",
1041 /* ends not with a contraction character */
1042 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1043 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1044 };
1045 int size = 20;
1046 UCollator *coll;
1047 UErrorCode status = U_ZERO_ERROR;
1048 int count = 0;
1049 UCollationElements *iter;
1050 UCollationElements *resultiter;
1051
1052 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1053 iter = ucol_openElements(coll, rule, 1, &status);
1054 resultiter = ucol_openElements(coll, rule, 1, &status);
1055
1056 if (U_FAILURE(status)) {
1057 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1058 return;
1059 }
1060
1061 while (count < size) {
1062 UChar str[20];
1063 UChar tstr[20];
1064 int strLen = u_unescape(src[count], str, 20);
1065 UChar *s;
1066
1067 ucol_setText(iter, str, strLen, &status);
1068 if (U_FAILURE(status)) {
1069 log_err("Error opening collation iterator\n");
1070 return;
1071 }
1072
1073 u_unescape(tgt[count], tstr, 20);
1074 s = tstr;
1075
1076 log_verbose("count %d\n", count);
1077
1078 for (;;) {
1079 uint32_t ce;
1080 UChar *e = u_strchr(s, 0x20);
1081 if (e == 0) {
1082 e = u_strchr(s, 0);
1083 }
1084 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1085 ce = ucol_next(resultiter, &status);
1086 if (U_FAILURE(status)) {
1087 log_err("Error manipulating collation iterator\n");
1088 return;
1089 }
1090 while (ce != UCOL_NULLORDER) {
1091 if (ce != (uint32_t)ucol_next(iter, &status) ||
1092 U_FAILURE(status)) {
1093 log_err("Discontiguos contraction test mismatch\n");
1094 return;
1095 }
1096 ce = ucol_next(resultiter, &status);
1097 if (U_FAILURE(status)) {
1098 log_err("Error getting next collation element\n");
1099 return;
1100 }
1101 }
1102 s = e + 1;
1103 if (*e == 0) {
1104 break;
1105 }
1106 }
1107 ucol_reset(iter);
1108 backAndForth(iter);
1109 count ++;
1110 }
1111 ucol_closeElements(resultiter);
1112 ucol_closeElements(iter);
1113 ucol_close(coll);
1114 }
1115
1116 /**
1117 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1118 * normalization on AND jamo tailoring, among other things.
1119 *
1120 * Note: This test is sensitive to changes of the root collator,
1121 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1122 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1123 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1124 * For example, the DUCET's artificial secondary CE in the ae-ligature
1125 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1126 */
1127 static const UChar tsceText[] = { /* Nothing in here should be ignorable */
1128 0x0020, 0xAC00, /* simple LV Hangul */
1129 0x0020, 0xAC01, /* simple LVT Hangul */
1130 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1131 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1132 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1133 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1134 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1135 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1136 0x0020, 0x00E6, /* small letter ae, expands */
1137 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1138 0x0020
1139 };
1140 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
1141
1142 static const int32_t rootStandardOffsets[] = {
1143 0, 1,2,
1144 2, 3,4,4,
1145 4, 5,6,6,
1146 6, 7,8,8,
1147 8, 9,10,11,
1148 12, 13,14,15,
1149 16, 17,18,19,
1150 20, 21,22,23,
1151 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1152 26, 27,28,28,
1153 28,
1154 29
1155 };
1156 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
1157
1158 static const int32_t rootSearchOffsets[] = {
1159 0, 1,2,
1160 2, 3,4,4,
1161 4, 5,6,6,6,
1162 6, 7,8,8,8,8,8,8,
1163 8, 9,10,11,
1164 12, 13,14,15,
1165 16, 17,18,19,20,
1166 20, 21,22,22,23,23,23,24,
1167 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1168 26, 27,28,28,
1169 28,
1170 29
1171 };
1172 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1173
/* One test case: a collator locale paired with its table of expected offsets. */
typedef struct {
    const char    *locale;      /* locale ID handed to ucol_open(); NULL ends a table of items */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1179
1180 static const TSCEItem tsceItems[] = {
1181 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
1182 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
1183 { NULL, NULL, 0 }
1184 };
1185
TestSearchCollatorElements(void)1186 static void TestSearchCollatorElements(void)
1187 {
1188 const TSCEItem * tsceItemPtr;
1189 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1190 UErrorCode status = U_ZERO_ERROR;
1191 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1192 if ( U_SUCCESS(status) ) {
1193 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1194 if ( U_SUCCESS(status) ) {
1195 int32_t offset, element;
1196 const int32_t * nextOffsetPtr;
1197 const int32_t * limitOffsetPtr;
1198
1199 nextOffsetPtr = tsceItemPtr->offsets;
1200 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1201 do {
1202 offset = ucol_getOffset(uce);
1203 element = ucol_next(uce, &status);
1204 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
1205 if ( element == 0 ) {
1206 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1207 }
1208 if ( nextOffsetPtr < limitOffsetPtr ) {
1209 if (offset != *nextOffsetPtr) {
1210 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1211 tsceItemPtr->locale, *nextOffsetPtr, offset );
1212 nextOffsetPtr = limitOffsetPtr;
1213 break;
1214 }
1215 nextOffsetPtr++;
1216 } else {
1217 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1218 }
1219 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1220 if ( nextOffsetPtr < limitOffsetPtr ) {
1221 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1222 }
1223
1224 ucol_setOffset(uce, kLen_tsceText, &status);
1225 status = U_ZERO_ERROR;
1226 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1227 limitOffsetPtr = tsceItemPtr->offsets;
1228 do {
1229 offset = ucol_getOffset(uce);
1230 element = ucol_previous(uce, &status);
1231 if ( element == 0 ) {
1232 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1233 }
1234 if ( nextOffsetPtr > limitOffsetPtr ) {
1235 nextOffsetPtr--;
1236 if (offset != *nextOffsetPtr) {
1237 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1238 tsceItemPtr->locale, *nextOffsetPtr, offset );
1239 nextOffsetPtr = limitOffsetPtr;
1240 break;
1241 }
1242 } else {
1243 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1244 }
1245 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1246 if ( nextOffsetPtr > limitOffsetPtr ) {
1247 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1248 }
1249
1250 ucol_closeElements(uce);
1251 } else {
1252 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1253 }
1254 ucol_close(ucol);
1255 } else {
1256 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1257 }
1258 }
1259 }
1260
1261 #endif /* #if !UCONFIG_NO_COLLATION */
1262