1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CITERTST.C
11 *
12 * Modification History:
13 * Date Name Description
14 * Madhu Katragadda Ported for C API
15 * 02/19/01 synwee Modified test case for new collation iterator
16 *********************************************************************************/
17 /*
18 * Collation Iterator tests.
19 * (Let me reiterate my position...)
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_COLLATION
25
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf16.h"
32 #include "unicode/putil.h"
33 #include "callcoll.h"
34 #include "cmemory.h"
35 #include "cintltst.h"
36 #include "citertst.h"
37 #include "ccolltst.h"
38 #include "filestrm.h"
39 #include "cstring.h"
40 #include "ucol_imp.h"
41 #include "uparse.h"
42 #include <stdbool.h>
43 #include <stdio.h>
44
45 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
46
/**
 * Registers every collation-element-iterator test of this file in the test
 * tree rooted at `root`. Each test is filed under "tscoll/citertst/" followed
 * by the name of the test function itself.
 */
void addCollIterTest(TestNode** root)
{
/* Builds the registration path from the fixed prefix and the function name. */
#define ADD_CITER_TEST(testFn) addTest(root, &testFn, "tscoll/citertst/" #testFn)

    ADD_CITER_TEST(TestPrevious);
    ADD_CITER_TEST(TestOffset);
    ADD_CITER_TEST(TestSetText);
    ADD_CITER_TEST(TestMaxExpansion);
    ADD_CITER_TEST(TestUnicodeChar);
    ADD_CITER_TEST(TestNormalizedUnicodeChar);
    ADD_CITER_TEST(TestNormalization);
    ADD_CITER_TEST(TestBug672);
    ADD_CITER_TEST(TestBug672Normalize);
    ADD_CITER_TEST(TestSmallBuffer);
    ADD_CITER_TEST(TestDiscontiguos);
    ADD_CITER_TEST(TestSearchCollatorElements);

#undef ADD_CITER_TEST
}
63
/* Locales iterated over by the locale-comparison tests (see TestBug672). */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
67
TestBug672()68 static void TestBug672() {
69 UErrorCode status = U_ZERO_ERROR;
70 UChar pattern[20];
71 UChar text[50];
72 int i;
73 int result[3][3];
74
75 u_uastrcpy(pattern, "resume");
76 u_uastrcpy(text, "Time to resume updating my resume.");
77
78 for (i = 0; i < 3; ++ i) {
79 UCollator *coll = ucol_open(LOCALES[i], &status);
80 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
81 &status);
82 UCollationElements *titer = ucol_openElements(coll, text, -1,
83 &status);
84 if (U_FAILURE(status)) {
85 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
86 myErrorName(status));
87 return;
88 }
89
90 log_verbose("locale tested %s\n", LOCALES[i]);
91
92 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
93 U_SUCCESS(status)) {
94 }
95 if (U_FAILURE(status)) {
96 log_err("ERROR: reversing collation iterator :%s\n",
97 myErrorName(status));
98 return;
99 }
100 ucol_reset(pitr);
101
102 ucol_setOffset(titer, u_strlen(pattern), &status);
103 if (U_FAILURE(status)) {
104 log_err("ERROR: setting offset in collator :%s\n",
105 myErrorName(status));
106 return;
107 }
108 result[i][0] = ucol_getOffset(titer);
109 log_verbose("Text iterator set to offset %d\n", result[i][0]);
110
111 /* Use previous() */
112 ucol_previous(titer, &status);
113 result[i][1] = ucol_getOffset(titer);
114 log_verbose("Current offset %d after previous\n", result[i][1]);
115
116 /* Add one to index */
117 log_verbose("Adding one to current offset...\n");
118 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
119 if (U_FAILURE(status)) {
120 log_err("ERROR: setting offset in collator :%s\n",
121 myErrorName(status));
122 return;
123 }
124 result[i][2] = ucol_getOffset(titer);
125 log_verbose("Current offset in text = %d\n", result[i][2]);
126 ucol_closeElements(pitr);
127 ucol_closeElements(titer);
128 ucol_close(coll);
129 }
130
131 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
132 uprv_memcmp(result[1], result[2], 3) != 0) {
133 log_err("ERROR: Different locales have different offsets at the same character\n");
134 }
135 }
136
137
138
139 /* Running this test with normalization enabled showed up a bug in the incremental
140 normalization code. */
TestBug672Normalize()141 static void TestBug672Normalize() {
142 UErrorCode status = U_ZERO_ERROR;
143 UChar pattern[20];
144 UChar text[50];
145 int i;
146 int result[3][3];
147
148 u_uastrcpy(pattern, "resume");
149 u_uastrcpy(text, "Time to resume updating my resume.");
150
151 for (i = 0; i < 3; ++ i) {
152 UCollator *coll = ucol_open(LOCALES[i], &status);
153 UCollationElements *pitr = NULL;
154 UCollationElements *titer = NULL;
155
156 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
157
158 pitr = ucol_openElements(coll, pattern, -1, &status);
159 titer = ucol_openElements(coll, text, -1, &status);
160 if (U_FAILURE(status)) {
161 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
162 myErrorName(status));
163 return;
164 }
165
166 log_verbose("locale tested %s\n", LOCALES[i]);
167
168 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
169 U_SUCCESS(status)) {
170 }
171 if (U_FAILURE(status)) {
172 log_err("ERROR: reversing collation iterator :%s\n",
173 myErrorName(status));
174 return;
175 }
176 ucol_reset(pitr);
177
178 ucol_setOffset(titer, u_strlen(pattern), &status);
179 if (U_FAILURE(status)) {
180 log_err("ERROR: setting offset in collator :%s\n",
181 myErrorName(status));
182 return;
183 }
184 result[i][0] = ucol_getOffset(titer);
185 log_verbose("Text iterator set to offset %d\n", result[i][0]);
186
187 /* Use previous() */
188 ucol_previous(titer, &status);
189 result[i][1] = ucol_getOffset(titer);
190 log_verbose("Current offset %d after previous\n", result[i][1]);
191
192 /* Add one to index */
193 log_verbose("Adding one to current offset...\n");
194 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
195 if (U_FAILURE(status)) {
196 log_err("ERROR: setting offset in collator :%s\n",
197 myErrorName(status));
198 return;
199 }
200 result[i][2] = ucol_getOffset(titer);
201 log_verbose("Current offset in text = %d\n", result[i][2]);
202 ucol_closeElements(pitr);
203 ucol_closeElements(titer);
204 ucol_close(coll);
205 }
206
207 if (uprv_memcmp(result[0], result[1], 3) != 0 ||
208 uprv_memcmp(result[1], result[2], 3) != 0) {
209 log_err("ERROR: Different locales have different offsets at the same character\n");
210 }
211 }
212
213
214
215
216 /**
217 * Test for CollationElementIterator previous and next for the whole set of
218 * unicode characters.
219 */
TestUnicodeChar()220 static void TestUnicodeChar()
221 {
222 UChar source[0x100];
223 UCollator *en_us;
224 UCollationElements *iter;
225 UErrorCode status = U_ZERO_ERROR;
226 UChar codepoint;
227
228 UChar *test;
229 en_us = ucol_open("en_US", &status);
230 if (U_FAILURE(status)){
231 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
232 myErrorName(status));
233 return;
234 }
235
236 for (codepoint = 1; codepoint < 0xFFFE;)
237 {
238 test = source;
239
240 while (codepoint % 0xFF != 0)
241 {
242 if (u_isdefined(codepoint))
243 *(test ++) = codepoint;
244 codepoint ++;
245 }
246
247 if (u_isdefined(codepoint))
248 *(test ++) = codepoint;
249
250 if (codepoint != 0xFFFF)
251 codepoint ++;
252
253 *test = 0;
254 iter=ucol_openElements(en_us, source, u_strlen(source), &status);
255 if(U_FAILURE(status)){
256 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
257 myErrorName(status));
258 ucol_close(en_us);
259 return;
260 }
261 /* A basic test to see if it's working at all */
262 log_verbose("codepoint testing %x\n", codepoint);
263 backAndForth(iter);
264 ucol_closeElements(iter);
265
266 /* null termination test */
267 iter=ucol_openElements(en_us, source, -1, &status);
268 if(U_FAILURE(status)){
269 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
270 myErrorName(status));
271 ucol_close(en_us);
272 return;
273 }
274 /* A basic test to see if it's working at all */
275 backAndForth(iter);
276 ucol_closeElements(iter);
277 }
278
279 ucol_close(en_us);
280 }
281
282 /**
283 * Test for CollationElementIterator previous and next for the whole set of
284 * unicode characters with normalization on.
285 */
TestNormalizedUnicodeChar()286 static void TestNormalizedUnicodeChar()
287 {
288 UChar source[0x100];
289 UCollator *th_th;
290 UCollationElements *iter;
291 UErrorCode status = U_ZERO_ERROR;
292 UChar codepoint;
293
294 UChar *test;
295 /* thai should have normalization on */
296 th_th = ucol_open("th_TH", &status);
297 if (U_FAILURE(status)){
298 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
299 myErrorName(status));
300 return;
301 }
302
303 for (codepoint = 1; codepoint < 0xFFFE;)
304 {
305 test = source;
306
307 while (codepoint % 0xFF != 0)
308 {
309 if (u_isdefined(codepoint))
310 *(test ++) = codepoint;
311 codepoint ++;
312 }
313
314 if (u_isdefined(codepoint))
315 *(test ++) = codepoint;
316
317 if (codepoint != 0xFFFF)
318 codepoint ++;
319
320 *test = 0;
321 iter=ucol_openElements(th_th, source, u_strlen(source), &status);
322 if(U_FAILURE(status)){
323 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
324 myErrorName(status));
325 ucol_close(th_th);
326 return;
327 }
328
329 backAndForth(iter);
330 ucol_closeElements(iter);
331
332 iter=ucol_openElements(th_th, source, -1, &status);
333 if(U_FAILURE(status)){
334 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
335 myErrorName(status));
336 ucol_close(th_th);
337 return;
338 }
339
340 backAndForth(iter);
341 ucol_closeElements(iter);
342 }
343
344 ucol_close(th_th);
345 }
346
347 /**
348 * Test the incremental normalization
349 */
TestNormalization()350 static void TestNormalization()
351 {
352 UErrorCode status = U_ZERO_ERROR;
353 const char *str =
354 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
355 UCollator *coll;
356 UChar rule[50];
357 int rulelen = u_unescape(str, rule, 50);
358 int count = 0;
359 const char *testdata[] =
360 {"\\u1ED9", "o\\u0323\\u0302",
361 "\\u0300\\u0315", "\\u0315\\u0300",
362 "A\\u0300\\u0315B", "A\\u0315\\u0300B",
363 "A\\u0316\\u0315B", "A\\u0315\\u0316B",
364 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
365 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
366 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
367 int32_t srclen;
368 UChar source[10];
369 UCollationElements *iter;
370
371 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
372 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
373 if (U_FAILURE(status)){
374 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
375 myErrorName(status));
376 return;
377 }
378
379 srclen = u_unescape(testdata[0], source, 10);
380 iter = ucol_openElements(coll, source, srclen, &status);
381 backAndForth(iter);
382 ucol_closeElements(iter);
383
384 srclen = u_unescape(testdata[1], source, 10);
385 iter = ucol_openElements(coll, source, srclen, &status);
386 backAndForth(iter);
387 ucol_closeElements(iter);
388
389 while (count < 12) {
390 srclen = u_unescape(testdata[count], source, 10);
391 iter = ucol_openElements(coll, source, srclen, &status);
392
393 if (U_FAILURE(status)){
394 log_err("ERROR: in creation of collator element iterator\n %s\n",
395 myErrorName(status));
396 return;
397 }
398 backAndForth(iter);
399 ucol_closeElements(iter);
400
401 iter = ucol_openElements(coll, source, -1, &status);
402
403 if (U_FAILURE(status)){
404 log_err("ERROR: in creation of collator element iterator\n %s\n",
405 myErrorName(status));
406 return;
407 }
408 backAndForth(iter);
409 ucol_closeElements(iter);
410 count ++;
411 }
412 ucol_close(coll);
413 }
414
415 /**
416 * Test for CollationElementIterator.previous()
417 *
418 * @bug 4108758 - Make sure it works with contracting characters
419 *
420 */
TestPrevious()421 static void TestPrevious()
422 {
423 UCollator *coll=NULL;
424 UChar rule[50];
425 UChar *source;
426 UCollator *c1, *c2, *c3;
427 UCollationElements *iter;
428 UErrorCode status = U_ZERO_ERROR;
429 UChar test1[50];
430 UChar test2[50];
431
432 u_uastrcpy(test1, "What subset of all possible test cases?");
433 u_uastrcpy(test2, "has the highest probability of detecting");
434 coll = ucol_open("en_US", &status);
435
436 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
437 log_verbose("English locale testing back and forth\n");
438 if(U_FAILURE(status)){
439 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
440 myErrorName(status));
441 ucol_close(coll);
442 return;
443 }
444 /* A basic test to see if it's working at all */
445 backAndForth(iter);
446 ucol_closeElements(iter);
447 ucol_close(coll);
448
449 /* Test with a contracting character sequence */
450 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
451 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
452
453 log_verbose("Contraction rule testing back and forth with no normalization\n");
454
455 if (c1 == NULL || U_FAILURE(status))
456 {
457 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
458 myErrorName(status));
459 return;
460 }
461 source=(UChar*)malloc(sizeof(UChar) * 20);
462 u_uastrcpy(source, "abchdcba");
463 iter=ucol_openElements(c1, source, u_strlen(source), &status);
464 if(U_FAILURE(status)){
465 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
466 myErrorName(status));
467 return;
468 }
469 backAndForth(iter);
470 ucol_closeElements(iter);
471 ucol_close(c1);
472
473 /* Test with an expanding character sequence */
474 u_uastrcpy(rule, "&a < b < c/abd < d");
475 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
476 log_verbose("Expansion rule testing back and forth with no normalization\n");
477 if (c2 == NULL || U_FAILURE(status))
478 {
479 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
480 myErrorName(status));
481 return;
482 }
483 u_uastrcpy(source, "abcd");
484 iter=ucol_openElements(c2, source, u_strlen(source), &status);
485 if(U_FAILURE(status)){
486 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
487 myErrorName(status));
488 return;
489 }
490 backAndForth(iter);
491 ucol_closeElements(iter);
492 ucol_close(c2);
493 /* Now try both */
494 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
495 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
496 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
497
498 if (c3 == NULL || U_FAILURE(status))
499 {
500 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
501 myErrorName(status));
502 return;
503 }
504 u_uastrcpy(source, "abcdbchdc");
505 iter=ucol_openElements(c3, source, u_strlen(source), &status);
506 if(U_FAILURE(status)){
507 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
508 myErrorName(status));
509 return;
510 }
511 backAndForth(iter);
512 ucol_closeElements(iter);
513 ucol_close(c3);
514 source[0] = 0x0e41;
515 source[1] = 0x0e02;
516 source[2] = 0x0e41;
517 source[3] = 0x0e02;
518 source[4] = 0x0e27;
519 source[5] = 0x61;
520 source[6] = 0x62;
521 source[7] = 0x63;
522 source[8] = 0;
523
524 coll = ucol_open("th_TH", &status);
525 log_verbose("Thai locale testing back and forth with normalization\n");
526 iter=ucol_openElements(coll, source, u_strlen(source), &status);
527 if(U_FAILURE(status)){
528 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
529 myErrorName(status));
530 return;
531 }
532 backAndForth(iter);
533 ucol_closeElements(iter);
534 ucol_close(coll);
535
536 /* prev test */
537 source[0] = 0x0061;
538 source[1] = 0x30CF;
539 source[2] = 0x3099;
540 source[3] = 0x30FC;
541 source[4] = 0;
542
543 coll = ucol_open("ja_JP", &status);
544 log_verbose("Japanese locale testing back and forth with normalization\n");
545 iter=ucol_openElements(coll, source, u_strlen(source), &status);
546 if(U_FAILURE(status)){
547 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
548 myErrorName(status));
549 return;
550 }
551 backAndForth(iter);
552 ucol_closeElements(iter);
553 ucol_close(coll);
554
555 free(source);
556 }
557
558 /**
559 * Test for getOffset() and setOffset()
560 */
TestOffset()561 static void TestOffset()
562 {
563 UErrorCode status= U_ZERO_ERROR;
564 UCollator *en_us=NULL;
565 UCollationElements *iter, *pristine;
566 int32_t offset;
567 OrderAndOffset *orders;
568 int32_t orderLength=0;
569 int count = 0;
570 UChar test1[50];
571 UChar test2[50];
572
573 u_uastrcpy(test1, "What subset of all possible test cases?");
574 u_uastrcpy(test2, "has the highest probability of detecting");
575 en_us = ucol_open("en_US", &status);
576 log_verbose("Testing getOffset and setOffset for collations\n");
577 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
578 if(U_FAILURE(status)){
579 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
580 myErrorName(status));
581 ucol_close(en_us);
582 return;
583 }
584
585 /* testing boundaries */
586 ucol_setOffset(iter, 0, &status);
587 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
588 log_err("Error: After setting offset to 0, we should be at the end "
589 "of the backwards iteration");
590 }
591 ucol_setOffset(iter, u_strlen(test1), &status);
592 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
593 log_err("Error: After setting offset to end of the string, we should "
594 "be at the end of the backwards iteration");
595 }
596
597 /* Run all the way through the iterator, then get the offset */
598
599 orders = getOrders(iter, &orderLength);
600
601 offset = ucol_getOffset(iter);
602
603 if (offset != u_strlen(test1))
604 {
605 log_err("offset at end != length %d vs %d\n", offset,
606 u_strlen(test1) );
607 }
608
609 /* Now set the offset back to the beginning and see if it works */
610 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
611 if(U_FAILURE(status)){
612 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
613 myErrorName(status));
614 ucol_close(en_us);
615 return;
616 }
617 status = U_ZERO_ERROR;
618
619 ucol_setOffset(iter, 0, &status);
620 if (U_FAILURE(status))
621 {
622 log_err("setOffset failed. %s\n", myErrorName(status));
623 }
624 else
625 {
626 assertEqual(iter, pristine);
627 }
628
629 ucol_closeElements(pristine);
630 ucol_closeElements(iter);
631 free(orders);
632
633 /* testing offsets in normalization buffer */
634 test1[0] = 0x61;
635 test1[1] = 0x300;
636 test1[2] = 0x316;
637 test1[3] = 0x62;
638 test1[4] = 0;
639 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
640 iter = ucol_openElements(en_us, test1, 4, &status);
641 if(U_FAILURE(status)){
642 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
643 myErrorName(status));
644 ucol_close(en_us);
645 return;
646 }
647
648 count = 0;
649 while (ucol_next(iter, &status) != UCOL_NULLORDER &&
650 U_SUCCESS(status)) {
651 switch (count) {
652 case 0:
653 if (ucol_getOffset(iter) != 1) {
654 log_err("ERROR: Offset of iteration should be 1\n");
655 }
656 break;
657 case 3:
658 if (ucol_getOffset(iter) != 4) {
659 log_err("ERROR: Offset of iteration should be 4\n");
660 }
661 break;
662 default:
663 if (ucol_getOffset(iter) != 3) {
664 log_err("ERROR: Offset of iteration should be 3\n");
665 }
666 }
667 count ++;
668 }
669
670 ucol_reset(iter);
671 count = 0;
672 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
673 U_SUCCESS(status)) {
674 switch (count) {
675 case 0:
676 case 1:
677 if (ucol_getOffset(iter) != 3) {
678 log_err("ERROR: Offset of iteration should be 3\n");
679 }
680 break;
681 case 2:
682 if (ucol_getOffset(iter) != 1) {
683 log_err("ERROR: Offset of iteration should be 1\n");
684 }
685 break;
686 default:
687 if (ucol_getOffset(iter) != 0) {
688 log_err("ERROR: Offset of iteration should be 0\n");
689 }
690 }
691 count ++;
692 }
693
694 if(U_FAILURE(status)){
695 log_err("ERROR: in iterating collation elements %s\n",
696 myErrorName(status));
697 }
698
699 ucol_closeElements(iter);
700 ucol_close(en_us);
701 }
702
703 /**
704 * Test for setText()
705 */
TestSetText()706 static void TestSetText()
707 {
708 int32_t c,i;
709 UErrorCode status = U_ZERO_ERROR;
710 UCollator *en_us=NULL;
711 UCollationElements *iter1, *iter2;
712 UChar test1[50];
713 UChar test2[50];
714
715 u_uastrcpy(test1, "What subset of all possible test cases?");
716 u_uastrcpy(test2, "has the highest probability of detecting");
717 en_us = ucol_open("en_US", &status);
718 log_verbose("testing setText for Collation elements\n");
719 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
720 if(U_FAILURE(status)){
721 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
722 myErrorName(status));
723 ucol_close(en_us);
724 return;
725 }
726 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
727 if(U_FAILURE(status)){
728 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
729 myErrorName(status));
730 ucol_close(en_us);
731 return;
732 }
733
734 /* Run through the second iterator just to exercise it */
735 c = ucol_next(iter2, &status);
736 i = 0;
737
738 while ( ++i < 10 && (c != UCOL_NULLORDER))
739 {
740 if (U_FAILURE(status))
741 {
742 log_err("iter2->next() returned an error. %s\n", myErrorName(status));
743 ucol_closeElements(iter2);
744 ucol_closeElements(iter1);
745 ucol_close(en_us);
746 return;
747 }
748
749 c = ucol_next(iter2, &status);
750 }
751
752 /* Now set it to point to the same string as the first iterator */
753 ucol_setText(iter2, test1, u_strlen(test1), &status);
754 if (U_FAILURE(status))
755 {
756 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
757 }
758 else
759 {
760 assertEqual(iter1, iter2);
761 }
762
763 /* Now set it to point to a null string with fake length*/
764 ucol_setText(iter2, NULL, 2, &status);
765 if (status != U_ILLEGAL_ARGUMENT_ERROR)
766 {
767 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
768 myErrorName(status));
769 }
770
771 ucol_closeElements(iter2);
772 ucol_closeElements(iter1);
773 ucol_close(en_us);
774 }
775
776 /** @bug 4108762
777 * Test for getMaxExpansion()
778 */
TestMaxExpansion()779 static void TestMaxExpansion()
780 {
781 UErrorCode status = U_ZERO_ERROR;
782 UCollator *coll ;/*= ucol_open("en_US", &status);*/
783 UChar ch = 0;
784 UChar32 unassigned = 0xEFFFD;
785 UChar supplementary[2];
786 uint32_t stringOffset = 0;
787 UBool isError = false;
788 uint32_t sorder = 0;
789 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/
790 uint32_t temporder = 0;
791
792 UChar rule[256];
793 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
794 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
795 UCOL_DEFAULT_STRENGTH,NULL, &status);
796 if(U_SUCCESS(status) && coll) {
797 iter = ucol_openElements(coll, &ch, 1, &status);
798
799 while (ch < 0xFFFF && U_SUCCESS(status)) {
800 int count = 1;
801 uint32_t order;
802 int32_t size = 0;
803
804 ch ++;
805
806 ucol_setText(iter, &ch, 1, &status);
807 order = ucol_previous(iter, &status);
808
809 /* thai management */
810 if (order == 0)
811 order = ucol_previous(iter, &status);
812
813 while (U_SUCCESS(status) &&
814 ucol_previous(iter, &status) != UCOL_NULLORDER) {
815 count ++;
816 }
817
818 size = ucol_getMaxExpansion(iter, order);
819 if (U_FAILURE(status) || size < count) {
820 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
821 ch, count);
822 }
823 }
824
825 /* testing for exact max expansion */
826 ch = 0;
827 while (ch < 0x61) {
828 uint32_t order;
829 int32_t size;
830 ucol_setText(iter, &ch, 1, &status);
831 order = ucol_previous(iter, &status);
832 size = ucol_getMaxExpansion(iter, order);
833 if (U_FAILURE(status) || size != 1) {
834 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
835 ch, 1);
836 }
837 ch ++;
838 }
839
840 ch = 0x63;
841 ucol_setText(iter, &ch, 1, &status);
842 temporder = ucol_previous(iter, &status);
843
844 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
845 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
846 ch, 3);
847 }
848
849 ch = 0x64;
850 ucol_setText(iter, &ch, 1, &status);
851 temporder = ucol_previous(iter, &status);
852
853 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
854 log_err("Failure at codepoint %d, maximum expansion count != %d\n",
855 ch, 3);
856 }
857
858 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
859 (void)isError; /* Suppress set but not used warning. */
860 ucol_setText(iter, supplementary, 2, &status);
861 sorder = ucol_previous(iter, &status);
862
863 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
864 log_err("Failure at codepoint %d, maximum expansion count < %d\n",
865 ch, 2);
866 }
867
868 /* testing jamo */
869 ch = 0x1165;
870
871 ucol_setText(iter, &ch, 1, &status);
872 temporder = ucol_previous(iter, &status);
873 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
874 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
875 ch, 3);
876 }
877
878 ucol_closeElements(iter);
879 ucol_close(coll);
880
881 /* testing special jamo &a<\u1160 */
882 rule[0] = 0x26;
883 rule[1] = 0x71;
884 rule[2] = 0x3c;
885 rule[3] = 0x1165;
886 rule[4] = 0x2f;
887 rule[5] = 0x71;
888 rule[6] = 0x71;
889 rule[7] = 0x71;
890 rule[8] = 0x71;
891 rule[9] = 0;
892
893 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
894 UCOL_DEFAULT_STRENGTH,NULL, &status);
895 iter = ucol_openElements(coll, &ch, 1, &status);
896
897 temporder = ucol_previous(iter, &status);
898 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
899 log_err("Failure at codepoint %d, maximum expansion count > %d\n",
900 ch, 5);
901 }
902
903 ucol_closeElements(iter);
904 ucol_close(coll);
905 } else {
906 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
907 }
908
909 }
910
911
/**
 * Advances both iterators in lock step with ucol_next() and logs an error at
 * the first position where they yield different collation elements, or where
 * ucol_next() reports a failure. Stops once both iterators have returned
 * UCOL_NULLORDER.
 *
 * @param i1 first iterator; it is advanced by this function
 * @param i2 second iterator; it is advanced by this function
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    int32_t c1, c2;
    int32_t count = 0;
    UErrorCode status = U_ZERO_ERROR;

    do
    {
        c1 = ucol_next(i1, &status);
        c2 = ucol_next(i2, &status);

        /* Report an iteration failure rather than ending the comparison silently. */
        if (U_FAILURE(status))
        {
            log_err("Error in iteration %d assertEqual, ucol_next() failed: %s\n",
                    count, myErrorName(status));
            break;
        }

        if (c1 != c2)
        {
            log_err("Error in iteration %d assertEqual between\n %d and %d, they are not equal\n", count, c1, c2);
            break;
        }

        count += 1;
    }
    while (c1 != UCOL_NULLORDER);
}
933
934 /**
935 * Testing iterators with extremely small buffers
936 */
TestSmallBuffer()937 static void TestSmallBuffer()
938 {
939 UErrorCode status = U_ZERO_ERROR;
940 UCollator *coll;
941 UCollationElements *testiter,
942 *iter;
943 int32_t count = 0;
944 OrderAndOffset *testorders,
945 *orders;
946
947 UChar teststr[500];
948 UChar str[] = {0x300, 0x31A, 0};
949 /*
950 creating a long string of decomposable characters,
951 since by default the writable buffer is of size 256
952 */
953 while (count < 500) {
954 if ((count & 1) == 0) {
955 teststr[count ++] = 0x300;
956 }
957 else {
958 teststr[count ++] = 0x31A;
959 }
960 }
961
962 coll = ucol_open("th_TH", &status);
963 if(U_SUCCESS(status) && coll) {
964 testiter = ucol_openElements(coll, teststr, 500, &status);
965 iter = ucol_openElements(coll, str, 2, &status);
966
967 orders = getOrders(iter, &count);
968 if (count != 2) {
969 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
970 }
971
972 /*
973 this will rearrange the string data to 250 characters of 0x300 first then
974 250 characters of 0x031A
975 */
976 testorders = getOrders(testiter, &count);
977
978 if (count != 500) {
979 log_err("Error decomposition does not give the right sized collation elements\n");
980 }
981
982 while (count != 0) {
983 /* UCA collation element for 0x0F76 */
984 if ((count > 250 && testorders[-- count].order != orders[1].order) ||
985 (count <= 250 && testorders[-- count].order != orders[0].order)) {
986 log_err("Error decomposition does not give the right collation element at %d count\n", count);
987 break;
988 }
989 }
990
991 free(testorders);
992 free(orders);
993
994 ucol_reset(testiter);
995
996 /* ensures closing of elements done properly to clear writable buffer */
997 ucol_next(testiter, &status);
998 ucol_next(testiter, &status);
999 ucol_closeElements(testiter);
1000 ucol_closeElements(iter);
1001 ucol_close(coll);
1002 } else {
1003 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1004 }
1005 }
1006
1007 /**
1008 * Testing the discontiguous contractions
1009 */
TestDiscontiguos()1010 static void TestDiscontiguos() {
1011 const char *rulestr =
1012 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1013 UChar rule[50];
1014 int rulelen = u_unescape(rulestr, rule, 50);
1015 const char *src[] = {
1016 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1017 /* base character blocked */
1018 "XD\\u0300", "XD\\u0300\\u0315",
1019 /* non blocking combining character */
1020 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1021 /* blocking combining character */
1022 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1023 /* contraction prefix */
1024 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1025 "X\\u0300\\u031A\\u0315",
1026 /* ends not with a contraction character */
1027 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1028 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1029 };
1030 const char *tgt[] = {
1031 /* non blocking combining character */
1032 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1033 /* base character blocked */
1034 "X D \\u0300", "X D \\u0300\\u0315",
1035 /* non blocking combining character */
1036 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1037 /* blocking combining character */
1038 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1039 /* contraction prefix */
1040 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1041 "X\\u0300 \\u031A \\u0315",
1042 /* ends not with a contraction character */
1043 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1044 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1045 };
1046 int size = 20;
1047 UCollator *coll;
1048 UErrorCode status = U_ZERO_ERROR;
1049 int count = 0;
1050 UCollationElements *iter;
1051 UCollationElements *resultiter;
1052
1053 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1054 iter = ucol_openElements(coll, rule, 1, &status);
1055 resultiter = ucol_openElements(coll, rule, 1, &status);
1056
1057 if (U_FAILURE(status)) {
1058 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1059 return;
1060 }
1061
1062 while (count < size) {
1063 UChar str[20];
1064 UChar tstr[20];
1065 int strLen = u_unescape(src[count], str, 20);
1066 UChar *s;
1067
1068 ucol_setText(iter, str, strLen, &status);
1069 if (U_FAILURE(status)) {
1070 log_err("Error opening collation iterator\n");
1071 return;
1072 }
1073
1074 u_unescape(tgt[count], tstr, 20);
1075 s = tstr;
1076
1077 log_verbose("count %d\n", count);
1078
1079 for (;;) {
1080 int32_t ce;
1081 UChar *e = u_strchr(s, 0x20);
1082 if (e == 0) {
1083 e = u_strchr(s, 0);
1084 }
1085 ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1086 ce = ucol_next(resultiter, &status);
1087 if (U_FAILURE(status)) {
1088 log_err("Error manipulating collation iterator\n");
1089 return;
1090 }
1091 while (ce != UCOL_NULLORDER) {
1092 if (ce != ucol_next(iter, &status) ||
1093 U_FAILURE(status)) {
1094 log_err("Discontiguous contraction test mismatch\n");
1095 return;
1096 }
1097 ce = ucol_next(resultiter, &status);
1098 if (U_FAILURE(status)) {
1099 log_err("Error getting next collation element\n");
1100 return;
1101 }
1102 }
1103 s = e + 1;
1104 if (*e == 0) {
1105 break;
1106 }
1107 }
1108 ucol_reset(iter);
1109 backAndForth(iter);
1110 count ++;
1111 }
1112 ucol_closeElements(resultiter);
1113 ucol_closeElements(iter);
1114 ucol_close(coll);
1115 }
1116
1117 /**
1118 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1119 * normalization on AND jamo tailoring, among other things.
1120 *
1121 * Note: This test is sensitive to changes of the root collator,
1122 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1123 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1124 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1125 * For example, the DUCET's artificial secondary CE in the ae-ligature
1126 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1127 */
1128 static const UChar tsceText[] = { /* Nothing in here should be ignorable */
1129 0x0020, 0xAC00, /* simple LV Hangul */
1130 0x0020, 0xAC01, /* simple LVT Hangul */
1131 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */
1132 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */
1133 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1134 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1135 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1136 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1137 0x0020, 0x00E6, /* small letter ae, expands */
1138 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */
1139 0x0020
1140 };
1141 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
1142
1143 static const int32_t rootStandardOffsets[] = {
1144 0, 1,2,
1145 2, 3,4,4,
1146 4, 5,6,6,
1147 6, 7,8,8,
1148 8, 9,10,11,
1149 12, 13,14,15,
1150 16, 17,18,19,
1151 20, 21,22,23,
1152 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1153 26, 27,28,28,
1154 28,
1155 29
1156 };
1157 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
1158
1159 static const int32_t rootSearchOffsets[] = {
1160 0, 1,2,
1161 2, 3,4,4,
1162 4, 5,6,6,6,
1163 6, 7,8,8,8,8,8,8,
1164 8, 9,10,11,
1165 12, 13,14,15,
1166 16, 17,18,19,20,
1167 20, 21,22,22,23,23,23,24,
1168 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1169 26, 27,28,28,
1170 28,
1171 29
1172 };
1173 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1174
/*
 * One TestSearchCollatorElements case: a collator locale together with the
 * table of iterator offsets expected for it.
 */
typedef struct {
    const char    *locale;      /* locale ID handed to ucol_open(); NULL ends the tsceItems list */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1180
1181 static const TSCEItem tsceItems[] = {
1182 { "root", rootStandardOffsets, kLen_rootStandardOffsets },
1183 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets },
1184 { NULL, NULL, 0 }
1185 };
1186
TestSearchCollatorElements(void)1187 static void TestSearchCollatorElements(void)
1188 {
1189 const TSCEItem * tsceItemPtr;
1190 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1191 UErrorCode status = U_ZERO_ERROR;
1192 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1193 if ( U_SUCCESS(status) ) {
1194 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1195 if ( U_SUCCESS(status) ) {
1196 int32_t offset, element;
1197 const int32_t * nextOffsetPtr;
1198 const int32_t * limitOffsetPtr;
1199
1200 nextOffsetPtr = tsceItemPtr->offsets;
1201 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1202 do {
1203 offset = ucol_getOffset(uce);
1204 element = ucol_next(uce, &status);
1205 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element);
1206 if ( element == 0 ) {
1207 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1208 }
1209 if ( nextOffsetPtr < limitOffsetPtr ) {
1210 if (offset != *nextOffsetPtr) {
1211 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1212 tsceItemPtr->locale, *nextOffsetPtr, offset );
1213 nextOffsetPtr = limitOffsetPtr;
1214 break;
1215 }
1216 nextOffsetPtr++;
1217 } else {
1218 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1219 }
1220 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1221 if ( nextOffsetPtr < limitOffsetPtr ) {
1222 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1223 }
1224
1225 ucol_setOffset(uce, kLen_tsceText, &status);
1226 status = U_ZERO_ERROR;
1227 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1228 limitOffsetPtr = tsceItemPtr->offsets;
1229 do {
1230 offset = ucol_getOffset(uce);
1231 element = ucol_previous(uce, &status);
1232 if ( element == 0 ) {
1233 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1234 }
1235 if ( nextOffsetPtr > limitOffsetPtr ) {
1236 nextOffsetPtr--;
1237 if (offset != *nextOffsetPtr) {
1238 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1239 tsceItemPtr->locale, *nextOffsetPtr, offset );
1240 nextOffsetPtr = limitOffsetPtr;
1241 break;
1242 }
1243 } else {
1244 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1245 }
1246 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1247 if ( nextOffsetPtr > limitOffsetPtr ) {
1248 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1249 }
1250
1251 ucol_closeElements(uce);
1252 } else {
1253 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1254 }
1255 ucol_close(ucol);
1256 } else {
1257 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1258 }
1259 }
1260 }
1261
1262 #endif /* #if !UCONFIG_NO_COLLATION */
1263