/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/********************************************************************************
*
* File CITERTST.C
*
* Modification History:
* Date      Name               Description
*           Madhu Katragadda   Ported for C API
* 02/19/01  synwee             Modified test case for new collation iterator
*********************************************************************************/
/*
 * Collation Iterator tests.
 * (Let me reiterate my position...)
 */
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION
23 
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ustring.h"
29 #include "unicode/putil.h"
30 #include "callcoll.h"
31 #include "cmemory.h"
32 #include "cintltst.h"
33 #include "citertst.h"
34 #include "ccolltst.h"
35 #include "filestrm.h"
36 #include "cstring.h"
37 #include "ucol_imp.h"
38 #include "uparse.h"
39 #include <stdio.h>
40 
41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
42 
/**
 * Registers the collation element iterator tests of this file in the test
 * tree, each under its "tscoll/citertst/..." path.
 *
 * @param root test tree root, handed through to addTest()
 */
void addCollIterTest(TestNode** root)
{
    /* Registration table; the tests are added in array order. */
    static const struct {
        void       (*run)(void);
        const char  *path;
    } entries[] = {
        { &TestPrevious,               "tscoll/citertst/TestPrevious" },
        { &TestOffset,                 "tscoll/citertst/TestOffset" },
        { &TestSetText,                "tscoll/citertst/TestSetText" },
        { &TestMaxExpansion,           "tscoll/citertst/TestMaxExpansion" },
        { &TestUnicodeChar,            "tscoll/citertst/TestUnicodeChar" },
        { &TestNormalizedUnicodeChar,  "tscoll/citertst/TestNormalizedUnicodeChar" },
        { &TestNormalization,          "tscoll/citertst/TestNormalization" },
        { &TestBug672,                 "tscoll/citertst/TestBug672" },
        { &TestBug672Normalize,        "tscoll/citertst/TestBug672Normalize" },
        { &TestSmallBuffer,            "tscoll/citertst/TestSmallBuffer" },
        { &TestDiscontiguos,           "tscoll/citertst/TestDiscontiguos" },
        { &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements" }
    };
    int k;

    for (k = 0; k < (int)(sizeof(entries) / sizeof(entries[0])); ++k) {
        addTest(root, entries[k].run, entries[k].path);
    }
}
59 
/* Locale IDs iterated over by TestBug672 and TestBug672Normalize. */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
63 
TestBug672()64 static void TestBug672() {
65     UErrorCode  status = U_ZERO_ERROR;
66     UChar       pattern[20];
67     UChar       text[50];
68     int         i;
69     int         result[3][3];
70 
71     u_uastrcpy(pattern, "resume");
72     u_uastrcpy(text, "Time to resume updating my resume.");
73 
74     for (i = 0; i < 3; ++ i) {
75         UCollator          *coll = ucol_open(LOCALES[i], &status);
76         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
77                                                      &status);
78         UCollationElements *titer = ucol_openElements(coll, text, -1,
79                                                      &status);
80         if (U_FAILURE(status)) {
81             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
82                     myErrorName(status));
83             return;
84         }
85 
86         log_verbose("locale tested %s\n", LOCALES[i]);
87 
88         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
89                U_SUCCESS(status)) {
90         }
91         if (U_FAILURE(status)) {
92             log_err("ERROR: reversing collation iterator :%s\n",
93                     myErrorName(status));
94             return;
95         }
96         ucol_reset(pitr);
97 
98         ucol_setOffset(titer, u_strlen(pattern), &status);
99         if (U_FAILURE(status)) {
100             log_err("ERROR: setting offset in collator :%s\n",
101                     myErrorName(status));
102             return;
103         }
104         result[i][0] = ucol_getOffset(titer);
105         log_verbose("Text iterator set to offset %d\n", result[i][0]);
106 
107         /* Use previous() */
108         ucol_previous(titer, &status);
109         result[i][1] = ucol_getOffset(titer);
110         log_verbose("Current offset %d after previous\n", result[i][1]);
111 
112         /* Add one to index */
113         log_verbose("Adding one to current offset...\n");
114         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
115         if (U_FAILURE(status)) {
116             log_err("ERROR: setting offset in collator :%s\n",
117                     myErrorName(status));
118             return;
119         }
120         result[i][2] = ucol_getOffset(titer);
121         log_verbose("Current offset in text = %d\n", result[i][2]);
122         ucol_closeElements(pitr);
123         ucol_closeElements(titer);
124         ucol_close(coll);
125     }
126 
127     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
128         uprv_memcmp(result[1], result[2], 3) != 0) {
129         log_err("ERROR: Different locales have different offsets at the same character\n");
130     }
131 }
132 
133 
134 
135 /*  Running this test with normalization enabled showed up a bug in the incremental
136     normalization code. */
TestBug672Normalize()137 static void TestBug672Normalize() {
138     UErrorCode  status = U_ZERO_ERROR;
139     UChar       pattern[20];
140     UChar       text[50];
141     int         i;
142     int         result[3][3];
143 
144     u_uastrcpy(pattern, "resume");
145     u_uastrcpy(text, "Time to resume updating my resume.");
146 
147     for (i = 0; i < 3; ++ i) {
148         UCollator          *coll = ucol_open(LOCALES[i], &status);
149         UCollationElements *pitr = NULL;
150         UCollationElements *titer = NULL;
151 
152         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
153 
154         pitr = ucol_openElements(coll, pattern, -1, &status);
155         titer = ucol_openElements(coll, text, -1, &status);
156         if (U_FAILURE(status)) {
157             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
158                     myErrorName(status));
159             return;
160         }
161 
162         log_verbose("locale tested %s\n", LOCALES[i]);
163 
164         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
165                U_SUCCESS(status)) {
166         }
167         if (U_FAILURE(status)) {
168             log_err("ERROR: reversing collation iterator :%s\n",
169                     myErrorName(status));
170             return;
171         }
172         ucol_reset(pitr);
173 
174         ucol_setOffset(titer, u_strlen(pattern), &status);
175         if (U_FAILURE(status)) {
176             log_err("ERROR: setting offset in collator :%s\n",
177                     myErrorName(status));
178             return;
179         }
180         result[i][0] = ucol_getOffset(titer);
181         log_verbose("Text iterator set to offset %d\n", result[i][0]);
182 
183         /* Use previous() */
184         ucol_previous(titer, &status);
185         result[i][1] = ucol_getOffset(titer);
186         log_verbose("Current offset %d after previous\n", result[i][1]);
187 
188         /* Add one to index */
189         log_verbose("Adding one to current offset...\n");
190         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
191         if (U_FAILURE(status)) {
192             log_err("ERROR: setting offset in collator :%s\n",
193                     myErrorName(status));
194             return;
195         }
196         result[i][2] = ucol_getOffset(titer);
197         log_verbose("Current offset in text = %d\n", result[i][2]);
198         ucol_closeElements(pitr);
199         ucol_closeElements(titer);
200         ucol_close(coll);
201     }
202 
203     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
204         uprv_memcmp(result[1], result[2], 3) != 0) {
205         log_err("ERROR: Different locales have different offsets at the same character\n");
206     }
207 }
208 
209 
210 
211 
212 /**
213  * Test for CollationElementIterator previous and next for the whole set of
214  * unicode characters.
215  */
TestUnicodeChar()216 static void TestUnicodeChar()
217 {
218     UChar source[0x100];
219     UCollator *en_us;
220     UCollationElements *iter;
221     UErrorCode status = U_ZERO_ERROR;
222     UChar codepoint;
223 
224     UChar *test;
225     en_us = ucol_open("en_US", &status);
226     if (U_FAILURE(status)){
227        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
228               myErrorName(status));
229        return;
230     }
231 
232     for (codepoint = 1; codepoint < 0xFFFE;)
233     {
234       test = source;
235 
236       while (codepoint % 0xFF != 0)
237       {
238         if (u_isdefined(codepoint))
239           *(test ++) = codepoint;
240         codepoint ++;
241       }
242 
243       if (u_isdefined(codepoint))
244         *(test ++) = codepoint;
245 
246       if (codepoint != 0xFFFF)
247         codepoint ++;
248 
249       *test = 0;
250       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
251       if(U_FAILURE(status)){
252           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
253               myErrorName(status));
254           ucol_close(en_us);
255           return;
256       }
257       /* A basic test to see if it's working at all */
258       log_verbose("codepoint testing %x\n", codepoint);
259       backAndForth(iter);
260       ucol_closeElements(iter);
261 
262       /* null termination test */
263       iter=ucol_openElements(en_us, source, -1, &status);
264       if(U_FAILURE(status)){
265           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
266               myErrorName(status));
267           ucol_close(en_us);
268           return;
269       }
270       /* A basic test to see if it's working at all */
271       backAndForth(iter);
272       ucol_closeElements(iter);
273     }
274 
275     ucol_close(en_us);
276 }
277 
278 /**
279  * Test for CollationElementIterator previous and next for the whole set of
280  * unicode characters with normalization on.
281  */
TestNormalizedUnicodeChar()282 static void TestNormalizedUnicodeChar()
283 {
284     UChar source[0x100];
285     UCollator *th_th;
286     UCollationElements *iter;
287     UErrorCode status = U_ZERO_ERROR;
288     UChar codepoint;
289 
290     UChar *test;
291     /* thai should have normalization on */
292     th_th = ucol_open("th_TH", &status);
293     if (U_FAILURE(status)){
294         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
295               myErrorName(status));
296         return;
297     }
298 
299     for (codepoint = 1; codepoint < 0xFFFE;)
300     {
301       test = source;
302 
303       while (codepoint % 0xFF != 0)
304       {
305         if (u_isdefined(codepoint))
306           *(test ++) = codepoint;
307         codepoint ++;
308       }
309 
310       if (u_isdefined(codepoint))
311         *(test ++) = codepoint;
312 
313       if (codepoint != 0xFFFF)
314         codepoint ++;
315 
316       *test = 0;
317       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
318       if(U_FAILURE(status)){
319           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
320               myErrorName(status));
321             ucol_close(th_th);
322           return;
323       }
324 
325       backAndForth(iter);
326       ucol_closeElements(iter);
327 
328       iter=ucol_openElements(th_th, source, -1, &status);
329       if(U_FAILURE(status)){
330           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
331               myErrorName(status));
332             ucol_close(th_th);
333           return;
334       }
335 
336       backAndForth(iter);
337       ucol_closeElements(iter);
338     }
339 
340     ucol_close(th_th);
341 }
342 
343 /**
344 * Test the incremental normalization
345 */
TestNormalization()346 static void TestNormalization()
347 {
348           UErrorCode          status = U_ZERO_ERROR;
349     const char               *str    =
350                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
351           UCollator          *coll;
352           UChar               rule[50];
353           int                 rulelen = u_unescape(str, rule, 50);
354           int                 count = 0;
355     const char                *testdata[] =
356                         {"\\u1ED9", "o\\u0323\\u0302",
357                         "\\u0300\\u0315", "\\u0315\\u0300",
358                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
359                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
360                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
361                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
362                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
363     int32_t   srclen;
364     UChar source[10];
365     UCollationElements *iter;
366 
367     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
368     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
369     if (U_FAILURE(status)){
370         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
371               myErrorName(status));
372         return;
373     }
374 
375     srclen = u_unescape(testdata[0], source, 10);
376     iter = ucol_openElements(coll, source, srclen, &status);
377     backAndForth(iter);
378     ucol_closeElements(iter);
379 
380     srclen = u_unescape(testdata[1], source, 10);
381     iter = ucol_openElements(coll, source, srclen, &status);
382     backAndForth(iter);
383     ucol_closeElements(iter);
384 
385     while (count < 12) {
386         srclen = u_unescape(testdata[count], source, 10);
387         iter = ucol_openElements(coll, source, srclen, &status);
388 
389         if (U_FAILURE(status)){
390             log_err("ERROR: in creation of collator element iterator\n %s\n",
391                   myErrorName(status));
392             return;
393         }
394         backAndForth(iter);
395         ucol_closeElements(iter);
396 
397         iter = ucol_openElements(coll, source, -1, &status);
398 
399         if (U_FAILURE(status)){
400             log_err("ERROR: in creation of collator element iterator\n %s\n",
401                   myErrorName(status));
402             return;
403         }
404         backAndForth(iter);
405         ucol_closeElements(iter);
406         count ++;
407     }
408     ucol_close(coll);
409 }
410 
411 /**
412  * Test for CollationElementIterator.previous()
413  *
414  * @bug 4108758 - Make sure it works with contracting characters
415  *
416  */
TestPrevious()417 static void TestPrevious()
418 {
419     UCollator *coll=NULL;
420     UChar rule[50];
421     UChar *source;
422     UCollator *c1, *c2, *c3;
423     UCollationElements *iter;
424     UErrorCode status = U_ZERO_ERROR;
425     UChar test1[50];
426     UChar test2[50];
427 
428     u_uastrcpy(test1, "What subset of all possible test cases?");
429     u_uastrcpy(test2, "has the highest probability of detecting");
430     coll = ucol_open("en_US", &status);
431 
432     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
433     log_verbose("English locale testing back and forth\n");
434     if(U_FAILURE(status)){
435         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
436             myErrorName(status));
437         ucol_close(coll);
438         return;
439     }
440     /* A basic test to see if it's working at all */
441     backAndForth(iter);
442     ucol_closeElements(iter);
443     ucol_close(coll);
444 
445     /* Test with a contracting character sequence */
446     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
447     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
448 
449     log_verbose("Contraction rule testing back and forth with no normalization\n");
450 
451     if (c1 == NULL || U_FAILURE(status))
452     {
453         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
454             myErrorName(status));
455         return;
456     }
457     source=(UChar*)malloc(sizeof(UChar) * 20);
458     u_uastrcpy(source, "abchdcba");
459     iter=ucol_openElements(c1, source, u_strlen(source), &status);
460     if(U_FAILURE(status)){
461         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
462             myErrorName(status));
463         return;
464     }
465     backAndForth(iter);
466     ucol_closeElements(iter);
467     ucol_close(c1);
468 
469     /* Test with an expanding character sequence */
470     u_uastrcpy(rule, "&a < b < c/abd < d");
471     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
472     log_verbose("Expansion rule testing back and forth with no normalization\n");
473     if (c2 == NULL || U_FAILURE(status))
474     {
475         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
476             myErrorName(status));
477         return;
478     }
479     u_uastrcpy(source, "abcd");
480     iter=ucol_openElements(c2, source, u_strlen(source), &status);
481     if(U_FAILURE(status)){
482         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
483             myErrorName(status));
484         return;
485     }
486     backAndForth(iter);
487     ucol_closeElements(iter);
488     ucol_close(c2);
489     /* Now try both */
490     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
491     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
492     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
493 
494     if (c3 == NULL || U_FAILURE(status))
495     {
496         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
497             myErrorName(status));
498         return;
499     }
500     u_uastrcpy(source, "abcdbchdc");
501     iter=ucol_openElements(c3, source, u_strlen(source), &status);
502     if(U_FAILURE(status)){
503         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
504             myErrorName(status));
505         return;
506     }
507     backAndForth(iter);
508     ucol_closeElements(iter);
509     ucol_close(c3);
510     source[0] = 0x0e41;
511     source[1] = 0x0e02;
512     source[2] = 0x0e41;
513     source[3] = 0x0e02;
514     source[4] = 0x0e27;
515     source[5] = 0x61;
516     source[6] = 0x62;
517     source[7] = 0x63;
518     source[8] = 0;
519 
520     coll = ucol_open("th_TH", &status);
521     log_verbose("Thai locale testing back and forth with normalization\n");
522     iter=ucol_openElements(coll, source, u_strlen(source), &status);
523     if(U_FAILURE(status)){
524         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
525             myErrorName(status));
526         return;
527     }
528     backAndForth(iter);
529     ucol_closeElements(iter);
530     ucol_close(coll);
531 
532     /* prev test */
533     source[0] = 0x0061;
534     source[1] = 0x30CF;
535     source[2] = 0x3099;
536     source[3] = 0x30FC;
537     source[4] = 0;
538 
539     coll = ucol_open("ja_JP", &status);
540     log_verbose("Japanese locale testing back and forth with normalization\n");
541     iter=ucol_openElements(coll, source, u_strlen(source), &status);
542     if(U_FAILURE(status)){
543         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
544             myErrorName(status));
545         return;
546     }
547     backAndForth(iter);
548     ucol_closeElements(iter);
549     ucol_close(coll);
550 
551     free(source);
552 }
553 
554 /**
555  * Test for getOffset() and setOffset()
556  */
TestOffset()557 static void TestOffset()
558 {
559     UErrorCode status= U_ZERO_ERROR;
560     UCollator *en_us=NULL;
561     UCollationElements *iter, *pristine;
562     int32_t offset;
563     OrderAndOffset *orders;
564     int32_t orderLength=0;
565     int     count = 0;
566     UChar test1[50];
567     UChar test2[50];
568 
569     u_uastrcpy(test1, "What subset of all possible test cases?");
570     u_uastrcpy(test2, "has the highest probability of detecting");
571     en_us = ucol_open("en_US", &status);
572     log_verbose("Testing getOffset and setOffset for collations\n");
573     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
574     if(U_FAILURE(status)){
575         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
576             myErrorName(status));
577         ucol_close(en_us);
578         return;
579     }
580 
581     /* testing boundaries */
582     ucol_setOffset(iter, 0, &status);
583     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
584         log_err("Error: After setting offset to 0, we should be at the end "
585                 "of the backwards iteration");
586     }
587     ucol_setOffset(iter, u_strlen(test1), &status);
588     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
589         log_err("Error: After setting offset to end of the string, we should "
590                 "be at the end of the backwards iteration");
591     }
592 
593     /* Run all the way through the iterator, then get the offset */
594 
595     orders = getOrders(iter, &orderLength);
596 
597     offset = ucol_getOffset(iter);
598 
599     if (offset != u_strlen(test1))
600     {
601         log_err("offset at end != length %d vs %d\n", offset,
602             u_strlen(test1) );
603     }
604 
605     /* Now set the offset back to the beginning and see if it works */
606     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
607     if(U_FAILURE(status)){
608         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
609             myErrorName(status));
610     ucol_close(en_us);
611         return;
612     }
613     status = U_ZERO_ERROR;
614 
615     ucol_setOffset(iter, 0, &status);
616     if (U_FAILURE(status))
617     {
618         log_err("setOffset failed. %s\n",    myErrorName(status));
619     }
620     else
621     {
622         assertEqual(iter, pristine);
623     }
624 
625     ucol_closeElements(pristine);
626     ucol_closeElements(iter);
627     free(orders);
628 
629     /* testing offsets in normalization buffer */
630     test1[0] = 0x61;
631     test1[1] = 0x300;
632     test1[2] = 0x316;
633     test1[3] = 0x62;
634     test1[4] = 0;
635     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
636     iter = ucol_openElements(en_us, test1, 4, &status);
637     if(U_FAILURE(status)){
638         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
639             myErrorName(status));
640         ucol_close(en_us);
641         return;
642     }
643 
644     count = 0;
645     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
646         U_SUCCESS(status)) {
647         switch (count) {
648         case 0:
649             if (ucol_getOffset(iter) != 1) {
650                 log_err("ERROR: Offset of iteration should be 1\n");
651             }
652             break;
653         case 3:
654             if (ucol_getOffset(iter) != 4) {
655                 log_err("ERROR: Offset of iteration should be 4\n");
656             }
657             break;
658         default:
659             if (ucol_getOffset(iter) != 3) {
660                 log_err("ERROR: Offset of iteration should be 3\n");
661             }
662         }
663         count ++;
664     }
665 
666     ucol_reset(iter);
667     count = 0;
668     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
669         U_SUCCESS(status)) {
670         switch (count) {
671         case 0:
672         case 1:
673             if (ucol_getOffset(iter) != 3) {
674                 log_err("ERROR: Offset of iteration should be 3\n");
675             }
676             break;
677         case 2:
678             if (ucol_getOffset(iter) != 1) {
679                 log_err("ERROR: Offset of iteration should be 1\n");
680             }
681             break;
682         default:
683             if (ucol_getOffset(iter) != 0) {
684                 log_err("ERROR: Offset of iteration should be 0\n");
685             }
686         }
687         count ++;
688     }
689 
690     if(U_FAILURE(status)){
691         log_err("ERROR: in iterating collation elements %s\n",
692             myErrorName(status));
693     }
694 
695     ucol_closeElements(iter);
696     ucol_close(en_us);
697 }
698 
699 /**
700  * Test for setText()
701  */
TestSetText()702 static void TestSetText()
703 {
704     int32_t c,i;
705     UErrorCode status = U_ZERO_ERROR;
706     UCollator *en_us=NULL;
707     UCollationElements *iter1, *iter2;
708     UChar test1[50];
709     UChar test2[50];
710 
711     u_uastrcpy(test1, "What subset of all possible test cases?");
712     u_uastrcpy(test2, "has the highest probability of detecting");
713     en_us = ucol_open("en_US", &status);
714     log_verbose("testing setText for Collation elements\n");
715     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
716     if(U_FAILURE(status)){
717         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
718             myErrorName(status));
719     ucol_close(en_us);
720         return;
721     }
722     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
723     if(U_FAILURE(status)){
724         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
725             myErrorName(status));
726     ucol_close(en_us);
727         return;
728     }
729 
730     /* Run through the second iterator just to exercise it */
731     c = ucol_next(iter2, &status);
732     i = 0;
733 
734     while ( ++i < 10 && (c != UCOL_NULLORDER))
735     {
736         if (U_FAILURE(status))
737         {
738             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
739             ucol_closeElements(iter2);
740             ucol_closeElements(iter1);
741     ucol_close(en_us);
742             return;
743         }
744 
745         c = ucol_next(iter2, &status);
746     }
747 
748     /* Now set it to point to the same string as the first iterator */
749     ucol_setText(iter2, test1, u_strlen(test1), &status);
750     if (U_FAILURE(status))
751     {
752         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
753     }
754     else
755     {
756         assertEqual(iter1, iter2);
757     }
758 
759     /* Now set it to point to a null string with fake length*/
760     ucol_setText(iter2, NULL, 2, &status);
761     if (status != U_ILLEGAL_ARGUMENT_ERROR)
762     {
763         log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
764                 myErrorName(status));
765     }
766 
767     ucol_closeElements(iter2);
768     ucol_closeElements(iter1);
769     ucol_close(en_us);
770 }
771 
772 /** @bug 4108762
773  * Test for getMaxExpansion()
774  */
TestMaxExpansion()775 static void TestMaxExpansion()
776 {
777     UErrorCode          status = U_ZERO_ERROR;
778     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
779     UChar               ch     = 0;
780     UChar32             unassigned = 0xEFFFD;
781     UChar               supplementary[2];
782     uint32_t            stringOffset = 0;
783     UBool               isError = FALSE;
784     uint32_t            sorder = 0;
785     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
786     uint32_t            temporder = 0;
787 
788     UChar rule[256];
789     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
790     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
791         UCOL_DEFAULT_STRENGTH,NULL, &status);
792     if(U_SUCCESS(status) && coll) {
793       iter = ucol_openElements(coll, &ch, 1, &status);
794 
795       while (ch < 0xFFFF && U_SUCCESS(status)) {
796           int      count = 1;
797           uint32_t order;
798           int32_t  size = 0;
799 
800           ch ++;
801 
802           ucol_setText(iter, &ch, 1, &status);
803           order = ucol_previous(iter, &status);
804 
805           /* thai management */
806           if (order == 0)
807               order = ucol_previous(iter, &status);
808 
809           while (U_SUCCESS(status) &&
810               ucol_previous(iter, &status) != UCOL_NULLORDER) {
811               count ++;
812           }
813 
814           size = ucol_getMaxExpansion(iter, order);
815           if (U_FAILURE(status) || size < count) {
816               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
817                   ch, count);
818           }
819       }
820 
821       /* testing for exact max expansion */
822       ch = 0;
823       while (ch < 0x61) {
824           uint32_t order;
825           int32_t  size;
826           ucol_setText(iter, &ch, 1, &status);
827           order = ucol_previous(iter, &status);
828           size  = ucol_getMaxExpansion(iter, order);
829           if (U_FAILURE(status) || size != 1) {
830               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
831                   ch, 1);
832           }
833           ch ++;
834       }
835 
836       ch = 0x63;
837       ucol_setText(iter, &ch, 1, &status);
838       temporder = ucol_previous(iter, &status);
839 
840       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
841           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
842                   ch, 3);
843       }
844 
845       ch = 0x64;
846       ucol_setText(iter, &ch, 1, &status);
847       temporder = ucol_previous(iter, &status);
848 
849       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
850           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
851                   ch, 3);
852       }
853 
854       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
855       (void)isError;    /* Suppress set but not used warning. */
856       ucol_setText(iter, supplementary, 2, &status);
857       sorder = ucol_previous(iter, &status);
858 
859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
860           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
861                   ch, 2);
862       }
863 
864       /* testing jamo */
865       ch = 0x1165;
866 
867       ucol_setText(iter, &ch, 1, &status);
868       temporder = ucol_previous(iter, &status);
869       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
870           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
871                   ch, 3);
872       }
873 
874       ucol_closeElements(iter);
875       ucol_close(coll);
876 
877       /* testing special jamo &a<\u1160 */
878       rule[0] = 0x26;
879       rule[1] = 0x71;
880       rule[2] = 0x3c;
881       rule[3] = 0x1165;
882       rule[4] = 0x2f;
883       rule[5] = 0x71;
884       rule[6] = 0x71;
885       rule[7] = 0x71;
886       rule[8] = 0x71;
887       rule[9] = 0;
888 
889       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
890           UCOL_DEFAULT_STRENGTH,NULL, &status);
891       iter = ucol_openElements(coll, &ch, 1, &status);
892 
893       temporder = ucol_previous(iter, &status);
894       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
895           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
896                   ch, 5);
897       }
898 
899       ucol_closeElements(iter);
900       ucol_close(coll);
901     } else {
902       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
903     }
904 
905 }
906 
907 
/**
 * Steps two collation element iterators forward in lock-step and reports the
 * first position at which the returned elements differ.
 *
 * Iteration stops at the first mismatch, at the first failing status, or once
 * the (equal) elements returned are UCOL_NULLORDER.
 *
 * @param i1 first iterator, advanced with ucol_next()
 * @param i2 second iterator, advanced with ucol_next()
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    int32_t c1, c2;
    int32_t count = 0;
    UErrorCode status = U_ZERO_ERROR;

    do
    {
        c1 = ucol_next(i1, &status);
        c2 = ucol_next(i2, &status);

        /* A failed ucol_next() must not be mistaken for a clean end of iteration. */
        if (U_FAILURE(status))
        {
            log_err("Error in iteration %d assertEqual, ucol_next failed -> %s\n", count, u_errorName(status));
            break;
        }

        if (c1 != c2)
        {
            log_err("Error in iteration %d assertEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
            break;
        }

        count += 1;
    }
    while (c1 != UCOL_NULLORDER);
}
929 
930 /**
931  * Testing iterators with extremely small buffers
932  */
TestSmallBuffer()933 static void TestSmallBuffer()
934 {
935     UErrorCode          status = U_ZERO_ERROR;
936     UCollator          *coll;
937     UCollationElements *testiter,
938                        *iter;
939     int32_t             count = 0;
940     OrderAndOffset     *testorders,
941                        *orders;
942 
943     UChar teststr[500];
944     UChar str[] = {0x300, 0x31A, 0};
945     /*
946     creating a long string of decomposable characters,
947     since by default the writable buffer is of size 256
948     */
949     while (count < 500) {
950         if ((count & 1) == 0) {
951             teststr[count ++] = 0x300;
952         }
953         else {
954             teststr[count ++] = 0x31A;
955         }
956     }
957 
958     coll = ucol_open("th_TH", &status);
959     if(U_SUCCESS(status) && coll) {
960       testiter = ucol_openElements(coll, teststr, 500, &status);
961       iter = ucol_openElements(coll, str, 2, &status);
962 
963       orders     = getOrders(iter, &count);
964       if (count != 2) {
965           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
966       }
967 
968       /*
969       this will rearrange the string data to 250 characters of 0x300 first then
970       250 characters of 0x031A
971       */
972       testorders = getOrders(testiter, &count);
973 
974       if (count != 500) {
975           log_err("Error decomposition does not give the right sized collation elements\n");
976       }
977 
978       while (count != 0) {
979           /* UCA collation element for 0x0F76 */
980           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
981               (count <= 250 && testorders[-- count].order != orders[0].order)) {
982               log_err("Error decomposition does not give the right collation element at %d count\n", count);
983               break;
984           }
985       }
986 
987       free(testorders);
988       free(orders);
989 
990       ucol_reset(testiter);
991 
992       /* ensures closing of elements done properly to clear writable buffer */
993       ucol_next(testiter, &status);
994       ucol_next(testiter, &status);
995       ucol_closeElements(testiter);
996       ucol_closeElements(iter);
997       ucol_close(coll);
998     } else {
999       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1000     }
1001 }
1002 
1003 /**
1004 * Testing the discontigous contractions
1005 */
TestDiscontiguos()1006 static void TestDiscontiguos() {
1007     const char               *rulestr    =
1008                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1009           UChar               rule[50];
1010           int                 rulelen = u_unescape(rulestr, rule, 50);
1011     const char               *src[] = {
1012      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1013     /* base character blocked */
1014      "XD\\u0300", "XD\\u0300\\u0315",
1015     /* non blocking combining character */
1016      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1017      /* blocking combining character */
1018      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1019      /* contraction prefix */
1020      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1021      "X\\u0300\\u031A\\u0315",
1022      /* ends not with a contraction character */
1023      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1024      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1025     };
1026     const char               *tgt[] = {
1027      /* non blocking combining character */
1028      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1029     /* base character blocked */
1030      "X D \\u0300", "X D \\u0300\\u0315",
1031     /* non blocking combining character */
1032      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1033      /* blocking combining character */
1034      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1035      /* contraction prefix */
1036      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1037      "X\\u0300 \\u031A \\u0315",
1038      /* ends not with a contraction character */
1039      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1040      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1041     };
1042           int                 size   = 20;
1043           UCollator          *coll;
1044           UErrorCode          status    = U_ZERO_ERROR;
1045           int                 count     = 0;
1046           UCollationElements *iter;
1047           UCollationElements *resultiter;
1048 
1049     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1050     iter       = ucol_openElements(coll, rule, 1, &status);
1051     resultiter = ucol_openElements(coll, rule, 1, &status);
1052 
1053     if (U_FAILURE(status)) {
1054         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1055         return;
1056     }
1057 
1058     while (count < size) {
1059         UChar  str[20];
1060         UChar  tstr[20];
1061         int    strLen = u_unescape(src[count], str, 20);
1062         UChar *s;
1063 
1064         ucol_setText(iter, str, strLen, &status);
1065         if (U_FAILURE(status)) {
1066             log_err("Error opening collation iterator\n");
1067             return;
1068         }
1069 
1070         u_unescape(tgt[count], tstr, 20);
1071         s = tstr;
1072 
1073         log_verbose("count %d\n", count);
1074 
1075         for (;;) {
1076             uint32_t  ce;
1077             UChar    *e = u_strchr(s, 0x20);
1078             if (e == 0) {
1079                 e = u_strchr(s, 0);
1080             }
1081             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1082             ce = ucol_next(resultiter, &status);
1083             if (U_FAILURE(status)) {
1084                 log_err("Error manipulating collation iterator\n");
1085                 return;
1086             }
1087             while (ce != UCOL_NULLORDER) {
1088                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1089                     U_FAILURE(status)) {
1090                     log_err("Discontiguos contraction test mismatch\n");
1091                     return;
1092                 }
1093                 ce = ucol_next(resultiter, &status);
1094                 if (U_FAILURE(status)) {
1095                     log_err("Error getting next collation element\n");
1096                     return;
1097                 }
1098             }
1099             s = e + 1;
1100             if (*e == 0) {
1101                 break;
1102             }
1103         }
1104         ucol_reset(iter);
1105         backAndForth(iter);
1106         count ++;
1107     }
1108     ucol_closeElements(resultiter);
1109     ucol_closeElements(iter);
1110     ucol_close(coll);
1111 }
1112 
1113 /**
1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1115 * normalization on AND jamo tailoring, among other things.
1116 *
1117 * Note: This test is sensitive to changes of the root collator,
1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1121 * For example, the DUCET's artificial secondary CE in the ae-ligature
1122 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1123 */
1124 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
1125     0x0020, 0xAC00,                 /* simple LV Hangul */
1126     0x0020, 0xAC01,                 /* simple LVT Hangul */
1127     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
1128     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
1129     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1130     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1131     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1132     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1133     0x0020, 0x00E6,                 /* small letter ae, expands */
1134     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
1135     0x0020
1136 };
1137 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
1138 
/*
 * Offsets that TestSearchCollatorElements expects from ucol_getOffset() for
 * the "root" collator, one entry per iterator step. The forward pass reads
 * the table front to back, the backward pass reads it back to front.
 */
static const int32_t rootStandardOffsets[] = {
     0,    1,  2,
     2,    3,  4,  4,
     4,    5,  6,  6,
     6,    7,  8,  8,
     8,    9, 10, 11,
    12,   13, 14, 15,
    16,   17, 18, 19,
    20,   21, 22, 23,
    /* the next row gets another 1-2 offset=26 if ae-ligature maps to three CEs */
    24,   25, 26,
    26,   27, 28, 28,
    28,
    29
};

/* Number of entries in rootStandardOffsets. */
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets) / sizeof(rootStandardOffsets[0]) };
1154 
/*
 * Offsets that TestSearchCollatorElements expects from ucol_getOffset() for
 * the "root@collation=search" collator, one entry per iterator step. The
 * forward pass reads the table front to back, the backward pass reads it
 * back to front.
 */
static const int32_t rootSearchOffsets[] = {
     0,    1,  2,
     2,    3,  4,  4,
     4,    5,  6,  6,  6,
     6,    7,  8,  8,  8,  8,  8,  8,
     8,    9, 10, 11,
    12,   13, 14, 15,
    16,   17, 18, 19, 20,
    20,   21, 22, 22, 23, 23, 23, 24,
    /* the next row gets another 1-2 offset=26 if ae-ligature maps to three CEs */
    24,   25, 26,
    26,   27, 28, 28,
    28,
    29
};

/* Number of entries in rootSearchOffsets. */
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets) / sizeof(rootSearchOffsets[0]) };
1170 
/* One TestSearchCollatorElements case: a collator locale and its expected offsets. */
typedef struct {
    /* locale ID handed to ucol_open(); NULL marks the end of the item list */
    const char *    locale;
    /* expected ucol_getOffset() values, in forward iteration order */
    const int32_t * offsets;
    /* number of entries in offsets */
    int32_t         offsetsLen;
} TSCEItem;
1176 
1177 static const TSCEItem tsceItems[] = {
1178     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
1179     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
1180     { NULL,                    NULL,                0                        }
1181 };
1182 
TestSearchCollatorElements(void)1183 static void TestSearchCollatorElements(void)
1184 {
1185     const TSCEItem * tsceItemPtr;
1186     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1187         UErrorCode status = U_ZERO_ERROR;
1188         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1189         if ( U_SUCCESS(status) ) {
1190             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1191             if ( U_SUCCESS(status) ) {
1192                 int32_t offset, element;
1193                 const int32_t * nextOffsetPtr;
1194                 const int32_t * limitOffsetPtr;
1195 
1196                 nextOffsetPtr = tsceItemPtr->offsets;
1197                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1198                 do {
1199                     offset = ucol_getOffset(uce);
1200                     element = ucol_next(uce, &status);
1201                     log_verbose("(%s) offset=%2d  ce=%08x\n", tsceItemPtr->locale, offset, element);
1202                     if ( element == 0 ) {
1203                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1204                     }
1205                     if ( nextOffsetPtr < limitOffsetPtr ) {
1206                         if (offset != *nextOffsetPtr) {
1207                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1208                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
1209                             nextOffsetPtr = limitOffsetPtr;
1210                             break;
1211                         }
1212                         nextOffsetPtr++;
1213                     } else {
1214                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1215                     }
1216                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1217                 if ( nextOffsetPtr < limitOffsetPtr ) {
1218                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1219                 }
1220 
1221                 ucol_setOffset(uce, kLen_tsceText, &status);
1222                 status = U_ZERO_ERROR;
1223                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1224                 limitOffsetPtr = tsceItemPtr->offsets;
1225                 do {
1226                     offset = ucol_getOffset(uce);
1227                     element = ucol_previous(uce, &status);
1228                     if ( element == 0 ) {
1229                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1230                     }
1231                     if ( nextOffsetPtr > limitOffsetPtr ) {
1232                         nextOffsetPtr--;
1233                         if (offset != *nextOffsetPtr) {
1234                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1235                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
1236                             nextOffsetPtr = limitOffsetPtr;
1237                             break;
1238                         }
1239                    } else {
1240                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1241                     }
1242                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1243                 if ( nextOffsetPtr > limitOffsetPtr ) {
1244                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1245                 }
1246 
1247                 ucol_closeElements(uce);
1248             } else {
1249                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1250             }
1251             ucol_close(ucol);
1252         } else {
1253             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1254         }
1255     }
1256 }
1257 
1258 #endif /* #if !UCONFIG_NO_COLLATION */
1259