• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CITERTST.C
11 *
12 * Modification History:
13 * Date      Name               Description
14 *           Madhu Katragadda   Ported for C API
15 * 02/19/01  synwee             Modified test case for new collation iterator
16 *********************************************************************************/
17 /*
18  * Collation Iterator tests.
19  * (Let me reiterate my position...)
20  */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_COLLATION
25 
26 #include "unicode/ucol.h"
27 #include "unicode/ucoleitr.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf16.h"
32 #include "unicode/putil.h"
33 #include "callcoll.h"
34 #include "cmemory.h"
35 #include "cintltst.h"
36 #include "citertst.h"
37 #include "ccolltst.h"
38 #include "filestrm.h"
39 #include "cstring.h"
40 #include "ucol_imp.h"
41 #include "uparse.h"
42 #include <stdbool.h>
43 #include <stdio.h>
44 
45 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
46 
/**
 * Registers every collation element iterator test defined in this file with
 * the test tree rooted at `root`. Each test is added under the path
 * "tscoll/citertst/<function name>".
 */
void addCollIterTest(TestNode** root)
{
/* Expands to addTest(root, &fn, "tscoll/citertst/fn"). */
#define ADD_CITER_TEST(fn) addTest(root, &fn, "tscoll/citertst/" #fn)

    ADD_CITER_TEST(TestPrevious);
    ADD_CITER_TEST(TestOffset);
    ADD_CITER_TEST(TestSetText);
    ADD_CITER_TEST(TestMaxExpansion);
    ADD_CITER_TEST(TestUnicodeChar);
    ADD_CITER_TEST(TestNormalizedUnicodeChar);
    ADD_CITER_TEST(TestNormalization);
    ADD_CITER_TEST(TestBug672);
    ADD_CITER_TEST(TestBug672Normalize);
    ADD_CITER_TEST(TestSmallBuffer);
    ADD_CITER_TEST(TestDiscontiguos);
    ADD_CITER_TEST(TestSearchCollatorElements);

#undef ADD_CITER_TEST
}
63 
/* Locale IDs that the TestBug672* tests open a collator for, in order. */

static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
67 
TestBug672()68 static void TestBug672() {
69     UErrorCode  status = U_ZERO_ERROR;
70     UChar       pattern[20];
71     UChar       text[50];
72     int         i;
73     int         result[3][3];
74 
75     u_uastrcpy(pattern, "resume");
76     u_uastrcpy(text, "Time to resume updating my resume.");
77 
78     for (i = 0; i < 3; ++ i) {
79         UCollator          *coll = ucol_open(LOCALES[i], &status);
80         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
81                                                      &status);
82         UCollationElements *titer = ucol_openElements(coll, text, -1,
83                                                      &status);
84         if (U_FAILURE(status)) {
85             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
86                     myErrorName(status));
87             return;
88         }
89 
90         log_verbose("locale tested %s\n", LOCALES[i]);
91 
92         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
93                U_SUCCESS(status)) {
94         }
95         if (U_FAILURE(status)) {
96             log_err("ERROR: reversing collation iterator :%s\n",
97                     myErrorName(status));
98             return;
99         }
100         ucol_reset(pitr);
101 
102         ucol_setOffset(titer, u_strlen(pattern), &status);
103         if (U_FAILURE(status)) {
104             log_err("ERROR: setting offset in collator :%s\n",
105                     myErrorName(status));
106             return;
107         }
108         result[i][0] = ucol_getOffset(titer);
109         log_verbose("Text iterator set to offset %d\n", result[i][0]);
110 
111         /* Use previous() */
112         ucol_previous(titer, &status);
113         result[i][1] = ucol_getOffset(titer);
114         log_verbose("Current offset %d after previous\n", result[i][1]);
115 
116         /* Add one to index */
117         log_verbose("Adding one to current offset...\n");
118         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
119         if (U_FAILURE(status)) {
120             log_err("ERROR: setting offset in collator :%s\n",
121                     myErrorName(status));
122             return;
123         }
124         result[i][2] = ucol_getOffset(titer);
125         log_verbose("Current offset in text = %d\n", result[i][2]);
126         ucol_closeElements(pitr);
127         ucol_closeElements(titer);
128         ucol_close(coll);
129     }
130 
131     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
132         uprv_memcmp(result[1], result[2], 3) != 0) {
133         log_err("ERROR: Different locales have different offsets at the same character\n");
134     }
135 }
136 
137 
138 
139 /*  Running this test with normalization enabled showed up a bug in the incremental
140     normalization code. */
TestBug672Normalize()141 static void TestBug672Normalize() {
142     UErrorCode  status = U_ZERO_ERROR;
143     UChar       pattern[20];
144     UChar       text[50];
145     int         i;
146     int         result[3][3];
147 
148     u_uastrcpy(pattern, "resume");
149     u_uastrcpy(text, "Time to resume updating my resume.");
150 
151     for (i = 0; i < 3; ++ i) {
152         UCollator          *coll = ucol_open(LOCALES[i], &status);
153         UCollationElements *pitr = NULL;
154         UCollationElements *titer = NULL;
155 
156         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
157 
158         pitr = ucol_openElements(coll, pattern, -1, &status);
159         titer = ucol_openElements(coll, text, -1, &status);
160         if (U_FAILURE(status)) {
161             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
162                     myErrorName(status));
163             return;
164         }
165 
166         log_verbose("locale tested %s\n", LOCALES[i]);
167 
168         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
169                U_SUCCESS(status)) {
170         }
171         if (U_FAILURE(status)) {
172             log_err("ERROR: reversing collation iterator :%s\n",
173                     myErrorName(status));
174             return;
175         }
176         ucol_reset(pitr);
177 
178         ucol_setOffset(titer, u_strlen(pattern), &status);
179         if (U_FAILURE(status)) {
180             log_err("ERROR: setting offset in collator :%s\n",
181                     myErrorName(status));
182             return;
183         }
184         result[i][0] = ucol_getOffset(titer);
185         log_verbose("Text iterator set to offset %d\n", result[i][0]);
186 
187         /* Use previous() */
188         ucol_previous(titer, &status);
189         result[i][1] = ucol_getOffset(titer);
190         log_verbose("Current offset %d after previous\n", result[i][1]);
191 
192         /* Add one to index */
193         log_verbose("Adding one to current offset...\n");
194         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
195         if (U_FAILURE(status)) {
196             log_err("ERROR: setting offset in collator :%s\n",
197                     myErrorName(status));
198             return;
199         }
200         result[i][2] = ucol_getOffset(titer);
201         log_verbose("Current offset in text = %d\n", result[i][2]);
202         ucol_closeElements(pitr);
203         ucol_closeElements(titer);
204         ucol_close(coll);
205     }
206 
207     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
208         uprv_memcmp(result[1], result[2], 3) != 0) {
209         log_err("ERROR: Different locales have different offsets at the same character\n");
210     }
211 }
212 
213 
214 
215 
216 /**
217  * Test for CollationElementIterator previous and next for the whole set of
218  * unicode characters.
219  */
TestUnicodeChar()220 static void TestUnicodeChar()
221 {
222     UChar source[0x100];
223     UCollator *en_us;
224     UCollationElements *iter;
225     UErrorCode status = U_ZERO_ERROR;
226     UChar codepoint;
227 
228     UChar *test;
229     en_us = ucol_open("en_US", &status);
230     if (U_FAILURE(status)){
231        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
232               myErrorName(status));
233        return;
234     }
235 
236     for (codepoint = 1; codepoint < 0xFFFE;)
237     {
238       test = source;
239 
240       while (codepoint % 0xFF != 0)
241       {
242         if (u_isdefined(codepoint))
243           *(test ++) = codepoint;
244         codepoint ++;
245       }
246 
247       if (u_isdefined(codepoint))
248         *(test ++) = codepoint;
249 
250       if (codepoint != 0xFFFF)
251         codepoint ++;
252 
253       *test = 0;
254       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
255       if(U_FAILURE(status)){
256           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
257               myErrorName(status));
258           ucol_close(en_us);
259           return;
260       }
261       /* A basic test to see if it's working at all */
262       log_verbose("codepoint testing %x\n", codepoint);
263       backAndForth(iter);
264       ucol_closeElements(iter);
265 
266       /* null termination test */
267       iter=ucol_openElements(en_us, source, -1, &status);
268       if(U_FAILURE(status)){
269           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
270               myErrorName(status));
271           ucol_close(en_us);
272           return;
273       }
274       /* A basic test to see if it's working at all */
275       backAndForth(iter);
276       ucol_closeElements(iter);
277     }
278 
279     ucol_close(en_us);
280 }
281 
282 /**
283  * Test for CollationElementIterator previous and next for the whole set of
284  * unicode characters with normalization on.
285  */
TestNormalizedUnicodeChar()286 static void TestNormalizedUnicodeChar()
287 {
288     UChar source[0x100];
289     UCollator *th_th;
290     UCollationElements *iter;
291     UErrorCode status = U_ZERO_ERROR;
292     UChar codepoint;
293 
294     UChar *test;
295     /* thai should have normalization on */
296     th_th = ucol_open("th_TH", &status);
297     if (U_FAILURE(status)){
298         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
299               myErrorName(status));
300         return;
301     }
302 
303     for (codepoint = 1; codepoint < 0xFFFE;)
304     {
305       test = source;
306 
307       while (codepoint % 0xFF != 0)
308       {
309         if (u_isdefined(codepoint))
310           *(test ++) = codepoint;
311         codepoint ++;
312       }
313 
314       if (u_isdefined(codepoint))
315         *(test ++) = codepoint;
316 
317       if (codepoint != 0xFFFF)
318         codepoint ++;
319 
320       *test = 0;
321       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
322       if(U_FAILURE(status)){
323           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
324               myErrorName(status));
325             ucol_close(th_th);
326           return;
327       }
328 
329       backAndForth(iter);
330       ucol_closeElements(iter);
331 
332       iter=ucol_openElements(th_th, source, -1, &status);
333       if(U_FAILURE(status)){
334           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
335               myErrorName(status));
336             ucol_close(th_th);
337           return;
338       }
339 
340       backAndForth(iter);
341       ucol_closeElements(iter);
342     }
343 
344     ucol_close(th_th);
345 }
346 
347 /**
348 * Test the incremental normalization
349 */
TestNormalization()350 static void TestNormalization()
351 {
352           UErrorCode          status = U_ZERO_ERROR;
353     const char               *str    =
354                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
355           UCollator          *coll;
356           UChar               rule[50];
357           int                 rulelen = u_unescape(str, rule, 50);
358           int                 count = 0;
359     const char                *testdata[] =
360                         {"\\u1ED9", "o\\u0323\\u0302",
361                         "\\u0300\\u0315", "\\u0315\\u0300",
362                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
363                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
364                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
365                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
366                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
367     int32_t   srclen;
368     UChar source[10];
369     UCollationElements *iter;
370 
371     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
372     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
373     if (U_FAILURE(status)){
374         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
375               myErrorName(status));
376         return;
377     }
378 
379     srclen = u_unescape(testdata[0], source, 10);
380     iter = ucol_openElements(coll, source, srclen, &status);
381     backAndForth(iter);
382     ucol_closeElements(iter);
383 
384     srclen = u_unescape(testdata[1], source, 10);
385     iter = ucol_openElements(coll, source, srclen, &status);
386     backAndForth(iter);
387     ucol_closeElements(iter);
388 
389     while (count < 12) {
390         srclen = u_unescape(testdata[count], source, 10);
391         iter = ucol_openElements(coll, source, srclen, &status);
392 
393         if (U_FAILURE(status)){
394             log_err("ERROR: in creation of collator element iterator\n %s\n",
395                   myErrorName(status));
396             return;
397         }
398         backAndForth(iter);
399         ucol_closeElements(iter);
400 
401         iter = ucol_openElements(coll, source, -1, &status);
402 
403         if (U_FAILURE(status)){
404             log_err("ERROR: in creation of collator element iterator\n %s\n",
405                   myErrorName(status));
406             return;
407         }
408         backAndForth(iter);
409         ucol_closeElements(iter);
410         count ++;
411     }
412     ucol_close(coll);
413 }
414 
415 /**
416  * Test for CollationElementIterator.previous()
417  *
418  * @bug 4108758 - Make sure it works with contracting characters
419  *
420  */
TestPrevious()421 static void TestPrevious()
422 {
423     UCollator *coll=NULL;
424     UChar rule[50];
425     UChar *source;
426     UCollator *c1, *c2, *c3;
427     UCollationElements *iter;
428     UErrorCode status = U_ZERO_ERROR;
429     UChar test1[50];
430     UChar test2[50];
431 
432     u_uastrcpy(test1, "What subset of all possible test cases?");
433     u_uastrcpy(test2, "has the highest probability of detecting");
434     coll = ucol_open("en_US", &status);
435 
436     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
437     log_verbose("English locale testing back and forth\n");
438     if(U_FAILURE(status)){
439         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
440             myErrorName(status));
441         ucol_close(coll);
442         return;
443     }
444     /* A basic test to see if it's working at all */
445     backAndForth(iter);
446     ucol_closeElements(iter);
447     ucol_close(coll);
448 
449     /* Test with a contracting character sequence */
450     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
451     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
452 
453     log_verbose("Contraction rule testing back and forth with no normalization\n");
454 
455     if (c1 == NULL || U_FAILURE(status))
456     {
457         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
458             myErrorName(status));
459         return;
460     }
461     source=(UChar*)malloc(sizeof(UChar) * 20);
462     u_uastrcpy(source, "abchdcba");
463     iter=ucol_openElements(c1, source, u_strlen(source), &status);
464     if(U_FAILURE(status)){
465         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
466             myErrorName(status));
467         return;
468     }
469     backAndForth(iter);
470     ucol_closeElements(iter);
471     ucol_close(c1);
472 
473     /* Test with an expanding character sequence */
474     u_uastrcpy(rule, "&a < b < c/abd < d");
475     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
476     log_verbose("Expansion rule testing back and forth with no normalization\n");
477     if (c2 == NULL || U_FAILURE(status))
478     {
479         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
480             myErrorName(status));
481         return;
482     }
483     u_uastrcpy(source, "abcd");
484     iter=ucol_openElements(c2, source, u_strlen(source), &status);
485     if(U_FAILURE(status)){
486         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
487             myErrorName(status));
488         return;
489     }
490     backAndForth(iter);
491     ucol_closeElements(iter);
492     ucol_close(c2);
493     /* Now try both */
494     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
495     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
496     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
497 
498     if (c3 == NULL || U_FAILURE(status))
499     {
500         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
501             myErrorName(status));
502         return;
503     }
504     u_uastrcpy(source, "abcdbchdc");
505     iter=ucol_openElements(c3, source, u_strlen(source), &status);
506     if(U_FAILURE(status)){
507         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
508             myErrorName(status));
509         return;
510     }
511     backAndForth(iter);
512     ucol_closeElements(iter);
513     ucol_close(c3);
514     source[0] = 0x0e41;
515     source[1] = 0x0e02;
516     source[2] = 0x0e41;
517     source[3] = 0x0e02;
518     source[4] = 0x0e27;
519     source[5] = 0x61;
520     source[6] = 0x62;
521     source[7] = 0x63;
522     source[8] = 0;
523 
524     coll = ucol_open("th_TH", &status);
525     log_verbose("Thai locale testing back and forth with normalization\n");
526     iter=ucol_openElements(coll, source, u_strlen(source), &status);
527     if(U_FAILURE(status)){
528         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
529             myErrorName(status));
530         return;
531     }
532     backAndForth(iter);
533     ucol_closeElements(iter);
534     ucol_close(coll);
535 
536     /* prev test */
537     source[0] = 0x0061;
538     source[1] = 0x30CF;
539     source[2] = 0x3099;
540     source[3] = 0x30FC;
541     source[4] = 0;
542 
543     coll = ucol_open("ja_JP", &status);
544     log_verbose("Japanese locale testing back and forth with normalization\n");
545     iter=ucol_openElements(coll, source, u_strlen(source), &status);
546     if(U_FAILURE(status)){
547         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
548             myErrorName(status));
549         return;
550     }
551     backAndForth(iter);
552     ucol_closeElements(iter);
553     ucol_close(coll);
554 
555     free(source);
556 }
557 
558 /**
559  * Test for getOffset() and setOffset()
560  */
TestOffset()561 static void TestOffset()
562 {
563     UErrorCode status= U_ZERO_ERROR;
564     UCollator *en_us=NULL;
565     UCollationElements *iter, *pristine;
566     int32_t offset;
567     OrderAndOffset *orders;
568     int32_t orderLength=0;
569     int     count = 0;
570     UChar test1[50];
571     UChar test2[50];
572 
573     u_uastrcpy(test1, "What subset of all possible test cases?");
574     u_uastrcpy(test2, "has the highest probability of detecting");
575     en_us = ucol_open("en_US", &status);
576     log_verbose("Testing getOffset and setOffset for collations\n");
577     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
578     if(U_FAILURE(status)){
579         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
580             myErrorName(status));
581         ucol_close(en_us);
582         return;
583     }
584 
585     /* testing boundaries */
586     ucol_setOffset(iter, 0, &status);
587     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
588         log_err("Error: After setting offset to 0, we should be at the end "
589                 "of the backwards iteration");
590     }
591     ucol_setOffset(iter, u_strlen(test1), &status);
592     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
593         log_err("Error: After setting offset to end of the string, we should "
594                 "be at the end of the backwards iteration");
595     }
596 
597     /* Run all the way through the iterator, then get the offset */
598 
599     orders = getOrders(iter, &orderLength);
600 
601     offset = ucol_getOffset(iter);
602 
603     if (offset != u_strlen(test1))
604     {
605         log_err("offset at end != length %d vs %d\n", offset,
606             u_strlen(test1) );
607     }
608 
609     /* Now set the offset back to the beginning and see if it works */
610     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
611     if(U_FAILURE(status)){
612         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
613             myErrorName(status));
614     ucol_close(en_us);
615         return;
616     }
617     status = U_ZERO_ERROR;
618 
619     ucol_setOffset(iter, 0, &status);
620     if (U_FAILURE(status))
621     {
622         log_err("setOffset failed. %s\n",    myErrorName(status));
623     }
624     else
625     {
626         assertEqual(iter, pristine);
627     }
628 
629     ucol_closeElements(pristine);
630     ucol_closeElements(iter);
631     free(orders);
632 
633     /* testing offsets in normalization buffer */
634     test1[0] = 0x61;
635     test1[1] = 0x300;
636     test1[2] = 0x316;
637     test1[3] = 0x62;
638     test1[4] = 0;
639     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
640     iter = ucol_openElements(en_us, test1, 4, &status);
641     if(U_FAILURE(status)){
642         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
643             myErrorName(status));
644         ucol_close(en_us);
645         return;
646     }
647 
648     count = 0;
649     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
650         U_SUCCESS(status)) {
651         switch (count) {
652         case 0:
653             if (ucol_getOffset(iter) != 1) {
654                 log_err("ERROR: Offset of iteration should be 1\n");
655             }
656             break;
657         case 3:
658             if (ucol_getOffset(iter) != 4) {
659                 log_err("ERROR: Offset of iteration should be 4\n");
660             }
661             break;
662         default:
663             if (ucol_getOffset(iter) != 3) {
664                 log_err("ERROR: Offset of iteration should be 3\n");
665             }
666         }
667         count ++;
668     }
669 
670     ucol_reset(iter);
671     count = 0;
672     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
673         U_SUCCESS(status)) {
674         switch (count) {
675         case 0:
676         case 1:
677             if (ucol_getOffset(iter) != 3) {
678                 log_err("ERROR: Offset of iteration should be 3\n");
679             }
680             break;
681         case 2:
682             if (ucol_getOffset(iter) != 1) {
683                 log_err("ERROR: Offset of iteration should be 1\n");
684             }
685             break;
686         default:
687             if (ucol_getOffset(iter) != 0) {
688                 log_err("ERROR: Offset of iteration should be 0\n");
689             }
690         }
691         count ++;
692     }
693 
694     if(U_FAILURE(status)){
695         log_err("ERROR: in iterating collation elements %s\n",
696             myErrorName(status));
697     }
698 
699     ucol_closeElements(iter);
700     ucol_close(en_us);
701 }
702 
703 /**
704  * Test for setText()
705  */
TestSetText()706 static void TestSetText()
707 {
708     int32_t c,i;
709     UErrorCode status = U_ZERO_ERROR;
710     UCollator *en_us=NULL;
711     UCollationElements *iter1, *iter2;
712     UChar test1[50];
713     UChar test2[50];
714 
715     u_uastrcpy(test1, "What subset of all possible test cases?");
716     u_uastrcpy(test2, "has the highest probability of detecting");
717     en_us = ucol_open("en_US", &status);
718     log_verbose("testing setText for Collation elements\n");
719     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
720     if(U_FAILURE(status)){
721         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
722             myErrorName(status));
723     ucol_close(en_us);
724         return;
725     }
726     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
727     if(U_FAILURE(status)){
728         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
729             myErrorName(status));
730     ucol_close(en_us);
731         return;
732     }
733 
734     /* Run through the second iterator just to exercise it */
735     c = ucol_next(iter2, &status);
736     i = 0;
737 
738     while ( ++i < 10 && (c != UCOL_NULLORDER))
739     {
740         if (U_FAILURE(status))
741         {
742             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
743             ucol_closeElements(iter2);
744             ucol_closeElements(iter1);
745     ucol_close(en_us);
746             return;
747         }
748 
749         c = ucol_next(iter2, &status);
750     }
751 
752     /* Now set it to point to the same string as the first iterator */
753     ucol_setText(iter2, test1, u_strlen(test1), &status);
754     if (U_FAILURE(status))
755     {
756         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
757     }
758     else
759     {
760         assertEqual(iter1, iter2);
761     }
762 
763     /* Now set it to point to a null string with fake length*/
764     ucol_setText(iter2, NULL, 2, &status);
765     if (status != U_ILLEGAL_ARGUMENT_ERROR)
766     {
767         log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
768                 myErrorName(status));
769     }
770 
771     ucol_closeElements(iter2);
772     ucol_closeElements(iter1);
773     ucol_close(en_us);
774 }
775 
776 /** @bug 4108762
777  * Test for getMaxExpansion()
778  */
TestMaxExpansion()779 static void TestMaxExpansion()
780 {
781     UErrorCode          status = U_ZERO_ERROR;
782     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
783     UChar               ch     = 0;
784     UChar32             unassigned = 0xEFFFD;
785     UChar               supplementary[2];
786     uint32_t            stringOffset = 0;
787     UBool               isError = false;
788     uint32_t            sorder = 0;
789     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
790     uint32_t            temporder = 0;
791 
792     UChar rule[256];
793     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
794     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
795         UCOL_DEFAULT_STRENGTH,NULL, &status);
796     if(U_SUCCESS(status) && coll) {
797       iter = ucol_openElements(coll, &ch, 1, &status);
798 
799       while (ch < 0xFFFF && U_SUCCESS(status)) {
800           int      count = 1;
801           uint32_t order;
802           int32_t  size = 0;
803 
804           ch ++;
805 
806           ucol_setText(iter, &ch, 1, &status);
807           order = ucol_previous(iter, &status);
808 
809           /* thai management */
810           if (order == 0)
811               order = ucol_previous(iter, &status);
812 
813           while (U_SUCCESS(status) &&
814               ucol_previous(iter, &status) != UCOL_NULLORDER) {
815               count ++;
816           }
817 
818           size = ucol_getMaxExpansion(iter, order);
819           if (U_FAILURE(status) || size < count) {
820               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
821                   ch, count);
822           }
823       }
824 
825       /* testing for exact max expansion */
826       ch = 0;
827       while (ch < 0x61) {
828           uint32_t order;
829           int32_t  size;
830           ucol_setText(iter, &ch, 1, &status);
831           order = ucol_previous(iter, &status);
832           size  = ucol_getMaxExpansion(iter, order);
833           if (U_FAILURE(status) || size != 1) {
834               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
835                   ch, 1);
836           }
837           ch ++;
838       }
839 
840       ch = 0x63;
841       ucol_setText(iter, &ch, 1, &status);
842       temporder = ucol_previous(iter, &status);
843 
844       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
845           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
846                   ch, 3);
847       }
848 
849       ch = 0x64;
850       ucol_setText(iter, &ch, 1, &status);
851       temporder = ucol_previous(iter, &status);
852 
853       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
854           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
855                   ch, 3);
856       }
857 
858       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
859       (void)isError;    /* Suppress set but not used warning. */
860       ucol_setText(iter, supplementary, 2, &status);
861       sorder = ucol_previous(iter, &status);
862 
863       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
864           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
865                   ch, 2);
866       }
867 
868       /* testing jamo */
869       ch = 0x1165;
870 
871       ucol_setText(iter, &ch, 1, &status);
872       temporder = ucol_previous(iter, &status);
873       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
874           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
875                   ch, 3);
876       }
877 
878       ucol_closeElements(iter);
879       ucol_close(coll);
880 
881       /* testing special jamo &a<\u1160 */
882       rule[0] = 0x26;
883       rule[1] = 0x71;
884       rule[2] = 0x3c;
885       rule[3] = 0x1165;
886       rule[4] = 0x2f;
887       rule[5] = 0x71;
888       rule[6] = 0x71;
889       rule[7] = 0x71;
890       rule[8] = 0x71;
891       rule[9] = 0;
892 
893       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
894           UCOL_DEFAULT_STRENGTH,NULL, &status);
895       iter = ucol_openElements(coll, &ch, 1, &status);
896 
897       temporder = ucol_previous(iter, &status);
898       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
899           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
900                   ch, 5);
901       }
902 
903       ucol_closeElements(iter);
904       ucol_close(coll);
905     } else {
906       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
907     }
908 
909 }
910 
911 
/**
 * Advances two collation element iterators in lock step with ucol_next() and
 * logs an error at the first step where the returned elements differ.
 *
 * The loop stops once i1 returns UCOL_NULLORDER. Elements are compared before
 * that check, so i2 has to return UCOL_NULLORDER on the same step or a
 * mismatch is reported.
 *
 * @param i1 first iterator; advanced by this call
 * @param i2 second iterator; advanced by this call
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t    count  = 0;
    int32_t    c1, c2;

    do
    {
        c1 = ucol_next(i1, &status);
        c2 = ucol_next(i2, &status);

        /*
         * Report an iterator failure explicitly. Without this check a failing
         * status is never looked at and the comparison may end without any
         * diagnostic.
         */
        if (U_FAILURE(status))
        {
            log_err("Error in iteration %d assertEqual, ucol_next failed -> %s\n", count, u_errorName(status));
            break;
        }

        if (c1 != c2)
        {
            log_err("Error in iteration %d assertEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
            break;
        }

        count += 1;
    }
    while (c1 != UCOL_NULLORDER);
}
933 
934 /**
935  * Testing iterators with extremely small buffers
936  */
TestSmallBuffer()937 static void TestSmallBuffer()
938 {
939     UErrorCode          status = U_ZERO_ERROR;
940     UCollator          *coll;
941     UCollationElements *testiter,
942                        *iter;
943     int32_t             count = 0;
944     OrderAndOffset     *testorders,
945                        *orders;
946 
947     UChar teststr[500];
948     UChar str[] = {0x300, 0x31A, 0};
949     /*
950     creating a long string of decomposable characters,
951     since by default the writable buffer is of size 256
952     */
953     while (count < 500) {
954         if ((count & 1) == 0) {
955             teststr[count ++] = 0x300;
956         }
957         else {
958             teststr[count ++] = 0x31A;
959         }
960     }
961 
962     coll = ucol_open("th_TH", &status);
963     if(U_SUCCESS(status) && coll) {
964       testiter = ucol_openElements(coll, teststr, 500, &status);
965       iter = ucol_openElements(coll, str, 2, &status);
966 
967       orders     = getOrders(iter, &count);
968       if (count != 2) {
969           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
970       }
971 
972       /*
973       this will rearrange the string data to 250 characters of 0x300 first then
974       250 characters of 0x031A
975       */
976       testorders = getOrders(testiter, &count);
977 
978       if (count != 500) {
979           log_err("Error decomposition does not give the right sized collation elements\n");
980       }
981 
982       while (count != 0) {
983           /* UCA collation element for 0x0F76 */
984           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
985               (count <= 250 && testorders[-- count].order != orders[0].order)) {
986               log_err("Error decomposition does not give the right collation element at %d count\n", count);
987               break;
988           }
989       }
990 
991       free(testorders);
992       free(orders);
993 
994       ucol_reset(testiter);
995 
996       /* ensures closing of elements done properly to clear writable buffer */
997       ucol_next(testiter, &status);
998       ucol_next(testiter, &status);
999       ucol_closeElements(testiter);
1000       ucol_closeElements(iter);
1001       ucol_close(coll);
1002     } else {
1003       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1004     }
1005 }
1006 
1007 /**
1008 * Testing the discontiguous contractions
1009 */
TestDiscontiguos()1010 static void TestDiscontiguos() {
1011     const char               *rulestr    =
1012                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1013           UChar               rule[50];
1014           int                 rulelen = u_unescape(rulestr, rule, 50);
1015     const char               *src[] = {
1016      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1017     /* base character blocked */
1018      "XD\\u0300", "XD\\u0300\\u0315",
1019     /* non blocking combining character */
1020      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1021      /* blocking combining character */
1022      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1023      /* contraction prefix */
1024      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1025      "X\\u0300\\u031A\\u0315",
1026      /* ends not with a contraction character */
1027      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1028      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1029     };
1030     const char               *tgt[] = {
1031      /* non blocking combining character */
1032      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1033     /* base character blocked */
1034      "X D \\u0300", "X D \\u0300\\u0315",
1035     /* non blocking combining character */
1036      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1037      /* blocking combining character */
1038      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1039      /* contraction prefix */
1040      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1041      "X\\u0300 \\u031A \\u0315",
1042      /* ends not with a contraction character */
1043      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1044      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1045     };
1046           int                 size   = 20;
1047           UCollator          *coll;
1048           UErrorCode          status    = U_ZERO_ERROR;
1049           int                 count     = 0;
1050           UCollationElements *iter;
1051           UCollationElements *resultiter;
1052 
1053     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1054     iter       = ucol_openElements(coll, rule, 1, &status);
1055     resultiter = ucol_openElements(coll, rule, 1, &status);
1056 
1057     if (U_FAILURE(status)) {
1058         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1059         return;
1060     }
1061 
1062     while (count < size) {
1063         UChar  str[20];
1064         UChar  tstr[20];
1065         int    strLen = u_unescape(src[count], str, 20);
1066         UChar *s;
1067 
1068         ucol_setText(iter, str, strLen, &status);
1069         if (U_FAILURE(status)) {
1070             log_err("Error opening collation iterator\n");
1071             return;
1072         }
1073 
1074         u_unescape(tgt[count], tstr, 20);
1075         s = tstr;
1076 
1077         log_verbose("count %d\n", count);
1078 
1079         for (;;) {
1080             int32_t  ce;
1081             UChar    *e = u_strchr(s, 0x20);
1082             if (e == 0) {
1083                 e = u_strchr(s, 0);
1084             }
1085             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1086             ce = ucol_next(resultiter, &status);
1087             if (U_FAILURE(status)) {
1088                 log_err("Error manipulating collation iterator\n");
1089                 return;
1090             }
1091             while (ce != UCOL_NULLORDER) {
1092                 if (ce != ucol_next(iter, &status) ||
1093                     U_FAILURE(status)) {
1094                     log_err("Discontiguous contraction test mismatch\n");
1095                     return;
1096                 }
1097                 ce = ucol_next(resultiter, &status);
1098                 if (U_FAILURE(status)) {
1099                     log_err("Error getting next collation element\n");
1100                     return;
1101                 }
1102             }
1103             s = e + 1;
1104             if (*e == 0) {
1105                 break;
1106             }
1107         }
1108         ucol_reset(iter);
1109         backAndForth(iter);
1110         count ++;
1111     }
1112     ucol_closeElements(resultiter);
1113     ucol_closeElements(iter);
1114     ucol_close(coll);
1115 }
1116 
1117 /**
1118 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1119 * normalization on AND jamo tailoring, among other things.
1120 *
1121 * Note: This test is sensitive to changes of the root collator,
1122 * for example whether the ae-ligature maps to three CEs (as in the DUCET)
1123 * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1124 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1125 * For example, the DUCET's artificial secondary CE in the ae-ligature
1126 * may map to two 32-bit iterator CEs (as it did until ICU 52).
1127 */
1128 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
1129     0x0020, 0xAC00,                 /* simple LV Hangul */
1130     0x0020, 0xAC01,                 /* simple LVT Hangul */
1131     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
1132     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
1133     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1134     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1135     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1136     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1137     0x0020, 0x00E6,                 /* small letter ae, expands */
1138     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
1139     0x0020
1140 };
1141 enum { kLen_tsceText = UPRV_LENGTHOF(tsceText) };
1142 
1143 static const int32_t rootStandardOffsets[] = {
1144     0,  1,2,
1145     2,  3,4,4,
1146     4,  5,6,6,
1147     6,  7,8,8,
1148     8,  9,10,11,
1149     12, 13,14,15,
1150     16, 17,18,19,
1151     20, 21,22,23,
1152     24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1153     26, 27,28,28,
1154     28,
1155     29
1156 };
1157 enum { kLen_rootStandardOffsets = UPRV_LENGTHOF(rootStandardOffsets) };
1158 
1159 static const int32_t rootSearchOffsets[] = {
1160     0,  1,2,
1161     2,  3,4,4,
1162     4,  5,6,6,6,
1163     6,  7,8,8,8,8,8,8,
1164     8,  9,10,11,
1165     12, 13,14,15,
1166     16, 17,18,19,20,
1167     20, 21,22,22,23,23,23,24,
1168     24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
1169     26, 27,28,28,
1170     28,
1171     29
1172 };
1173 enum { kLen_rootSearchOffsets = UPRV_LENGTHOF(rootSearchOffsets) };
1174 
/* One TestSearchCollatorElements case: a locale and its expected offsets. */
typedef struct {
    const char    *locale;      /* passed to ucol_open(); NULL ends the table */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1180 
1181 static const TSCEItem tsceItems[] = {
1182     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
1183     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
1184     { NULL,                    NULL,                0                        }
1185 };
1186 
TestSearchCollatorElements(void)1187 static void TestSearchCollatorElements(void)
1188 {
1189     const TSCEItem * tsceItemPtr;
1190     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1191         UErrorCode status = U_ZERO_ERROR;
1192         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1193         if ( U_SUCCESS(status) ) {
1194             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1195             if ( U_SUCCESS(status) ) {
1196                 int32_t offset, element;
1197                 const int32_t * nextOffsetPtr;
1198                 const int32_t * limitOffsetPtr;
1199 
1200                 nextOffsetPtr = tsceItemPtr->offsets;
1201                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1202                 do {
1203                     offset = ucol_getOffset(uce);
1204                     element = ucol_next(uce, &status);
1205                     log_verbose("(%s) offset=%2d  ce=%08x\n", tsceItemPtr->locale, offset, element);
1206                     if ( element == 0 ) {
1207                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1208                     }
1209                     if ( nextOffsetPtr < limitOffsetPtr ) {
1210                         if (offset != *nextOffsetPtr) {
1211                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1212                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
1213                             nextOffsetPtr = limitOffsetPtr;
1214                             break;
1215                         }
1216                         nextOffsetPtr++;
1217                     } else {
1218                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1219                     }
1220                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1221                 if ( nextOffsetPtr < limitOffsetPtr ) {
1222                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1223                 }
1224 
1225                 ucol_setOffset(uce, kLen_tsceText, &status);
1226                 status = U_ZERO_ERROR;
1227                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1228                 limitOffsetPtr = tsceItemPtr->offsets;
1229                 do {
1230                     offset = ucol_getOffset(uce);
1231                     element = ucol_previous(uce, &status);
1232                     if ( element == 0 ) {
1233                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1234                     }
1235                     if ( nextOffsetPtr > limitOffsetPtr ) {
1236                         nextOffsetPtr--;
1237                         if (offset != *nextOffsetPtr) {
1238                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1239                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
1240                             nextOffsetPtr = limitOffsetPtr;
1241                             break;
1242                         }
1243                    } else {
1244                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1245                     }
1246                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1247                 if ( nextOffsetPtr > limitOffsetPtr ) {
1248                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1249                 }
1250 
1251                 ucol_closeElements(uce);
1252             } else {
1253                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1254             }
1255             ucol_close(ucol);
1256         } else {
1257             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1258         }
1259     }
1260 }
1261 
1262 #endif /* #if !UCONFIG_NO_COLLATION */
1263