• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2011, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File CITERTST.C
9 *
10 * Modification History:
11 * Date      Name               Description
12 *           Madhu Katragadda   Ported for C API
13 * 02/19/01  synwee             Modified test case for new collation iterator
14 *********************************************************************************/
15 /*
16  * Collation Iterator tests.
17  * (Let me reiterate my position...)
18  */
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_COLLATION
23 
24 #include "unicode/ucol.h"
25 #include "unicode/ucoleitr.h"
26 #include "unicode/uloc.h"
27 #include "unicode/uchar.h"
28 #include "unicode/ustring.h"
29 #include "unicode/putil.h"
30 #include "callcoll.h"
31 #include "cmemory.h"
32 #include "cintltst.h"
33 #include "citertst.h"
34 #include "ccolltst.h"
35 #include "filestrm.h"
36 #include "cstring.h"
37 #include "ucol_imp.h"
38 #include "ucol_tok.h"
39 #include "uparse.h"
40 #include <stdio.h>
41 
42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43 
/**
 * Registers every collation element iterator test of this file with the
 * test framework, each under a "tscoll/citertst/<TestName>" path.
 *
 * @param root root node that is handed to addTest() for each registration
 */
void addCollIterTest(TestNode** root)
{
    /* Iteration direction, offsets and text replacement */
    addTest(root, TestPrevious,     "tscoll/citertst/TestPrevious");
    addTest(root, TestOffset,       "tscoll/citertst/TestOffset");
    addTest(root, TestSetText,      "tscoll/citertst/TestSetText");
    addTest(root, TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");

    /* Sweeps over the code point range and normalization handling */
    addTest(root, TestUnicodeChar,           "tscoll/citertst/TestUnicodeChar");
    addTest(root, TestNormalizedUnicodeChar, "tscoll/citertst/TestNormalizedUnicodeChar");
    addTest(root, TestNormalization,         "tscoll/citertst/TestNormalization");

    /* Regression tests */
    addTest(root, TestBug672,          "tscoll/citertst/TestBug672");
    addTest(root, TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");

    /* Buffers, collation elements and sort keys */
    addTest(root, TestSmallBuffer,            "tscoll/citertst/TestSmallBuffer");
    addTest(root, TestCEs,                    "tscoll/citertst/TestCEs");
    addTest(root, TestDiscontiguos,           "tscoll/citertst/TestDiscontiguos");
    addTest(root, TestCEBufferOverflow,       "tscoll/citertst/TestCEBufferOverflow");
    addTest(root, TestCEValidity,             "tscoll/citertst/TestCEValidity");
    addTest(root, TestSortKeyValidity,        "tscoll/citertst/TestSortKeyValidity");
    addTest(root, TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
}
64 
/*
 * The locales we support.
 * TestBug672 and TestBug672Normalize open one collator per entry and index
 * this table with 0..2, so it must keep exactly three entries.
 */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
68 
TestBug672()69 static void TestBug672() {
70     UErrorCode  status = U_ZERO_ERROR;
71     UChar       pattern[20];
72     UChar       text[50];
73     int         i;
74     int         result[3][3];
75 
76     u_uastrcpy(pattern, "resume");
77     u_uastrcpy(text, "Time to resume updating my resume.");
78 
79     for (i = 0; i < 3; ++ i) {
80         UCollator          *coll = ucol_open(LOCALES[i], &status);
81         UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
82                                                      &status);
83         UCollationElements *titer = ucol_openElements(coll, text, -1,
84                                                      &status);
85         if (U_FAILURE(status)) {
86             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
87                     myErrorName(status));
88             return;
89         }
90 
91         log_verbose("locale tested %s\n", LOCALES[i]);
92 
93         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
94                U_SUCCESS(status)) {
95         }
96         if (U_FAILURE(status)) {
97             log_err("ERROR: reversing collation iterator :%s\n",
98                     myErrorName(status));
99             return;
100         }
101         ucol_reset(pitr);
102 
103         ucol_setOffset(titer, u_strlen(pattern), &status);
104         if (U_FAILURE(status)) {
105             log_err("ERROR: setting offset in collator :%s\n",
106                     myErrorName(status));
107             return;
108         }
109         result[i][0] = ucol_getOffset(titer);
110         log_verbose("Text iterator set to offset %d\n", result[i][0]);
111 
112         /* Use previous() */
113         ucol_previous(titer, &status);
114         result[i][1] = ucol_getOffset(titer);
115         log_verbose("Current offset %d after previous\n", result[i][1]);
116 
117         /* Add one to index */
118         log_verbose("Adding one to current offset...\n");
119         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
120         if (U_FAILURE(status)) {
121             log_err("ERROR: setting offset in collator :%s\n",
122                     myErrorName(status));
123             return;
124         }
125         result[i][2] = ucol_getOffset(titer);
126         log_verbose("Current offset in text = %d\n", result[i][2]);
127         ucol_closeElements(pitr);
128         ucol_closeElements(titer);
129         ucol_close(coll);
130     }
131 
132     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
133         uprv_memcmp(result[1], result[2], 3) != 0) {
134         log_err("ERROR: Different locales have different offsets at the same character\n");
135     }
136 }
137 
138 
139 
140 /*  Running this test with normalization enabled showed up a bug in the incremental
141     normalization code. */
TestBug672Normalize()142 static void TestBug672Normalize() {
143     UErrorCode  status = U_ZERO_ERROR;
144     UChar       pattern[20];
145     UChar       text[50];
146     int         i;
147     int         result[3][3];
148 
149     u_uastrcpy(pattern, "resume");
150     u_uastrcpy(text, "Time to resume updating my resume.");
151 
152     for (i = 0; i < 3; ++ i) {
153         UCollator          *coll = ucol_open(LOCALES[i], &status);
154         UCollationElements *pitr = NULL;
155         UCollationElements *titer = NULL;
156 
157         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
158 
159         pitr = ucol_openElements(coll, pattern, -1, &status);
160         titer = ucol_openElements(coll, text, -1, &status);
161         if (U_FAILURE(status)) {
162             log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
163                     myErrorName(status));
164             return;
165         }
166 
167         log_verbose("locale tested %s\n", LOCALES[i]);
168 
169         while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
170                U_SUCCESS(status)) {
171         }
172         if (U_FAILURE(status)) {
173             log_err("ERROR: reversing collation iterator :%s\n",
174                     myErrorName(status));
175             return;
176         }
177         ucol_reset(pitr);
178 
179         ucol_setOffset(titer, u_strlen(pattern), &status);
180         if (U_FAILURE(status)) {
181             log_err("ERROR: setting offset in collator :%s\n",
182                     myErrorName(status));
183             return;
184         }
185         result[i][0] = ucol_getOffset(titer);
186         log_verbose("Text iterator set to offset %d\n", result[i][0]);
187 
188         /* Use previous() */
189         ucol_previous(titer, &status);
190         result[i][1] = ucol_getOffset(titer);
191         log_verbose("Current offset %d after previous\n", result[i][1]);
192 
193         /* Add one to index */
194         log_verbose("Adding one to current offset...\n");
195         ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
196         if (U_FAILURE(status)) {
197             log_err("ERROR: setting offset in collator :%s\n",
198                     myErrorName(status));
199             return;
200         }
201         result[i][2] = ucol_getOffset(titer);
202         log_verbose("Current offset in text = %d\n", result[i][2]);
203         ucol_closeElements(pitr);
204         ucol_closeElements(titer);
205         ucol_close(coll);
206     }
207 
208     if (uprv_memcmp(result[0], result[1], 3) != 0 ||
209         uprv_memcmp(result[1], result[2], 3) != 0) {
210         log_err("ERROR: Different locales have different offsets at the same character\n");
211     }
212 }
213 
214 
215 
216 
217 /**
218  * Test for CollationElementIterator previous and next for the whole set of
219  * unicode characters.
220  */
TestUnicodeChar()221 static void TestUnicodeChar()
222 {
223     UChar source[0x100];
224     UCollator *en_us;
225     UCollationElements *iter;
226     UErrorCode status = U_ZERO_ERROR;
227     UChar codepoint;
228 
229     UChar *test;
230     en_us = ucol_open("en_US", &status);
231     if (U_FAILURE(status)){
232        log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
233               myErrorName(status));
234        return;
235     }
236 
237     for (codepoint = 1; codepoint < 0xFFFE;)
238     {
239       test = source;
240 
241       while (codepoint % 0xFF != 0)
242       {
243         if (u_isdefined(codepoint))
244           *(test ++) = codepoint;
245         codepoint ++;
246       }
247 
248       if (u_isdefined(codepoint))
249         *(test ++) = codepoint;
250 
251       if (codepoint != 0xFFFF)
252         codepoint ++;
253 
254       *test = 0;
255       iter=ucol_openElements(en_us, source, u_strlen(source), &status);
256       if(U_FAILURE(status)){
257           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
258               myErrorName(status));
259           ucol_close(en_us);
260           return;
261       }
262       /* A basic test to see if it's working at all */
263       log_verbose("codepoint testing %x\n", codepoint);
264       backAndForth(iter);
265       ucol_closeElements(iter);
266 
267       /* null termination test */
268       iter=ucol_openElements(en_us, source, -1, &status);
269       if(U_FAILURE(status)){
270           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
271               myErrorName(status));
272           ucol_close(en_us);
273           return;
274       }
275       /* A basic test to see if it's working at all */
276       backAndForth(iter);
277       ucol_closeElements(iter);
278     }
279 
280     ucol_close(en_us);
281 }
282 
283 /**
284  * Test for CollationElementIterator previous and next for the whole set of
285  * unicode characters with normalization on.
286  */
TestNormalizedUnicodeChar()287 static void TestNormalizedUnicodeChar()
288 {
289     UChar source[0x100];
290     UCollator *th_th;
291     UCollationElements *iter;
292     UErrorCode status = U_ZERO_ERROR;
293     UChar codepoint;
294 
295     UChar *test;
296     /* thai should have normalization on */
297     th_th = ucol_open("th_TH", &status);
298     if (U_FAILURE(status)){
299         log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
300               myErrorName(status));
301         return;
302     }
303 
304     for (codepoint = 1; codepoint < 0xFFFE;)
305     {
306       test = source;
307 
308       while (codepoint % 0xFF != 0)
309       {
310         if (u_isdefined(codepoint))
311           *(test ++) = codepoint;
312         codepoint ++;
313       }
314 
315       if (u_isdefined(codepoint))
316         *(test ++) = codepoint;
317 
318       if (codepoint != 0xFFFF)
319         codepoint ++;
320 
321       *test = 0;
322       iter=ucol_openElements(th_th, source, u_strlen(source), &status);
323       if(U_FAILURE(status)){
324           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
325               myErrorName(status));
326             ucol_close(th_th);
327           return;
328       }
329 
330       backAndForth(iter);
331       ucol_closeElements(iter);
332 
333       iter=ucol_openElements(th_th, source, -1, &status);
334       if(U_FAILURE(status)){
335           log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
336               myErrorName(status));
337             ucol_close(th_th);
338           return;
339       }
340 
341       backAndForth(iter);
342       ucol_closeElements(iter);
343     }
344 
345     ucol_close(th_th);
346 }
347 
348 /**
349 * Test the incremental normalization
350 */
TestNormalization()351 static void TestNormalization()
352 {
353           UErrorCode          status = U_ZERO_ERROR;
354     const char               *str    =
355                             "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
356           UCollator          *coll;
357           UChar               rule[50];
358           int                 rulelen = u_unescape(str, rule, 50);
359           int                 count = 0;
360     const char                *testdata[] =
361                         {"\\u1ED9", "o\\u0323\\u0302",
362                         "\\u0300\\u0315", "\\u0315\\u0300",
363                         "A\\u0300\\u0315B", "A\\u0315\\u0300B",
364                         "A\\u0316\\u0315B", "A\\u0315\\u0316B",
365                         "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
366                         "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
367                         "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368     int32_t   srclen;
369     UChar source[10];
370     UCollationElements *iter;
371 
372     coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
373     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
374     if (U_FAILURE(status)){
375         log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
376               myErrorName(status));
377         return;
378     }
379 
380     srclen = u_unescape(testdata[0], source, 10);
381     iter = ucol_openElements(coll, source, srclen, &status);
382     backAndForth(iter);
383     ucol_closeElements(iter);
384 
385     srclen = u_unescape(testdata[1], source, 10);
386     iter = ucol_openElements(coll, source, srclen, &status);
387     backAndForth(iter);
388     ucol_closeElements(iter);
389 
390     while (count < 12) {
391         srclen = u_unescape(testdata[count], source, 10);
392         iter = ucol_openElements(coll, source, srclen, &status);
393 
394         if (U_FAILURE(status)){
395             log_err("ERROR: in creation of collator element iterator\n %s\n",
396                   myErrorName(status));
397             return;
398         }
399         backAndForth(iter);
400         ucol_closeElements(iter);
401 
402         iter = ucol_openElements(coll, source, -1, &status);
403 
404         if (U_FAILURE(status)){
405             log_err("ERROR: in creation of collator element iterator\n %s\n",
406                   myErrorName(status));
407             return;
408         }
409         backAndForth(iter);
410         ucol_closeElements(iter);
411         count ++;
412     }
413     ucol_close(coll);
414 }
415 
416 /**
417  * Test for CollationElementIterator.previous()
418  *
419  * @bug 4108758 - Make sure it works with contracting characters
420  *
421  */
TestPrevious()422 static void TestPrevious()
423 {
424     UCollator *coll=NULL;
425     UChar rule[50];
426     UChar *source;
427     UCollator *c1, *c2, *c3;
428     UCollationElements *iter;
429     UErrorCode status = U_ZERO_ERROR;
430     UChar test1[50];
431     UChar test2[50];
432 
433     u_uastrcpy(test1, "What subset of all possible test cases?");
434     u_uastrcpy(test2, "has the highest probability of detecting");
435     coll = ucol_open("en_US", &status);
436 
437     iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
438     log_verbose("English locale testing back and forth\n");
439     if(U_FAILURE(status)){
440         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
441             myErrorName(status));
442         ucol_close(coll);
443         return;
444     }
445     /* A basic test to see if it's working at all */
446     backAndForth(iter);
447     ucol_closeElements(iter);
448     ucol_close(coll);
449 
450     /* Test with a contracting character sequence */
451     u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
452     c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
453 
454     log_verbose("Contraction rule testing back and forth with no normalization\n");
455 
456     if (c1 == NULL || U_FAILURE(status))
457     {
458         log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
459             myErrorName(status));
460         return;
461     }
462     source=(UChar*)malloc(sizeof(UChar) * 20);
463     u_uastrcpy(source, "abchdcba");
464     iter=ucol_openElements(c1, source, u_strlen(source), &status);
465     if(U_FAILURE(status)){
466         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
467             myErrorName(status));
468         return;
469     }
470     backAndForth(iter);
471     ucol_closeElements(iter);
472     ucol_close(c1);
473 
474     /* Test with an expanding character sequence */
475     u_uastrcpy(rule, "&a < b < c/abd < d");
476     c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
477     log_verbose("Expansion rule testing back and forth with no normalization\n");
478     if (c2 == NULL || U_FAILURE(status))
479     {
480         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
481             myErrorName(status));
482         return;
483     }
484     u_uastrcpy(source, "abcd");
485     iter=ucol_openElements(c2, source, u_strlen(source), &status);
486     if(U_FAILURE(status)){
487         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
488             myErrorName(status));
489         return;
490     }
491     backAndForth(iter);
492     ucol_closeElements(iter);
493     ucol_close(c2);
494     /* Now try both */
495     u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
496     c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
497     log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
498 
499     if (c3 == NULL || U_FAILURE(status))
500     {
501         log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
502             myErrorName(status));
503         return;
504     }
505     u_uastrcpy(source, "abcdbchdc");
506     iter=ucol_openElements(c3, source, u_strlen(source), &status);
507     if(U_FAILURE(status)){
508         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
509             myErrorName(status));
510         return;
511     }
512     backAndForth(iter);
513     ucol_closeElements(iter);
514     ucol_close(c3);
515     source[0] = 0x0e41;
516     source[1] = 0x0e02;
517     source[2] = 0x0e41;
518     source[3] = 0x0e02;
519     source[4] = 0x0e27;
520     source[5] = 0x61;
521     source[6] = 0x62;
522     source[7] = 0x63;
523     source[8] = 0;
524 
525     coll = ucol_open("th_TH", &status);
526     log_verbose("Thai locale testing back and forth with normalization\n");
527     iter=ucol_openElements(coll, source, u_strlen(source), &status);
528     if(U_FAILURE(status)){
529         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
530             myErrorName(status));
531         return;
532     }
533     backAndForth(iter);
534     ucol_closeElements(iter);
535     ucol_close(coll);
536 
537     /* prev test */
538     source[0] = 0x0061;
539     source[1] = 0x30CF;
540     source[2] = 0x3099;
541     source[3] = 0x30FC;
542     source[4] = 0;
543 
544     coll = ucol_open("ja_JP", &status);
545     log_verbose("Japanese locale testing back and forth with normalization\n");
546     iter=ucol_openElements(coll, source, u_strlen(source), &status);
547     if(U_FAILURE(status)){
548         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
549             myErrorName(status));
550         return;
551     }
552     backAndForth(iter);
553     ucol_closeElements(iter);
554     ucol_close(coll);
555 
556     free(source);
557 }
558 
559 /**
560  * Test for getOffset() and setOffset()
561  */
TestOffset()562 static void TestOffset()
563 {
564     UErrorCode status= U_ZERO_ERROR;
565     UCollator *en_us=NULL;
566     UCollationElements *iter, *pristine;
567     int32_t offset;
568     OrderAndOffset *orders;
569     int32_t orderLength=0;
570     int     count = 0;
571     UChar test1[50];
572     UChar test2[50];
573 
574     u_uastrcpy(test1, "What subset of all possible test cases?");
575     u_uastrcpy(test2, "has the highest probability of detecting");
576     en_us = ucol_open("en_US", &status);
577     log_verbose("Testing getOffset and setOffset for collations\n");
578     iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
579     if(U_FAILURE(status)){
580         log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
581             myErrorName(status));
582         ucol_close(en_us);
583         return;
584     }
585 
586     /* testing boundaries */
587     ucol_setOffset(iter, 0, &status);
588     if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
589         log_err("Error: After setting offset to 0, we should be at the end "
590                 "of the backwards iteration");
591     }
592     ucol_setOffset(iter, u_strlen(test1), &status);
593     if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
594         log_err("Error: After setting offset to end of the string, we should "
595                 "be at the end of the backwards iteration");
596     }
597 
598     /* Run all the way through the iterator, then get the offset */
599 
600     orders = getOrders(iter, &orderLength);
601 
602     offset = ucol_getOffset(iter);
603 
604     if (offset != u_strlen(test1))
605     {
606         log_err("offset at end != length %d vs %d\n", offset,
607             u_strlen(test1) );
608     }
609 
610     /* Now set the offset back to the beginning and see if it works */
611     pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
612     if(U_FAILURE(status)){
613         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
614             myErrorName(status));
615     ucol_close(en_us);
616         return;
617     }
618     status = U_ZERO_ERROR;
619 
620     ucol_setOffset(iter, 0, &status);
621     if (U_FAILURE(status))
622     {
623         log_err("setOffset failed. %s\n",    myErrorName(status));
624     }
625     else
626     {
627         assertEqual(iter, pristine);
628     }
629 
630     ucol_closeElements(pristine);
631     ucol_closeElements(iter);
632     free(orders);
633 
634     /* testing offsets in normalization buffer */
635     test1[0] = 0x61;
636     test1[1] = 0x300;
637     test1[2] = 0x316;
638     test1[3] = 0x62;
639     test1[4] = 0;
640     ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
641     iter = ucol_openElements(en_us, test1, 4, &status);
642     if(U_FAILURE(status)){
643         log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
644             myErrorName(status));
645         ucol_close(en_us);
646         return;
647     }
648 
649     count = 0;
650     while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651         U_SUCCESS(status)) {
652         switch (count) {
653         case 0:
654             if (ucol_getOffset(iter) != 1) {
655                 log_err("ERROR: Offset of iteration should be 1\n");
656             }
657             break;
658         case 3:
659             if (ucol_getOffset(iter) != 4) {
660                 log_err("ERROR: Offset of iteration should be 4\n");
661             }
662             break;
663         default:
664             if (ucol_getOffset(iter) != 3) {
665                 log_err("ERROR: Offset of iteration should be 3\n");
666             }
667         }
668         count ++;
669     }
670 
671     ucol_reset(iter);
672     count = 0;
673     while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
674         U_SUCCESS(status)) {
675         switch (count) {
676         case 0:
677         case 1:
678             if (ucol_getOffset(iter) != 3) {
679                 log_err("ERROR: Offset of iteration should be 3\n");
680             }
681             break;
682         case 2:
683             if (ucol_getOffset(iter) != 1) {
684                 log_err("ERROR: Offset of iteration should be 1\n");
685             }
686             break;
687         default:
688             if (ucol_getOffset(iter) != 0) {
689                 log_err("ERROR: Offset of iteration should be 0\n");
690             }
691         }
692         count ++;
693     }
694 
695     if(U_FAILURE(status)){
696         log_err("ERROR: in iterating collation elements %s\n",
697             myErrorName(status));
698     }
699 
700     ucol_closeElements(iter);
701     ucol_close(en_us);
702 }
703 
704 /**
705  * Test for setText()
706  */
TestSetText()707 static void TestSetText()
708 {
709     int32_t c,i;
710     UErrorCode status = U_ZERO_ERROR;
711     UCollator *en_us=NULL;
712     UCollationElements *iter1, *iter2;
713     UChar test1[50];
714     UChar test2[50];
715 
716     u_uastrcpy(test1, "What subset of all possible test cases?");
717     u_uastrcpy(test2, "has the highest probability of detecting");
718     en_us = ucol_open("en_US", &status);
719     log_verbose("testing setText for Collation elements\n");
720     iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
721     if(U_FAILURE(status)){
722         log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
723             myErrorName(status));
724     ucol_close(en_us);
725         return;
726     }
727     iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
728     if(U_FAILURE(status)){
729         log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
730             myErrorName(status));
731     ucol_close(en_us);
732         return;
733     }
734 
735     /* Run through the second iterator just to exercise it */
736     c = ucol_next(iter2, &status);
737     i = 0;
738 
739     while ( ++i < 10 && (c != UCOL_NULLORDER))
740     {
741         if (U_FAILURE(status))
742         {
743             log_err("iter2->next() returned an error. %s\n", myErrorName(status));
744             ucol_closeElements(iter2);
745             ucol_closeElements(iter1);
746     ucol_close(en_us);
747             return;
748         }
749 
750         c = ucol_next(iter2, &status);
751     }
752 
753     /* Now set it to point to the same string as the first iterator */
754     ucol_setText(iter2, test1, u_strlen(test1), &status);
755     if (U_FAILURE(status))
756     {
757         log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758     }
759     else
760     {
761         assertEqual(iter1, iter2);
762     }
763 
764     /* Now set it to point to a null string with fake length*/
765     ucol_setText(iter2, NULL, 2, &status);
766     if (U_FAILURE(status))
767     {
768         log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
769     }
770     else
771     {
772         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773             log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774         }
775     }
776 
777     ucol_closeElements(iter2);
778     ucol_closeElements(iter1);
779     ucol_close(en_us);
780 }
781 
782 /** @bug 4108762
783  * Test for getMaxExpansion()
784  */
TestMaxExpansion()785 static void TestMaxExpansion()
786 {
787     UErrorCode          status = U_ZERO_ERROR;
788     UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
789     UChar               ch     = 0;
790     UChar32             unassigned = 0xEFFFD;
791     UChar               supplementary[2];
792     uint32_t            stringOffset = 0;
793     UBool               isError = FALSE;
794     uint32_t            sorder = 0;
795     UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
796     uint32_t            temporder = 0;
797 
798     UChar rule[256];
799     u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
800     coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
801         UCOL_DEFAULT_STRENGTH,NULL, &status);
802     if(U_SUCCESS(status) && coll) {
803       iter = ucol_openElements(coll, &ch, 1, &status);
804 
805       while (ch < 0xFFFF && U_SUCCESS(status)) {
806           int      count = 1;
807           uint32_t order;
808           int32_t  size = 0;
809 
810           ch ++;
811 
812           ucol_setText(iter, &ch, 1, &status);
813           order = ucol_previous(iter, &status);
814 
815           /* thai management */
816           if (order == 0)
817               order = ucol_previous(iter, &status);
818 
819           while (U_SUCCESS(status) &&
820               ucol_previous(iter, &status) != UCOL_NULLORDER) {
821               count ++;
822           }
823 
824           size = ucol_getMaxExpansion(iter, order);
825           if (U_FAILURE(status) || size < count) {
826               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
827                   ch, count);
828           }
829       }
830 
831       /* testing for exact max expansion */
832       ch = 0;
833       while (ch < 0x61) {
834           uint32_t order;
835           int32_t  size;
836           ucol_setText(iter, &ch, 1, &status);
837           order = ucol_previous(iter, &status);
838           size  = ucol_getMaxExpansion(iter, order);
839           if (U_FAILURE(status) || size != 1) {
840               log_err("Failure at codepoint %d, maximum expansion count < %d\n",
841                   ch, 1);
842           }
843           ch ++;
844       }
845 
846       ch = 0x63;
847       ucol_setText(iter, &ch, 1, &status);
848       temporder = ucol_previous(iter, &status);
849 
850       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
851           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
852                   ch, 3);
853       }
854 
855       ch = 0x64;
856       ucol_setText(iter, &ch, 1, &status);
857       temporder = ucol_previous(iter, &status);
858 
859       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
860           log_err("Failure at codepoint %d, maximum expansion count != %d\n",
861                   ch, 3);
862       }
863 
864       U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
865       ucol_setText(iter, supplementary, 2, &status);
866       sorder = ucol_previous(iter, &status);
867 
868       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
869           log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870                   ch, 2);
871       }
872 
873       /* testing jamo */
874       ch = 0x1165;
875 
876       ucol_setText(iter, &ch, 1, &status);
877       temporder = ucol_previous(iter, &status);
878       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
879           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
880                   ch, 3);
881       }
882 
883       ucol_closeElements(iter);
884       ucol_close(coll);
885 
886       /* testing special jamo &a<\u1160 */
887       rule[0] = 0x26;
888       rule[1] = 0x71;
889       rule[2] = 0x3c;
890       rule[3] = 0x1165;
891       rule[4] = 0x2f;
892       rule[5] = 0x71;
893       rule[6] = 0x71;
894       rule[7] = 0x71;
895       rule[8] = 0x71;
896       rule[9] = 0;
897 
898       coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
899           UCOL_DEFAULT_STRENGTH,NULL, &status);
900       iter = ucol_openElements(coll, &ch, 1, &status);
901 
902       temporder = ucol_previous(iter, &status);
903       if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
904           log_err("Failure at codepoint %d, maximum expansion count > %d\n",
905                   ch, 5);
906       }
907 
908       ucol_closeElements(iter);
909       ucol_close(coll);
910     } else {
911       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
912     }
913 
914 }
915 
916 
/**
 * Advances two collation element iterators in lockstep and logs an error at
 * the first step where the elements they return differ. Iteration ends at the
 * first mismatch, or once both iterators have returned UCOL_NULLORDER.
 * The error status filled in by ucol_next() is not examined.
 * @param i1 first iterator
 * @param i2 second iterator
 */
static void assertEqual(UCollationElements *i1, UCollationElements *i2)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t    step;

    for (step = 0; ; ++step) {
        /* always pull from i1 first, then i2 */
        int32_t left  = ucol_next(i1, &status);
        int32_t right = ucol_next(i2, &status);

        if (left != right) {
            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", step, left, right);
            return;
        }
        if (left == UCOL_NULLORDER) {
            /* both iterators are exhausted */
            return;
        }
    }
}
938 
939 /**
940  * Testing iterators with extremely small buffers
941  */
TestSmallBuffer()942 static void TestSmallBuffer()
943 {
944     UErrorCode          status = U_ZERO_ERROR;
945     UCollator          *coll;
946     UCollationElements *testiter,
947                        *iter;
948     int32_t             count = 0;
949     OrderAndOffset     *testorders,
950                        *orders;
951 
952     UChar teststr[500];
953     UChar str[] = {0x300, 0x31A, 0};
954     /*
955     creating a long string of decomposable characters,
956     since by default the writable buffer is of size 256
957     */
958     while (count < 500) {
959         if ((count & 1) == 0) {
960             teststr[count ++] = 0x300;
961         }
962         else {
963             teststr[count ++] = 0x31A;
964         }
965     }
966 
967     coll = ucol_open("th_TH", &status);
968     if(U_SUCCESS(status) && coll) {
969       testiter = ucol_openElements(coll, teststr, 500, &status);
970       iter = ucol_openElements(coll, str, 2, &status);
971 
972       orders     = getOrders(iter, &count);
973       if (count != 2) {
974           log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
975       }
976 
977       /*
978       this will rearrange the string data to 250 characters of 0x300 first then
979       250 characters of 0x031A
980       */
981       testorders = getOrders(testiter, &count);
982 
983       if (count != 500) {
984           log_err("Error decomposition does not give the right sized collation elements\n");
985       }
986 
987       while (count != 0) {
988           /* UCA collation element for 0x0F76 */
989           if ((count > 250 && testorders[-- count].order != orders[1].order) ||
990               (count <= 250 && testorders[-- count].order != orders[0].order)) {
991               log_err("Error decomposition does not give the right collation element at %d count\n", count);
992               break;
993           }
994       }
995 
996       free(testorders);
997       free(orders);
998 
999       ucol_reset(testiter);
1000 
1001       /* ensures closing of elements done properly to clear writable buffer */
1002       ucol_next(testiter, &status);
1003       ucol_next(testiter, &status);
1004       ucol_closeElements(testiter);
1005       ucol_closeElements(iter);
1006       ucol_close(coll);
1007     } else {
1008       log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1009     }
1010 }
1011 
/**
* Snippet of code from genuca.
* Converts a single hexadecimal digit character to its numeric value.
* @param hex character in '0'-'9', 'a'-'f' or 'A'-'F'
* @return value 0..15 of the digit; 0 for any other character
*/
static int32_t hex2num(char hex) {
    int32_t value = 0;

    if (hex >= 'A' && hex <= 'F') {
        value = 10 + (hex - 'A');
    } else if (hex >= 'a' && hex <= 'f') {
        value = 10 + (hex - 'a');
    } else if (hex >= '0' && hex <= '9') {
        value = hex - '0';
    }
    return value;
}
1026 
1027 /**
1028 * Getting codepoints from a string
1029 * @param str character string contain codepoints seperated by space and ended
1030 *        by a semicolon
1031 * @param codepoints array for storage, assuming size > 5
1032 * @return position at the end of the codepoint section
1033 */
getCodePoints(char * str,UChar * codepoints,UChar * contextCPs)1034 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035     UErrorCode errorCode = U_ZERO_ERROR;
1036     char *semi = uprv_strchr(str, ';');
1037     char *pipe = uprv_strchr(str, '|');
1038     char *s;
1039     *codepoints = 0;
1040     *contextCPs = 0;
1041     if(semi == NULL) {
1042         log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043         return str;
1044     }
1045     if(pipe != NULL) {
1046         int32_t contextLength;
1047         *pipe = 0;
1048         contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049         *pipe = '|';
1050         if(U_FAILURE(errorCode)) {
1051             log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052             return str;
1053         }
1054         /* prepend the precontext string to the codepoints */
1055         u_memcpy(codepoints, contextCPs, contextLength);
1056         codepoints += contextLength;
1057         /* start of the code point string */
1058         s = pipe + 1;
1059     } else {
1060         s = str;
1061     }
1062     u_parseString(s, codepoints, 99, NULL, &errorCode);
1063     if(U_FAILURE(errorCode)) {
1064         log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065         return str;
1066     }
1067     return semi + 1;
1068 }
1069 
1070 /**
1071 * Sniplets of code from genuca
1072 */
1073 static int32_t
readElement(char ** from,char * to,char separator,UErrorCode * status)1074 readElement(char **from, char *to, char separator, UErrorCode *status)
1075 {
1076     if (U_SUCCESS(*status)) {
1077         char    buffer[1024];
1078         int32_t i = 0;
1079         while (**from != separator) {
1080             if (**from != ' ') {
1081                 *(buffer+i++) = **from;
1082             }
1083             (*from)++;
1084         }
1085         (*from)++;
1086         *(buffer + i) = 0;
1087         strcpy(to, buffer);
1088         return i/2;
1089     }
1090 
1091     return 0;
1092 }
1093 
1094 /**
1095 * Sniplets of code from genuca
1096 */
1097 static uint32_t
getSingleCEValue(char * primary,char * secondary,char * tertiary,UErrorCode * status)1098 getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099                           UErrorCode *status)
1100 {
1101     if (U_SUCCESS(*status)) {
1102         uint32_t  value    = 0;
1103         char      primsave = '\0';
1104         char      secsave  = '\0';
1105         char      tersave  = '\0';
1106         char     *primend  = primary+4;
1107         char     *secend   = secondary+2;
1108         char     *terend   = tertiary+2;
1109         uint32_t  primvalue;
1110         uint32_t  secvalue;
1111         uint32_t  tervalue;
1112 
1113         if (uprv_strlen(primary) > 4) {
1114             primsave = *primend;
1115             *primend = '\0';
1116         }
1117 
1118         if (uprv_strlen(secondary) > 2) {
1119             secsave = *secend;
1120             *secend = '\0';
1121         }
1122 
1123         if (uprv_strlen(tertiary) > 2) {
1124             tersave = *terend;
1125             *terend = '\0';
1126         }
1127 
1128         primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129         secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130         tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131         if(primvalue <= 0xFF) {
1132           primvalue <<= 8;
1133         }
1134 
1135         value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136            | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137            | (tervalue & UCOL_TERTIARYORDERMASK);
1138 
1139         if(primsave!='\0') {
1140             *primend = primsave;
1141         }
1142         if(secsave!='\0') {
1143             *secend = secsave;
1144         }
1145         if(tersave!='\0') {
1146             *terend = tersave;
1147         }
1148         return value;
1149     }
1150     return 0;
1151 }
1152 
1153 /**
1154 * Getting collation elements generated from a string
1155 * @param str character string contain collation elements contained in [] and
1156 *        seperated by space
1157 * @param ce array for storage, assuming size > 20
1158 * @param status error status
1159 * @return position at the end of the codepoint section
1160 */
getCEs(char * str,uint32_t * ces,UErrorCode * status)1161 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162     char       *pStartCP     = uprv_strchr(str, '[');
1163     int         count        = 0;
1164     char       *pEndCP;
1165     char        primary[100];
1166     char        secondary[100];
1167     char        tertiary[100];
1168 
1169     while (*pStartCP == '[') {
1170         uint32_t primarycount   = 0;
1171         uint32_t secondarycount = 0;
1172         uint32_t tertiarycount  = 0;
1173         uint32_t CEi = 1;
1174         pEndCP = strchr(pStartCP, ']');
1175         if(pEndCP == NULL) {
1176             break;
1177         }
1178         pStartCP ++;
1179 
1180         primarycount   = readElement(&pStartCP, primary, ',', status);
1181         secondarycount = readElement(&pStartCP, secondary, ',', status);
1182         tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1183 
1184         /* I want to get the CEs entered right here, including continuation */
1185         ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186         if (U_FAILURE(*status)) {
1187             break;
1188         }
1189 
1190         while (2 * CEi < primarycount || CEi < secondarycount ||
1191                CEi < tertiarycount) {
1192             uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193             if (2 * CEi < primarycount) {
1194                 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195                 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196             }
1197 
1198             if (2 * CEi + 1 < primarycount) {
1199                 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200                 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201             }
1202 
1203             if (CEi < secondarycount) {
1204                 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205                 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206             }
1207 
1208             if (CEi < tertiarycount) {
1209                 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210                 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211             }
1212 
1213             CEi ++;
1214             ces[count ++] = value;
1215         }
1216 
1217       pStartCP = pEndCP + 1;
1218     }
1219     ces[count] = 0;
1220     return pStartCP;
1221 }
1222 
1223 /**
1224 * Getting the FractionalUCA.txt file stream
1225 */
getFractionalUCA(void)1226 static FileStream * getFractionalUCA(void)
1227 {
1228     char        newPath[256];
1229     char        backupPath[256];
1230     FileStream *result = NULL;
1231 
1232     /* Look inside ICU_DATA first */
1233     uprv_strcpy(newPath, ctest_dataSrcDir());
1234     uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235     uprv_strcat(newPath, "FractionalUCA.txt");
1236 
1237     /* As a fallback, try to guess where the source data was located
1238      *   at the time ICU was built, and look there.
1239      */
1240 #if defined (U_TOPSRCDIR)
1241     strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1242 #else
1243     {
1244         UErrorCode errorCode = U_ZERO_ERROR;
1245         strcpy(backupPath, loadTestData(&errorCode));
1246         strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247     }
1248 #endif
1249     strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250 
1251     result = T_FileStream_open(newPath, "rb");
1252 
1253     if (result == NULL) {
1254         result = T_FileStream_open(backupPath, "rb");
1255         if (result == NULL) {
1256             log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257         }
1258     }
1259     return result;
1260 }
1261 
1262 /**
1263 * Testing the CEs returned by the iterator
1264 */
TestCEs()1265 static void TestCEs() {
1266     FileStream *file = NULL;
1267     char        line[2048];
1268     char       *str;
1269     UChar       codepoints[10];
1270     uint32_t    ces[20];
1271     UErrorCode  status = U_ZERO_ERROR;
1272     UCollator          *coll = ucol_open("", &status);
1273     uint32_t lineNo = 0;
1274     UChar       contextCPs[5];
1275 
1276     if (U_FAILURE(status)) {
1277         log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1278         return;
1279     }
1280 
1281     file = getFractionalUCA();
1282 
1283     if (file == NULL) {
1284         log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285         return;
1286     }
1287 
1288 
1289     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290         int                 count = 0;
1291         UCollationElements *iter;
1292         int32_t            preContextCeLen=0;
1293         lineNo++;
1294         /* skip this line if it is empty or a comment or is a return value
1295         or start of some variable section */
1296         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297             line[0] == 0x000D || line[0] == '[') {
1298             continue;
1299         }
1300 
1301         str = getCodePoints(line, codepoints, contextCPs);
1302 
1303         /* these are 'fake' codepoints in the fractional UCA, and are used just
1304          * for positioning of indirect values. They should not go through this
1305          * test.
1306          */
1307         if(*codepoints == 0xFDD0) {
1308           continue;
1309         }
1310         if (*contextCPs != 0) {
1311             iter = ucol_openElements(coll, contextCPs, -1, &status);
1312             if (U_FAILURE(status)) {
1313                 log_err("Error in opening collation elements\n");
1314                 break;
1315             }
1316             while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317                 preContextCeLen++;
1318             }
1319             ucol_closeElements(iter);
1320         }
1321 
1322         getCEs(str, ces+preContextCeLen, &status);
1323         if (U_FAILURE(status)) {
1324             log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325             break;
1326         }
1327         iter = ucol_openElements(coll, codepoints, -1, &status);
1328         if (U_FAILURE(status)) {
1329             log_err("Error in opening collation elements\n");
1330             break;
1331         }
1332         for (;;) {
1333             uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334             if (ce == 0xFFFFFFFF) {
1335                 ce = 0;
1336             }
1337             /* we now unconditionally reorder Thai/Lao prevowels, so this
1338              * test would fail if we don't skip here.
1339              */
1340             if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341               continue;
1342             }
1343             if (ce != ces[count] || U_FAILURE(status)) {
1344                 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345                 break;
1346             }
1347             if (ces[count] == 0) {
1348                 break;
1349             }
1350             count ++;
1351         }
1352         ucol_closeElements(iter);
1353     }
1354 
1355     T_FileStream_close(file);
1356     ucol_close(coll);
1357 }
1358 
1359 /**
1360 * Testing the discontigous contractions
1361 */
TestDiscontiguos()1362 static void TestDiscontiguos() {
1363     const char               *rulestr    =
1364                             "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365           UChar               rule[50];
1366           int                 rulelen = u_unescape(rulestr, rule, 50);
1367     const char               *src[] = {
1368      "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369     /* base character blocked */
1370      "XD\\u0300", "XD\\u0300\\u0315",
1371     /* non blocking combining character */
1372      "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373      /* blocking combining character */
1374      "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375      /* contraction prefix */
1376      "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377      "X\\u0300\\u031A\\u0315",
1378      /* ends not with a contraction character */
1379      "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380      "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381     };
1382     const char               *tgt[] = {
1383      /* non blocking combining character */
1384      "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385     /* base character blocked */
1386      "X D \\u0300", "X D \\u0300\\u0315",
1387     /* non blocking combining character */
1388      "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389      /* blocking combining character */
1390      "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391      /* contraction prefix */
1392      "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393      "X\\u0300 \\u031A \\u0315",
1394      /* ends not with a contraction character */
1395      "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396      "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397     };
1398           int                 size   = 20;
1399           UCollator          *coll;
1400           UErrorCode          status    = U_ZERO_ERROR;
1401           int                 count     = 0;
1402           UCollationElements *iter;
1403           UCollationElements *resultiter;
1404 
1405     coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406     iter       = ucol_openElements(coll, rule, 1, &status);
1407     resultiter = ucol_openElements(coll, rule, 1, &status);
1408 
1409     if (U_FAILURE(status)) {
1410         log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1411         return;
1412     }
1413 
1414     while (count < size) {
1415         UChar  str[20];
1416         UChar  tstr[20];
1417         int    strLen = u_unescape(src[count], str, 20);
1418         UChar *s;
1419 
1420         ucol_setText(iter, str, strLen, &status);
1421         if (U_FAILURE(status)) {
1422             log_err("Error opening collation iterator\n");
1423             return;
1424         }
1425 
1426         u_unescape(tgt[count], tstr, 20);
1427         s = tstr;
1428 
1429         log_verbose("count %d\n", count);
1430 
1431         for (;;) {
1432             uint32_t  ce;
1433             UChar    *e = u_strchr(s, 0x20);
1434             if (e == 0) {
1435                 e = u_strchr(s, 0);
1436             }
1437             ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438             ce = ucol_next(resultiter, &status);
1439             if (U_FAILURE(status)) {
1440                 log_err("Error manipulating collation iterator\n");
1441                 return;
1442             }
1443             while (ce != UCOL_NULLORDER) {
1444                 if (ce != (uint32_t)ucol_next(iter, &status) ||
1445                     U_FAILURE(status)) {
1446                     log_err("Discontiguos contraction test mismatch\n");
1447                     return;
1448                 }
1449                 ce = ucol_next(resultiter, &status);
1450                 if (U_FAILURE(status)) {
1451                     log_err("Error getting next collation element\n");
1452                     return;
1453                 }
1454             }
1455             s = e + 1;
1456             if (*e == 0) {
1457                 break;
1458             }
1459         }
1460         ucol_reset(iter);
1461         backAndForth(iter);
1462         count ++;
1463     }
1464     ucol_closeElements(resultiter);
1465     ucol_closeElements(iter);
1466     ucol_close(coll);
1467 }
1468 
TestCEBufferOverflow()1469 static void TestCEBufferOverflow()
1470 {
1471     UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472     UErrorCode          status = U_ZERO_ERROR;
1473     UChar               rule[10];
1474     UCollator          *coll;
1475     UCollationElements *iter;
1476 
1477     u_uastrcpy(rule, "&z < AB");
1478     coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479     if (U_FAILURE(status)) {
1480         log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1481         return;
1482     }
1483 
1484     /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485     test. this will cause an overflow in getPrev */
1486     str[0] = 0x0041;    /* 'A' */
1487     /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488     uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489     str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1490     iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491                              &status);
1492     if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493         status == U_BUFFER_OVERFLOW_ERROR) {
1494         log_err("CE buffer should not overflow with long string of trail surrogates\n");
1495     }
1496     ucol_closeElements(iter);
1497     ucol_close(coll);
1498 }
1499 
/**
* Checking collation element validity.
*/
#define MAX_CODEPOINTS_TO_SHOW 10
/**
* Formats code units as text of the form " XXXX XXXX ..." for error messages.
* At most MAX_CODEPOINTS_TO_SHOW units are printed; " ..." is appended when
* the input was longer than what was printed.
* @param codepoints code units to show
* @param length number of code units in codepoints
* @param codepointText output buffer; always receives a NUL-terminated string
*        (empty when length <= 0). It must have room for
*        5*MAX_CODEPOINTS_TO_SHOW + 5 bytes, assuming each unit prints as
*        4 hex digits.
*/
static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
    int i, lengthToUse = length;
    /* make sure the caller gets a valid string even if nothing is printed */
    *codepointText = 0;
    if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
        lengthToUse = MAX_CODEPOINTS_TO_SHOW;
    }
    for (i = 0; i < lengthToUse; ++i) {
        int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
        if (bytesWritten <= 0) {
            break;
        }
        codepointText += bytesWritten;
    }
    if (i < length) {
        sprintf(codepointText, " ...");
    }
}
1520 
checkCEValidity(const UCollator * coll,const UChar * codepoints,int length)1521 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1522                              int length)
1523 {
1524     UErrorCode          status = U_ZERO_ERROR;
1525     UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
1526                                                   &status);
1527     UBool result = FALSE;
1528     UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1529     const char * collLocale;
1530 
1531     if (U_FAILURE(status)) {
1532         log_err("Error creating iterator for testing validity\n");
1533         return FALSE;
1534     }
1535     collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1536     if (U_FAILURE(status) || collLocale==NULL) {
1537         status = U_ZERO_ERROR;
1538         collLocale = "?";
1539     }
1540 
1541     for (;;) {
1542         uint32_t ce = ucol_next(iter, &status);
1543         uint32_t primary, p1, p2, secondary, tertiary;
1544         if (ce == UCOL_NULLORDER) {
1545             result = TRUE;
1546             break;
1547         }
1548         if (ce == 0) {
1549             continue;
1550         }
1551         if (ce == 0x02000202) {
1552             /* special CE for merge-sort character */
1553             if (*codepoints == 0xFFFE /* && length == 1 */) {
1554                 /*
1555                  * Note: We should check for length==1 but the token parser appears
1556                  * to give us trailing NUL characters.
1557                  * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1558                  *                     rather than the internal collation rule parser
1559                  */
1560                 continue;
1561             } else {
1562                 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1563                         (int)*codepoints, (int)length);
1564                 break;
1565             }
1566         }
1567         primary   = UCOL_PRIMARYORDER(ce);
1568         p1 = primary >> 8;
1569         p2 = primary & 0xFF;
1570         secondary = UCOL_SECONDARYORDER(ce);
1571         tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1572 
1573         if (!isContinuation(ce)) {
1574             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1575                 log_err("Empty CE %08lX except for case bits\n", (long)ce);
1576                 break;
1577             }
1578             if (p1 == 0) {
1579                 if (p2 != 0) {
1580                     log_err("Primary 00 xx in %08lX\n", (long)ce);
1581                     break;
1582                 }
1583                 primaryDone = TRUE;
1584             } else {
1585                 if (p1 <= 2 || p1 >= 0xF0) {
1586                     /* Primary first bytes F0..FF are specials. */
1587                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
1588                     break;
1589                 }
1590                 if (p2 == 0) {
1591                     primaryDone = TRUE;
1592                 } else {
1593                     if (p2 <= 3 || p2 >= 0xFF) {
1594                         /* Primary second bytes 03 and FF are sort key compression terminators. */
1595                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
1596                         break;
1597                     }
1598                     primaryDone = FALSE;
1599                 }
1600             }
1601             if (secondary == 0) {
1602                 if (primary != 0) {
1603                     log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1604                     break;
1605                 }
1606                 secondaryDone = TRUE;
1607             } else {
1608                 if (secondary <= 2 ||
1609                     (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
1610                 ) {
1611                     /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
1612                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
1613                     break;
1614                 }
1615                 secondaryDone = FALSE;
1616             }
1617             if (tertiary == 0) {
1618                 /* We know that ce != 0. */
1619                 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1620                 break;
1621             }
1622             if (tertiary <= 2) {
1623                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1624                 break;
1625             }
1626             tertiaryDone = FALSE;
1627         } else {
1628             if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1629                 log_err("Empty continuation %08lX\n", (long)ce);
1630                 break;
1631             }
1632             if (primaryDone && primary != 0) {
1633                 log_err("Primary was done but continues in %08lX\n", (long)ce);
1634                 break;
1635             }
1636             if (p1 == 0) {
1637                 if (p2 != 0) {
1638                     log_err("Primary 00 xx in %08lX\n", (long)ce);
1639                     break;
1640                 }
1641                 primaryDone = TRUE;
1642             } else {
1643                 if (p1 <= 2) {
1644                     log_err("Primary first byte of %08lX out of range\n", (long)ce);
1645                     break;
1646                 }
1647                 if (p2 == 0) {
1648                     primaryDone = TRUE;
1649                 } else {
1650                     if (p2 <= 3) {
1651                         log_err("Primary second byte of %08lX out of range\n", (long)ce);
1652                         break;
1653                     }
1654                 }
1655             }
1656             if (secondaryDone && secondary != 0) {
1657                 log_err("Secondary was done but continues in %08lX\n", (long)ce);
1658                 break;
1659             }
1660             if (secondary == 0) {
1661                 secondaryDone = TRUE;
1662             } else {
1663                 if (secondary <= 2) {
1664                     log_err("Secondary byte of %08lX out of range\n", (long)ce);
1665                     break;
1666                 }
1667             }
1668             if (tertiaryDone && tertiary != 0) {
1669                 log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1670                 break;
1671             }
1672             if (tertiary == 0) {
1673                 tertiaryDone = TRUE;
1674             } else if (tertiary <= 2) {
1675                 log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1676                 break;
1677             }
1678         }
1679     }
1680     if (!result) {
1681         char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1682         showCodepoints(codepoints, length, codepointText);
1683         log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
1684     }
1685     ucol_closeElements(iter);
1686     return result;
1687 }
1688 
TestCEValidity()1689 static void TestCEValidity()
1690 {
1691     /* testing UCA collation elements */
1692     UErrorCode  status      = U_ZERO_ERROR;
1693     /* en_US has no tailorings */
1694     UCollator  *coll        = ucol_open("root", &status);
1695     /* tailored locales */
1696     char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1697     const char *loc;
1698     FileStream *file = NULL;
1699     char        line[2048];
1700     UChar       codepoints[11];
1701     int         count = 0;
1702     int         maxCount = 0;
1703     UChar       contextCPs[3];
1704     UChar32     c;
1705     UParseError parseError;
1706     if (U_FAILURE(status)) {
1707         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1708         return;
1709     }
1710     log_verbose("Testing UCA elements\n");
1711     file = getFractionalUCA();
1712     if (file == NULL) {
1713         log_err("Fractional UCA data can not be opened\n");
1714         return;
1715     }
1716 
1717     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1718         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1719             line[0] == 0x000D || line[0] == '[') {
1720             continue;
1721         }
1722 
1723         getCodePoints(line, codepoints, contextCPs);
1724         checkCEValidity(coll, codepoints, u_strlen(codepoints));
1725     }
1726 
1727     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1728     for (c = 0; c <= 0xffff; ++c) {
1729         if (u_isdefined(c)) {
1730             codepoints[0] = (UChar)c;
1731             checkCEValidity(coll, codepoints, 1);
1732         }
1733     }
1734     for (; c <= 0x10ffff; ++c) {
1735         if (u_isdefined(c)) {
1736             int32_t i = 0;
1737             U16_APPEND_UNSAFE(codepoints, i, c);
1738             checkCEValidity(coll, codepoints, i);
1739         }
1740     }
1741 
1742     ucol_close(coll);
1743 
1744     /* testing tailored collation elements */
1745     log_verbose("Testing tailored elements\n");
1746     if(getTestOption(QUICK_OPTION)) {
1747         maxCount = sizeof(locale)/sizeof(locale[0]);
1748     } else {
1749         maxCount = uloc_countAvailable();
1750     }
1751     while (count < maxCount) {
1752         const UChar *rules = NULL,
1753                     *current = NULL;
1754         UChar *rulesCopy = NULL;
1755         int32_t ruleLen = 0;
1756 
1757         uint32_t chOffset = 0;
1758         uint32_t chLen = 0;
1759         uint32_t exOffset = 0;
1760         uint32_t exLen = 0;
1761         uint32_t prefixOffset = 0;
1762         uint32_t prefixLen = 0;
1763         UBool    startOfRules = TRUE;
1764         UColOptionSet opts;
1765 
1766         UColTokenParser src;
1767         uint32_t strength = 0;
1768         uint16_t specs = 0;
1769         if(getTestOption(QUICK_OPTION)) {
1770             loc = locale[count];
1771         } else {
1772             loc = uloc_getAvailable(count);
1773             if(!hasCollationElements(loc)) {
1774                 count++;
1775                 continue;
1776             }
1777         }
1778 
1779         uprv_memset(&src, 0, sizeof(UColTokenParser));
1780 
1781         log_verbose("Testing CEs for %s\n", loc);
1782 
1783         coll      = ucol_open(loc, &status);
1784         if (U_FAILURE(status)) {
1785             log_err("%s collator creation failed\n", loc);
1786             return;
1787         }
1788 
1789         src.opts = &opts;
1790         rules = ucol_getRules(coll, &ruleLen);
1791 
1792         if (ruleLen > 0) {
1793             rulesCopy = (UChar *)uprv_malloc((ruleLen +
1794                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1795             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1796             src.current = src.source = rulesCopy;
1797             src.end = rulesCopy + ruleLen;
1798             src.extraCurrent = src.end;
1799             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1800 
1801 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1802 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1803             while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1804               strength = src.parsedToken.strength;
1805               chOffset = src.parsedToken.charsOffset;
1806               chLen = src.parsedToken.charsLen;
1807               exOffset = src.parsedToken.extensionOffset;
1808               exLen = src.parsedToken.extensionLen;
1809               prefixOffset = src.parsedToken.prefixOffset;
1810               prefixLen = src.parsedToken.prefixLen;
1811               specs = src.parsedToken.flags;
1812 
1813                 startOfRules = FALSE;
1814                 uprv_memcpy(codepoints, src.source + chOffset,
1815                                                        chLen * sizeof(UChar));
1816                 codepoints[chLen] = 0;
1817                 checkCEValidity(coll, codepoints, chLen);
1818             }
1819             uprv_free(src.source);
1820         }
1821 
1822         ucol_close(coll);
1823         count ++;
1824     }
1825     T_FileStream_close(file);
1826 }
1827 
/**
 * Reports an invalid sort key through log_err: first the code units of the
 * offending string, then every byte of the sort key.
 *
 * @param codepoints code units the sort key was generated for
 * @param length     number of code units to print from codepoints
 * @param sortkey    sort key bytes
 * @param sklen      number of bytes to print from sortkey
 */
static void printSortKeyError(const UChar   *codepoints, int length,
                                    uint8_t *sortkey, int sklen)
{
    int i;

    log_err("Sortkey not valid for ");
    for (i = 0; i < length; i++) {
        log_err("0x%04x ", codepoints[i]);
    }

    log_err("\nSortkey : ");
    for (i = 0; i < sklen; i++) {
        log_err("0x%02x ", sortkey[i]);
    }
    log_err("\n");
}
1845 
1846 /**
1847 * Checking sort key validity for all levels
1848 */
checkSortKeyValidity(UCollator * coll,const UChar * codepoints,int length)1849 static UBool checkSortKeyValidity(UCollator *coll,
1850                                   const UChar *codepoints,
1851                                   int length)
1852 {
1853     UErrorCode status  = U_ZERO_ERROR;
1854     UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1855                                       UCOL_TERTIARY, UCOL_QUATERNARY,
1856                                       UCOL_IDENTICAL};
1857     int        strengthlen = 5;
1858     int        strengthIndex = 0;
1859     int        caselevel   = 0;
1860 
1861     while (caselevel < 1) {
1862         if (caselevel == 0) {
1863             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1864         }
1865         else {
1866             ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1867         }
1868 
1869         while (strengthIndex < strengthlen) {
1870             int        count01 = 0;
1871             uint32_t   count   = 0;
1872             uint8_t    sortkey[128];
1873             uint32_t   sklen;
1874 
1875             ucol_setStrength(coll, strength[strengthIndex]);
1876             sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1877             while (sortkey[count] != 0) {
1878                 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
1879                     printSortKeyError(codepoints, length, sortkey, sklen);
1880                     return FALSE;
1881                 }
1882                 if (sortkey[count] == 1) {
1883                     count01 ++;
1884                 }
1885                 count ++;
1886             }
1887 
1888             if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1889                 printSortKeyError(codepoints, length, sortkey, sklen);
1890                 return FALSE;
1891             }
1892             strengthIndex ++;
1893         }
1894         caselevel ++;
1895     }
1896     return TRUE;
1897 }
1898 
TestSortKeyValidity(void)1899 static void TestSortKeyValidity(void)
1900 {
1901     /* testing UCA collation elements */
1902     UErrorCode  status      = U_ZERO_ERROR;
1903     /* en_US has no tailorings */
1904     UCollator  *coll        = ucol_open("en_US", &status);
1905     /* tailored locales */
1906     char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1907     FileStream *file = NULL;
1908     char        line[2048];
1909     UChar       codepoints[10];
1910     int         count = 0;
1911     UChar       contextCPs[5];
1912     UParseError parseError;
1913     if (U_FAILURE(status)) {
1914         log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1915         return;
1916     }
1917     log_verbose("Testing UCA elements\n");
1918     file = getFractionalUCA();
1919     if (file == NULL) {
1920         log_err("Fractional UCA data can not be opened\n");
1921         return;
1922     }
1923 
1924     while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1925         if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1926             line[0] == 0x000D || line[0] == '[') {
1927             continue;
1928         }
1929 
1930         getCodePoints(line, codepoints, contextCPs);
1931         if(codepoints[0] == 0xFFFE) {
1932             /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1933             continue;
1934         }
1935         checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1936     }
1937 
1938     log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1939     codepoints[0] = 0;
1940 
1941     while (codepoints[0] < 0xFFFF) {
1942         if (u_isdefined((UChar32)codepoints[0])) {
1943             checkSortKeyValidity(coll, codepoints, 1);
1944         }
1945         codepoints[0] ++;
1946     }
1947 
1948     ucol_close(coll);
1949 
1950     /* testing tailored collation elements */
1951     log_verbose("Testing tailored elements\n");
1952     while (count < 5) {
1953         const UChar *rules = NULL,
1954                     *current = NULL;
1955         UChar *rulesCopy = NULL;
1956         int32_t ruleLen = 0;
1957 
1958         uint32_t chOffset = 0;
1959         uint32_t chLen = 0;
1960         uint32_t exOffset = 0;
1961         uint32_t exLen = 0;
1962         uint32_t prefixOffset = 0;
1963         uint32_t prefixLen = 0;
1964         UBool    startOfRules = TRUE;
1965         UColOptionSet opts;
1966 
1967         UColTokenParser src;
1968         uint32_t strength = 0;
1969         uint16_t specs = 0;
1970 
1971         uprv_memset(&src, 0, sizeof(UColTokenParser));
1972 
1973         coll      = ucol_open(locale[count], &status);
1974         if (U_FAILURE(status)) {
1975             log_err("%s collator creation failed\n", locale[count]);
1976             return;
1977         }
1978 
1979         src.opts = &opts;
1980         rules = ucol_getRules(coll, &ruleLen);
1981 
1982         if (ruleLen > 0) {
1983             rulesCopy = (UChar *)uprv_malloc((ruleLen +
1984                 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1985             uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1986             src.current = src.source = rulesCopy;
1987             src.end = rulesCopy + ruleLen;
1988             src.extraCurrent = src.end;
1989             src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1990 
1991 	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1992 	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1993             while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1994                 strength = src.parsedToken.strength;
1995                 chOffset = src.parsedToken.charsOffset;
1996                 chLen = src.parsedToken.charsLen;
1997                 exOffset = src.parsedToken.extensionOffset;
1998                 exLen = src.parsedToken.extensionLen;
1999                 prefixOffset = src.parsedToken.prefixOffset;
2000                 prefixLen = src.parsedToken.prefixLen;
2001                 specs = src.parsedToken.flags;
2002 
2003                 startOfRules = FALSE;
2004                 uprv_memcpy(codepoints, src.source + chOffset,
2005                                                        chLen * sizeof(UChar));
2006                 codepoints[chLen] = 0;
2007                 if(codepoints[0] == 0xFFFE) {
2008                     /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2009                     continue;
2010                 }
2011                 checkSortKeyValidity(coll, codepoints, chLen);
2012             }
2013             uprv_free(src.source);
2014         }
2015 
2016         ucol_close(coll);
2017         count ++;
2018     }
2019     T_FileStream_close(file);
2020 }
2021 
2022 /**
2023 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
2024 * normalization on AND jamo tailoring, among other things.
2025 */
2026 static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
2027     0x0020, 0xAC00,                 /* simple LV Hangul */
2028     0x0020, 0xAC01,                 /* simple LVT Hangul */
2029     0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
2030     0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
2031     0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
2032     0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
2033     0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
2034     0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
2035     0x0020, 0x00E6,                 /* small letter ae, expands */
2036     0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
2037     0x0020
2038 };
2039 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
2040 
/*
 * Offsets expected from ucol_getOffset() for the "root" collator, one entry per
 * collation element returned while iterating over tsceText, plus a final entry
 * for the call that returns UCOL_NULLORDER. Each row below starts with the
 * index of a U+0020 in tsceText; the last entry is the text length.
 */
static const int32_t rootStandardOffsets[] = {
    /* U+0020 at  0 */   0,   1,  2,
    /* U+0020 at  2 */   2,   3,  4,  4,
    /* U+0020 at  4 */   4,   5,  6,  6,
    /* U+0020 at  6 */   6,   7,  8,  8,
    /* U+0020 at  8 */   8,   9, 10, 11,
    /* U+0020 at 12 */  12,  13, 14, 15,
    /* U+0020 at 16 */  16,  17, 18, 19,
    /* U+0020 at 20 */  20,  21, 22, 23,
    /* U+0020 at 24 */  24,  25, 26, 26, 26,
    /* U+0020 at 26 */  26,  27, 28, 28,
    /* U+0020 at 28 */  28,
    /* end of text  */  29
};

/* number of entries in rootStandardOffsets */
enum { kLen_rootStandardOffsets = sizeof rootStandardOffsets / sizeof rootStandardOffsets[0] };
2056 
/*
 * Offsets expected from ucol_getOffset() for the "root@collation=search"
 * collator, one entry per collation element returned while iterating over
 * tsceText, plus a final entry for the call that returns UCOL_NULLORDER.
 * Each row below starts with the index of a U+0020 in tsceText; the last
 * entry is the text length.
 */
static const int32_t rootSearchOffsets[] = {
    /* U+0020 at  0 */   0,   1,  2,
    /* U+0020 at  2 */   2,   3,  4,  4,
    /* U+0020 at  4 */   4,   5,  6,  6,  6,
    /* U+0020 at  6 */   6,   7,  8,  8,  8,  8,  8,  8,
    /* U+0020 at  8 */   8,   9, 10, 11,
    /* U+0020 at 12 */  12,  13, 14, 15,
    /* U+0020 at 16 */  16,  17, 18, 19, 20,
    /* U+0020 at 20 */  20,  21, 22, 22, 23, 23, 23, 24,
    /* U+0020 at 24 */  24,  25, 26, 26, 26,
    /* U+0020 at 26 */  26,  27, 28, 28,
    /* U+0020 at 28 */  28,
    /* end of text  */  29
};

/* number of entries in rootSearchOffsets */
enum { kLen_rootSearchOffsets = sizeof rootSearchOffsets / sizeof rootSearchOffsets[0] };
2072 
2073 typedef struct {
2074     const char *    locale;
2075     const int32_t * offsets;
2076     int32_t         offsetsLen;
2077 } TSCEItem;
2078 
2079 static const TSCEItem tsceItems[] = {
2080     { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
2081     { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
2082     { NULL,                    NULL,                0                        }
2083 };
2084 
TestSearchCollatorElements(void)2085 static void TestSearchCollatorElements(void)
2086 {
2087     const TSCEItem * tsceItemPtr;
2088     for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2089         UErrorCode status = U_ZERO_ERROR;
2090         UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2091         if ( U_SUCCESS(status) ) {
2092             UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2093             if ( U_SUCCESS(status) ) {
2094                 int32_t offset, element;
2095                 const int32_t * nextOffsetPtr;
2096                 const int32_t * limitOffsetPtr;
2097 
2098                 nextOffsetPtr = tsceItemPtr->offsets;
2099                 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2100                 do {
2101                     offset = ucol_getOffset(uce);
2102                     element = ucol_next(uce, &status);
2103                     if ( element == 0 ) {
2104                         log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2105                     }
2106                     if ( nextOffsetPtr < limitOffsetPtr ) {
2107                         if (offset != *nextOffsetPtr) {
2108                             log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2109                                                             tsceItemPtr->locale, *nextOffsetPtr, offset );
2110                             nextOffsetPtr = limitOffsetPtr;
2111                             break;
2112                         }
2113                         nextOffsetPtr++;
2114                     } else {
2115                         log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2116                     }
2117                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2118                 if ( nextOffsetPtr < limitOffsetPtr ) {
2119                     log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2120                 }
2121 
2122                 ucol_setOffset(uce, kLen_tsceText, &status);
2123                 status = U_ZERO_ERROR;
2124                 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2125                 limitOffsetPtr = tsceItemPtr->offsets;
2126                 do {
2127                     offset = ucol_getOffset(uce);
2128                     element = ucol_previous(uce, &status);
2129                     if ( element == 0 ) {
2130                         log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2131                     }
2132                     if ( nextOffsetPtr > limitOffsetPtr ) {
2133                         nextOffsetPtr--;
2134                         if (offset != *nextOffsetPtr) {
2135                             log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2136                                                                 tsceItemPtr->locale, *nextOffsetPtr, offset );
2137                             nextOffsetPtr = limitOffsetPtr;
2138                             break;
2139                         }
2140                    } else {
2141                         log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2142                     }
2143                 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2144                 if ( nextOffsetPtr > limitOffsetPtr ) {
2145                     log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2146                 }
2147 
2148                 ucol_closeElements(uce);
2149             } else {
2150                 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2151             }
2152             ucol_close(ucol);
2153         } else {
2154             log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2155         }
2156     }
2157 }
2158 
2159 #endif /* #if !UCONFIG_NO_COLLATION */
2160