• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File CALLCOLL.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda              Ported for C API
15 ********************************************************************************
16 */
17 
18 /*
19  * Important: This file is included into intltest/allcoll.cpp so that the
20  * test data is shared. This makes it easier to maintain the test data,
21  * especially since the Unicode data must be portable and quoted character
22  * literals will not work.
23  * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
24  * that must prevent the actual code in here from being part of the
25  * allcoll.cpp compilation.
26  */
27 
28 /**
29  * CollationDummyTest is a third level test class.  This tests creation of
30  * a customized collator object.  For example, number 1 to be sorted
31  * equivalent to word 'one'.
32  */
33 
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37 
38 #include "unicode/utypes.h"
39 
40 #if !UCONFIG_NO_COLLATION
41 
42 #include "unicode/ucol.h"
43 #include "unicode/uloc.h"
44 #include "unicode/ures.h"
45 #include "unicode/udata.h"
46 #include "unicode/ucoleitr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uclean.h"
49 #include "unicode/putil.h"
50 #include "unicode/uenum.h"
51 
52 #include "cintltst.h"
53 #include "ccolltst.h"
54 #include "callcoll.h"
55 #include "calldata.h"
56 #include "cstring.h"
57 #include "cmemory.h"
58 
59 /* set to 1 to test offsets in backAndForth() */
60 #define TEST_OFFSETS 0
61 
62 /* perform test with strength PRIMARY */
63 static void TestPrimary(void);
64 
65 /* perform test with strength SECONDARY */
66 static void TestSecondary(void);
67 
68 /* perform test with strength tertiary */
69 static void TestTertiary(void);
70 
71 /*perform tests with strength Identical */
72 static void TestIdentical(void);
73 
74 /* perform extra tests */
75 static void TestExtra(void);
76 
77 /* Test jitterbug 581 */
78 static void TestJB581(void);
79 
80 /* Test jitterbug 1401 */
81 static void TestJB1401(void);
82 
83 /* Test [variable top] in the rule syntax */
84 static void TestVariableTop(void);
85 
86 /* Test surrogates */
87 static void TestSurrogates(void);
88 
89 static void TestInvalidRules(void);
90 
91 static void TestJitterbug1098(void);
92 
93 static void TestFCDCrash(void);
94 
95 static void TestJ5298(void);
96 
97 static void TestBadKey(void);
98 
99 const UCollationResult results[] = {
100     UCOL_LESS,
101     UCOL_LESS, /*UCOL_GREATER,*/
102     UCOL_LESS,
103     UCOL_LESS,
104     UCOL_LESS,
105     UCOL_LESS,
106     UCOL_LESS,
107     UCOL_GREATER,
108     UCOL_GREATER,
109     UCOL_LESS,                                     /*  10 */
110     UCOL_GREATER,
111     UCOL_LESS,
112     UCOL_GREATER,
113     UCOL_GREATER,
114     UCOL_LESS,
115     UCOL_LESS,
116     UCOL_LESS,
117     /*  test primary > 17 */
118     UCOL_EQUAL,
119     UCOL_EQUAL,
120     UCOL_EQUAL,                                    /*  20 */
121     UCOL_LESS,
122     UCOL_LESS,
123     UCOL_EQUAL,
124     UCOL_EQUAL,
125     UCOL_EQUAL,
126     UCOL_LESS,
127     /*  test secondary > 26 */
128     UCOL_EQUAL,
129     UCOL_EQUAL,
130     UCOL_EQUAL,
131     UCOL_EQUAL,
132     UCOL_EQUAL,                                    /*  30 */
133     UCOL_EQUAL,
134     UCOL_LESS,
135     UCOL_EQUAL,                                     /*  34 */
136     UCOL_EQUAL,
137     UCOL_EQUAL,
138     UCOL_LESS                                        /* 37 */
139 };
140 
141 
/*
 * Appends two characters for val to the NUL-terminated string in dst: one
 * produced by T_CString_itosOffset() for the high nibble and one for the low
 * nibble, followed by a new terminator. The caller must guarantee that dst
 * has room for three more bytes.
 */
static void uprv_appendByteToHexString(char *dst, uint8_t val) {
  uint32_t end = (uint32_t)uprv_strlen(dst);
  char *tail = dst + end;

  tail[0] = T_CString_itosOffset((val >> 4));
  tail[1] = T_CString_itosOffset((val & 0xF));
  tail[2] = 0;
}
149 
150 /* this function makes a string with representation of a sortkey */
sortKeyToString(const UCollator * coll,const uint8_t * sortkey,char * buffer,uint32_t * len)151 static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
152     int32_t strength = UCOL_PRIMARY;
153     uint32_t res_size = 0;
154     UBool doneCase = false;
155     UErrorCode errorCode = U_ZERO_ERROR;
156 
157     char *current = buffer;
158     const uint8_t *currentSk = sortkey;
159 
160     uprv_strcpy(current, "[");
161 
162     while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
163         if(strength > UCOL_PRIMARY) {
164             uprv_strcat(current, " . ");
165         }
166         while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
167             uprv_appendByteToHexString(current, *currentSk++);
168             uprv_strcat(current, " ");
169         }
170         if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == false) {
171             doneCase = true;
172         } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == true || strength != UCOL_SECONDARY) {
173             strength ++;
174         }
175         if (*currentSk) {
176             uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
177         }
178         if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
179             break;
180         }
181     }
182 
183     if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
184         uprv_strcat(current, " . ");
185         while(*currentSk != 0) {
186             uprv_appendByteToHexString(current, *currentSk++);
187             uprv_strcat(current, " ");
188         }
189 
190         uprv_appendByteToHexString(current, *currentSk++);
191     }
192     uprv_strcat(current, "]");
193 
194     if(res_size > *len) {
195         return NULL;
196     }
197 
198     return buffer;
199 }
200 
/*
 * Registers every test of this file under the "tscoll/callcoll/" path of the
 * test tree rooted at *root. The registration order is kept as it was.
 */
void addAllCollTest(TestNode** root)
{
    addTest(root, TestPrimary,       "tscoll/callcoll/TestPrimary");
    addTest(root, TestSecondary,     "tscoll/callcoll/TestSecondary");
    addTest(root, TestTertiary,      "tscoll/callcoll/TestTertiary");
    addTest(root, TestIdentical,     "tscoll/callcoll/TestIdentical");
    addTest(root, TestExtra,         "tscoll/callcoll/TestExtra");
    addTest(root, TestJB581,         "tscoll/callcoll/TestJB581");
    addTest(root, TestVariableTop,   "tscoll/callcoll/TestVariableTop");
    addTest(root, TestSurrogates,    "tscoll/callcoll/TestSurrogates");
    addTest(root, TestInvalidRules,  "tscoll/callcoll/TestInvalidRules");
    addTest(root, TestJB1401,        "tscoll/callcoll/TestJB1401");
    addTest(root, TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
    addTest(root, TestFCDCrash,      "tscoll/callcoll/TestFCDCrash");
    addTest(root, TestJ5298,         "tscoll/callcoll/TestJ5298");
    addTest(root, TestBadKey,        "tscoll/callcoll/TestBadKey");
}
218 
hasCollationElements(const char * locName)219 UBool hasCollationElements(const char *locName) {
220 
221   UErrorCode status = U_ZERO_ERROR;
222 
223   UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);
224 
225   if(U_SUCCESS(status)) {
226     status = U_ZERO_ERROR;
227     loc = ures_getByKey(loc, "collations", loc, &status);
228     ures_close(loc);
229     if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
230       return true;
231     }
232   }
233   return false;
234 }
235 
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode * status)236 static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
237   int32_t partialSKResult = 0;
238   UCharIterator sIter, tIter;
239   uint32_t sState[2], tState[2];
240   int32_t sSize = pieceSize, tSize = pieceSize;
241   /*int32_t i = 0;*/
242   uint8_t sBuf[16384], tBuf[16384];
243   if(pieceSize > 16384) {
244     log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
245     *status = U_BUFFER_OVERFLOW_ERROR;
246     return UCOL_EQUAL;
247   }
248   *status = U_ZERO_ERROR;
249   sState[0] = 0; sState[1] = 0;
250   tState[0] = 0; tState[1] = 0;
251   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
252     uiter_setString(&sIter, source, sLen);
253     uiter_setString(&tIter, target, tLen);
254     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
255     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
256 
257     if(sState[0] != 0 || tState[0] != 0) {
258       /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
259     }
260     /*log_verbose("%i ", i++);*/
261 
262     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
263   }
264 
265   if(partialSKResult < 0) {
266       return UCOL_LESS;
267   } else if(partialSKResult > 0) {
268     return UCOL_GREATER;
269   } else {
270     return UCOL_EQUAL;
271   }
272 }
273 
/*
 * Logs the printable form of one sort key through log_verbose().
 * The text buffer is sized from the key length: sortKeyToString() writes at
 * most three characters per key byte plus a small, fixed number of
 * separators, so a fixed-size stack buffer could overflow for long keys.
 *
 * keyLen is the number of key bytes including the terminating zero.
 */
static void logGeneratedSortKey(const UCollator *coll, const uint8_t *sortKey, int32_t keyLen)
{
    uint32_t capacity = (uint32_t)keyLen * 3 + 32;
    char *text = (char *)malloc(capacity);

    if (text == NULL) {
        log_err("Out of memory while formatting a sort key\n");
        return;
    }
    log_verbose("Generated sortkey: %s\n", sortKeyToString(coll, sortKey, text, &capacity));
    free(text);
}

/*
 * Compares source and target through every comparison path exercised here
 * and reports an error whenever a path disagrees with the expected result:
 *   - ucol_strcoll with explicit lengths and with NUL-terminated input
 *   - ucol_strcollIter over UTF-16
 *   - ucol_strcollUTF8 and ucol_strcollIter over a UTF-8 conversion
 *     (skipped when the 256-byte conversion buffers are too small)
 *   - partial sort keys of several piece sizes, also with normalization
 *     forced on when not in quick mode
 *   - full sort keys, including a length consistency check
 *
 * The collator's normalization mode is changed temporarily and restored to
 * the value read on entry.
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;

    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    if (compareResult != result) {
        log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
            compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResulta != result) {
        /* Report the value that was actually checked (compareResulta). */
        log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }

    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
    {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                {
                    /* ucol_strcollUTF8 */
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with explicit length\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                            compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }

                {
                    /* char iterator over UTF8 */
                    UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;

                    uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                    uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                    compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    /* Repeat with normalization on, after rewinding both iterators. */
                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                    sIter.move(&sIter, 0, UITER_START);
                    tIter.move(&tIter, 0, UITER_START);
                    compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                    if(compareResultUTF8Iter != compareResultIter) {
                        log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
                        log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(getTestOption(QUICK_OPTION) <= 0) {
        partialSizesSize = 7;
      }
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
    }

    /* Preflight both keys to learn their lengths. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if (sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Out of memory allocating sort key buffers\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                              */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = (int)uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = (int)uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        logGeneratedSortKey(myCollation, sortKey1, gSortklen1);
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        logGeneratedSortKey(myCollation, sortKey2, gSortklen2);
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
455 
/*
 * Runs doTestVariant() for (source, target) with the expected result and
 * then for the swapped pair with the mirrored expectation (LESS <-> GREATER,
 * EQUAL stays EQUAL). Without a collator only a data error is logged.
 */
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
  UCollationResult mirrored;

  if(!myCollation) {
    log_data_err("No collator! Any data around?\n");
    return;
  }

  doTestVariant(myCollation, source, target, result);

  if(result == UCOL_LESS) {
    mirrored = UCOL_GREATER;
  } else if(result == UCOL_GREATER) {
    mirrored = UCOL_LESS;
  } else {
    mirrored = UCOL_EQUAL;
  }
  doTestVariant(myCollation, target, source, mirrored);
}
471 
472 
473 /**
474  * Return an integer array containing all of the collation orders
475  * returned by calls to next on the specified iterator
476  */
getOrders(UCollationElements * iter,int32_t * orderLength)477 OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
478 {
479     UErrorCode status;
480     int32_t order;
481     int32_t maxSize = 100;
482     int32_t size = 0;
483     int32_t offset = ucol_getOffset(iter);
484     OrderAndOffset *temp;
485     OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
486     status= U_ZERO_ERROR;
487 
488 
489     while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
490     {
491         if (size == maxSize)
492         {
493             maxSize *= 2;
494             temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
495 
496             memcpy(temp, orders, size * sizeof(OrderAndOffset));
497             free(orders);
498             orders = temp;
499 
500         }
501 
502         orders[size].order  = order;
503         orders[size].offset = offset;
504 
505         offset = ucol_getOffset(iter);
506         size += 1;
507     }
508 
509     if (maxSize > size && size > 0)
510     {
511         temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
512 
513         memcpy(temp, orders, size * sizeof(OrderAndOffset));
514         free(orders);
515         orders = temp;
516 
517 
518     }
519 
520     *orderLength = size;
521     return orders;
522 }
523 
524 
525 void
backAndForth(UCollationElements * iter)526 backAndForth(UCollationElements *iter)
527 {
528     /* Run through the iterator forwards and stick it into an array */
529     int32_t idx, o;
530     UErrorCode status = U_ZERO_ERROR;
531     int32_t orderLength = 0;
532     OrderAndOffset *orders = getOrders(iter, &orderLength);
533 
534 
535     /* Now go through it backwards and make sure we get the same values */
536     idx = orderLength;
537     ucol_reset(iter);
538 
539     /* synwee : changed */
540     while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
541 #if TEST_OFFSETS
542       int32_t offset =
543 #endif
544         ucol_getOffset(iter);
545 
546       idx -= 1;
547       if (o != orders[idx].order) {
548         if (o == 0)
549           idx ++;
550         else {
551           while (idx > 0 && orders[-- idx].order == 0) {
552             /* nothing... */
553           }
554 
555           if (o != orders[idx].order) {
556               log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx,
557                 orders[idx].order, o);
558             goto bail;
559           }
560         }
561       }
562 
563 #if TEST_OFFSETS
564       if (offset != orders[idx].offset) {
565         log_err("Mismatched offset at index %d: %d vs. %d\n", idx,
566             orders[idx].offset, offset);
567         goto bail;
568       }
569 #endif
570 
571     }
572 
573     while (idx != 0 && orders[idx - 1].order == 0) {
574       idx -= 1;
575     }
576 
577     if (idx != 0) {
578         log_err("Didn't get back to beginning - index is %d\n", idx);
579 
580         ucol_reset(iter);
581         log_err("\nnext: ");
582 
583         if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
584             log_err("Error at %x\n", o);
585         }
586 
587         log_err("\nprev: ");
588 
589         if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
590             log_err("Error at %x\n", o);
591         }
592 
593         log_verbose("\n");
594     }
595 
596 bail:
597     free(orders);
598 }
599 
/*
 * Checks every ordered pair (s[i], s[j]) with i < j: the unescaped strings
 * are compared with doTest() against the expected result, and each of them
 * is additionally run through the collation element iterator round trip in
 * backAndForth().
 *
 * coll    collator under test
 * s       array of escaped test strings (unescaped into 2048-UChar buffers)
 * size    number of entries in s; 0 or 1 entries produce no comparisons
 * result  expected result for every pair
 *
 * If the element iterator cannot be created, the error is logged and only
 * the iterator round trips are skipped; the string comparisons still run.
 */
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
  UChar t1[2048] = {0};
  UChar t2[2048] = {0};
  UCollationElements *iter;
  UErrorCode status = U_ZERO_ERROR;

  uint32_t i = 0, j = 0;
  log_verbose("testing sequence:\n");
  for(i = 0; i < size; i++) {
    log_verbose("%s\n", s[i]);
  }

  iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
  if (U_FAILURE(status)) {
    log_err("Creation of iterator failed\n");
  }
  /* "i + 1 < size" instead of "i < size-1": size is unsigned and may be 0. */
  for(i = 0; i + 1 < size; i++) {
    for(j = i+1; j < size; j++) {
      u_unescape(s[i], t1, 2048);
      u_unescape(s[j], t2, 2048);
      doTest(coll, t1, t2, result);
      /* synwee : added collation element iterator test */
      if (iter != NULL) {
        ucol_setText(iter, t1, u_strlen(t1), &status);
        backAndForth(iter);
        ucol_setText(iter, t2, u_strlen(t2), &status);
        backAndForth(iter);
      }
    }
  }
  if (iter != NULL) {
    ucol_closeElements(iter);
  }
}
630 
/* Ordering test that expects every earlier string to sort before every later one. */
void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size)
{
  const UCollationResult expected = UCOL_LESS;

  genericOrderingTestWithResult(coll, s, size, expected);
}
634 
genericLocaleStarter(const char * locale,const char * const s[],uint32_t size)635 void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) {
636   UErrorCode status = U_ZERO_ERROR;
637   UCollator *coll = ucol_open(locale, &status);
638 
639   log_verbose("Locale starter for %s\n", locale);
640 
641   if(U_SUCCESS(status)) {
642     genericOrderingTest(coll, s, size);
643   } else if(status == U_FILE_ACCESS_ERROR) {
644     log_data_err("Is your data around?\n");
645     return;
646   } else {
647     log_err("Unable to open collator for locale %s\n", locale);
648   }
649   ucol_close(coll);
650 }
651 
/*
 * Opens a collator for locale and runs genericOrderingTestWithResult() on s
 * with the given expected result. A U_FILE_ACCESS_ERROR is reported as a
 * data error; any other failure to open the collator is a test error.
 */
void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open(locale, &status);

  log_verbose("Locale starter for %s\n", locale);

  if(U_FAILURE(status)) {
    if(status == U_FILE_ACCESS_ERROR) {
      log_data_err("Is your data around?\n");
      return;
    }
    log_err("Unable to open collator for locale %s\n", locale);
    ucol_close(coll);
    return;
  }

  genericOrderingTestWithResult(coll, s, size, result);
  ucol_close(coll);
}
668 
669 /* currently not used with options */
genericRulesStarterWithOptionsAndResult(const char * rules,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize,UCollationResult result)670 void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
671   UErrorCode status = U_ZERO_ERROR;
672   UChar rlz[RULE_BUFFER_LEN] = { 0 };
673   uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
674   uint32_t i;
675 
676   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
677 
678   log_verbose("Rules starter for %s\n", rules);
679 
680   if(U_SUCCESS(status)) {
681     log_verbose("Setting attributes\n");
682     for(i = 0; i < attsize; i++) {
683       ucol_setAttribute(coll, attrs[i], values[i], &status);
684     }
685 
686     genericOrderingTestWithResult(coll, s, size, result);
687   } else {
688     log_err_status(status, "Unable to open collator with rules %s\n", rules);
689   }
690   ucol_close(coll);
691 }
692 
/*
 * Opens a collator for locale, applies the attsize attribute/value pairs
 * from attrs and values, and runs genericOrderingTestWithResult() on s with
 * the expected result.
 */
void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open(locale, &status);

  log_verbose("Locale starter for %s\n", locale);

  if(U_FAILURE(status)) {
    log_err_status(status, "Unable to open collator for locale %s\n", locale);
  } else {
    uint32_t k;

    log_verbose("Setting attributes\n");
    for(k = 0; k != attsize; ++k) {
      ucol_setAttribute(coll, attrs[k], values[k], &status);
    }

    genericOrderingTestWithResult(coll, s, size, result);
  }
  ucol_close(coll);
}
714 
genericLocaleStarterWithOptions(const char * locale,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize)715 void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
716   genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS);
717 }
718 
/*
 * Builds a collator from the escaped rule string and runs
 * genericOrderingTestWithResult() on s with the expected result.
 * A U_FILE_ACCESS_ERROR is reported as a data error; any other failure to
 * build the collator is a test error. The collator is closed only when it
 * was opened successfully.
 */
void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
  UChar rlz[RULE_BUFFER_LEN] = { 0 };
  UErrorCode status = U_ZERO_ERROR;
  uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

  log_verbose("Rules starter for %s\n", rules);

  if(U_FAILURE(status)) {
    if(status == U_FILE_ACCESS_ERROR) {
      log_data_err("Is your data around?\n");
    } else {
      log_err("Unable to open collator with rules %s\n", rules);
    }
    return;
  }

  genericOrderingTestWithResult(coll, s, size, result);
  ucol_close(coll);
}
737 
genericRulesStarter(const char * rules,const char * const s[],uint32_t size)738 void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) {
739   genericRulesStarterWithResult(rules, s, size, UCOL_LESS);
740 }
741 
TestTertiary()742 static void TestTertiary()
743 {
744     int32_t len,i;
745     UCollator *myCollation;
746     UErrorCode status=U_ZERO_ERROR;
747     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
748     UChar rules[sizeof(str)];
749     len = (int32_t)strlen(str);
750     u_uastrcpy(rules, str);
751 
752     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
753     if(U_FAILURE(status)){
754         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
755         return;
756     }
757 
758     ucol_setStrength(myCollation, UCOL_TERTIARY);
759     for (i = 0; i < 17 ; i++)
760     {
761         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
762     }
763     ucol_close(myCollation);
764     myCollation = 0;
765 }
766 
TestPrimary()767 static void TestPrimary( )
768 {
769     int32_t len,i;
770     UCollator *myCollation;
771     UErrorCode status=U_ZERO_ERROR;
772     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
773     UChar rules[sizeof(str)];
774     len = (int32_t)strlen(str);
775     u_uastrcpy(rules, str);
776 
777     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
778     if(U_FAILURE(status)){
779         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
780         return;
781     }
782     ucol_setStrength(myCollation, UCOL_PRIMARY);
783 
784     for (i = 17; i < 26 ; i++)
785     {
786 
787         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
788     }
789     ucol_close(myCollation);
790     myCollation = 0;
791 }
792 
TestSecondary()793 static void TestSecondary()
794 {
795     int32_t i;
796     int32_t len;
797     UCollator *myCollation;
798     UErrorCode status=U_ZERO_ERROR;
799     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
800     UChar rules[sizeof(str)];
801     len = (int32_t)strlen(str);
802     u_uastrcpy(rules, str);
803 
804     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
805     if(U_FAILURE(status)){
806         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
807         return;
808     }
809     ucol_setStrength(myCollation, UCOL_SECONDARY);
810     for (i = 26; i < 34 ; i++)
811     {
812         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
813     }
814     ucol_close(myCollation);
815     myCollation = 0;
816 }
817 
TestIdentical()818 static void TestIdentical()
819 {
820     int32_t i;
821     int32_t len;
822     UCollator *myCollation;
823     UErrorCode status=U_ZERO_ERROR;
824     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
825     UChar rules[sizeof(str)];
826     len = (int32_t)strlen(str);
827     u_uastrcpy(rules, str);
828 
829     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
830     if(U_FAILURE(status)){
831         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
832         return;
833     }
834     for(i= 34; i<37; i++)
835     {
836         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
837     }
838     ucol_close(myCollation);
839     myCollation = 0;
840 }
841 
TestExtra()842 static void TestExtra()
843 {
844     int32_t i, j;
845     int32_t len;
846     UCollator *myCollation;
847     UErrorCode status = U_ZERO_ERROR;
848     static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
849     UChar rules[sizeof(str)];
850     len = (int32_t)strlen(str);
851     u_uastrcpy(rules, str);
852 
853     myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
854     if(U_FAILURE(status)){
855         log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
856         return;
857     }
858     ucol_setStrength(myCollation, UCOL_TERTIARY);
859     for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
860     {
861         for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
862         {
863 
864             doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
865         }
866     }
867     ucol_close(myCollation);
868     myCollation = 0;
869 }
870 
TestJB581(void)871 static void TestJB581(void)
872 {
873     int32_t     bufferLen   = 0;
874     UChar       source      [100];
875     UChar       target      [100];
876     UCollationResult result     = UCOL_EQUAL;
877     uint8_t     sourceKeyArray  [100];
878     uint8_t     targetKeyArray  [100];
879     int32_t     sourceKeyOut    = 0,
880                 targetKeyOut    = 0;
881     UCollator   *myCollator = 0;
882     UErrorCode status = U_ZERO_ERROR;
883 
884     /*u_uastrcpy(source, "This is a test.");*/
885     /*u_uastrcpy(target, "THISISATEST.");*/
886     u_uastrcpy(source, "THISISATEST.");
887     u_uastrcpy(target, "Thisisatest.");
888 
889     myCollator = ucol_open("en_US", &status);
890     if (U_FAILURE(status)){
891         log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
892         return;
893     }
894     result = ucol_strcoll(myCollator, source, -1, target, -1);
895     /* result is 1, secondary differences only for ignorable space characters*/
896     if (result != 1)
897     {
898         log_err("Comparing two strings with only secondary differences in C failed.\n");
899     }
900     /* To compare them with just primary differences */
901     ucol_setStrength(myCollator, UCOL_PRIMARY);
902     result = ucol_strcoll(myCollator, source, -1, target, -1);
903     /* result is 0 */
904     if (result != 0)
905     {
906         log_err("Comparing two strings with no differences in C failed.\n");
907     }
908     /* Now, do the same comparison with keys */
909     sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
910     (void)sourceKeyOut;    /* Suppress set but not used warning. */
911     targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
912     bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
913     if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
914     {
915         log_err("Comparing two strings with sort keys in C failed.\n");
916     }
917     ucol_close(myCollator);
918 }
919 
TestJB1401(void)920 static void TestJB1401(void)
921 {
922     UCollator     *myCollator = 0;
923     UErrorCode     status = U_ZERO_ERROR;
924     static UChar   NFD_UnsafeStartChars[] = {
925         0x0f73,          /* Tibetan Vowel Sign II */
926         0x0f75,          /* Tibetan Vowel Sign UU */
927         0x0f81,          /* Tibetan Vowel Sign Reversed II */
928             0
929     };
930     int            i;
931 
932 
933     myCollator = ucol_open("en_US", &status);
934     if (U_FAILURE(status)){
935         log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
936         return;
937     }
938     ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
939     if (U_FAILURE(status)){
940         log_err("ERROR: Failed to set normalization mode ON for collator.\n");
941         return;
942     }
943 
944     for (i=0; ; i++) {
945         UChar    c;
946         UChar    X[4];
947         UChar    Y[20];
948         UChar    Z[20];
949 
950         /*  Get the next funny character to be tested, and set up the
951          *  three test strings X, Y, Z, consisting of an A-grave + test char,
952          *    in original form, NFD, and then NFC form.
953          */
954         c = NFD_UnsafeStartChars[i];
955         if (c==0) {break;}
956 
957         X[0]=0xC0; X[1]=c; X[2]=0;   /* \u00C0 is A Grave*/
958 
959         unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
960         unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
961         if (U_FAILURE(status)){
962             log_err("ERROR: Failed to normalize test of character %x\n", c);
963             return;
964         }
965 
966         /* Collation test.  All three strings should be equal.
967          *   doTest does both strcoll and sort keys, with params in both orders.
968          */
969         doTest(myCollator, X, Y, UCOL_EQUAL);
970         doTest(myCollator, X, Z, UCOL_EQUAL);
971         doTest(myCollator, Y, Z, UCOL_EQUAL);
972 
973         /* Run collation element iterators over the three strings.  Results should be same for each.
974          */
975         {
976             UCollationElements *ceiX, *ceiY, *ceiZ;
977             int32_t             ceX,   ceY,   ceZ;
978             int                 j;
979 
980             ceiX = ucol_openElements(myCollator, X, -1, &status);
981             ceiY = ucol_openElements(myCollator, Y, -1, &status);
982             ceiZ = ucol_openElements(myCollator, Z, -1, &status);
983             if (U_FAILURE(status)) {
984                 log_err("ERROR: uucol_openElements failed.\n");
985                 return;
986             }
987 
988             for (j=0;; j++) {
989                 ceX = ucol_next(ceiX, &status);
990                 ceY = ucol_next(ceiY, &status);
991                 ceZ = ucol_next(ceiZ, &status);
992                 if (U_FAILURE(status)) {
993                     log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
994                     break;
995                 }
996                 if (ceX != ceY || ceY != ceZ) {
997                     log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
998                     break;
999                 }
1000                 if (ceX == UCOL_NULLORDER) {
1001                     break;
1002                 }
1003             }
1004             ucol_closeElements(ceiX);
1005             ucol_closeElements(ceiY);
1006             ucol_closeElements(ceiZ);
1007         }
1008     }
1009     ucol_close(myCollator);
1010 }
1011 
1012 
1013 
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any code points before [variable top] should give
* a primary CE of 0.
*
* NOTE: the entire body is wrapped in "#if 0", so this function currently does
* nothing when it is called; see the comment inside for the reason.
*/
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char       str[]          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      rules[sizeof(str)];
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        return;
    }

    /* Both collators are switched to shifted alternate handling. */
    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* Reference key: all zero bytes, compared against each generated sort key. */
    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Letters 'a' through 'y' are checked with the tailored collator ('z' itself is excluded). */
    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
        ch ++;
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
1096 
1097 /**
1098   * Tests surrogate support.
1099   * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
1100   * Therefore, another (unassigned) code point was used for this test.
1101   */
TestSurrogates(void)1102 static void TestSurrogates(void)
1103 {
1104     static const char       str[]          =
1105                               "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
1106           int         len          = (int)strlen(str);
1107           int         rlen         = 0;
1108           UChar      rules[sizeof(str)];
1109           UCollator  *myCollation;
1110           UCollator  *enCollation;
1111           UErrorCode  status       = U_ZERO_ERROR;
1112           UChar       source[][4]    =
1113           {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1114           UChar       target[][4]    =
1115           {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1116           int         count        = 0;
1117           uint8_t enresult[20], myresult[20];
1118           int enlen, mylen;
1119 
1120     /* tests for open rules with surrogate rules */
1121     rlen = u_unescape(str, rules, len);
1122 
1123     enCollation = ucol_open("en_US", &status);
1124     if (U_FAILURE(status)) {
1125         log_err_status(status, "ERROR: in creation of collator :%s\n",
1126                 myErrorName(status));
1127         return;
1128     }
1129     myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
1130                                  UCOL_TERTIARY,NULL, &status);
1131     if (U_FAILURE(status)) {
1132         ucol_close(enCollation);
1133         log_err("ERROR: in creation of rule based collator :%s\n",
1134                 myErrorName(status));
1135         return;
1136     }
1137 
1138     /*
1139     this test is to verify the supplementary sort key order in the english
1140     collator
1141     */
1142     log_verbose("start of english collation supplementary characters test\n");
1143     while (count < 2) {
1144         doTest(enCollation, source[count], target[count], UCOL_LESS);
1145         count ++;
1146     }
1147     doTest(enCollation, source[count], target[count], UCOL_GREATER);
1148 
1149     log_verbose("start of tailored collation supplementary characters test\n");
1150     count = 0;
1151     /* tests getting collation elements for surrogates for tailored rules */
1152     while (count < 4) {
1153         doTest(myCollation, source[count], target[count], UCOL_LESS);
1154         count ++;
1155     }
1156 
1157     /* tests that \uD800\uDC02 still has the same value, not changed */
1158     enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
1159     mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
1160     if (enlen != mylen ||
1161         uprv_memcmp(enresult, myresult, enlen) != 0) {
1162         log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1163     }
1164 
1165     ucol_close(enCollation);
1166     ucol_close(myCollation);
1167     enCollation = NULL;
1168     myCollation = NULL;
1169 }
1170 
1171 /*
1172  *### TODO: Add more invalid rules to test all different scenarios.
1173  *
1174  */
1175 static void
TestInvalidRules()1176 TestInvalidRules(){
1177 #define MAX_ERROR_STATES 2
1178 
1179     static const char* rulesArr[MAX_ERROR_STATES] = {
1180         "& C < ch, cH, Ch[this should fail]<d",
1181         "& C < ch, cH, & Ch[variable top]"
1182     };
1183     static const char* preContextArr[MAX_ERROR_STATES] = {
1184         " C < ch, cH, Ch",
1185         "& C < ch, cH",
1186 
1187     };
1188     static const char* postContextArr[MAX_ERROR_STATES] = {
1189         "[this should fa",
1190         ", & Ch[variable"
1191     };
1192     int i;
1193 
1194     for(i = 0;i<MAX_ERROR_STATES;i++){
1195         UChar rules[1000]       = { '\0' };
1196         UChar preContextExp[1000]  = { '\0' };
1197         UChar postContextExp[1000] = { '\0' };
1198         UParseError parseError;
1199         UErrorCode status = U_ZERO_ERROR;
1200         UCollator* coll=0;
1201         u_charsToUChars(rulesArr[i], rules, (int32_t)uprv_strlen(rulesArr[i]) + 1);
1202         u_charsToUChars(preContextArr[i], preContextExp, (int32_t)uprv_strlen(preContextArr[i]) + 1);
1203         u_charsToUChars(postContextArr[i], postContextExp, (int32_t)uprv_strlen(postContextArr[i]) + 1);
1204         /* clean up stuff in parseError */
1205         u_memset(parseError.preContext, 0x0000, U_PARSE_CONTEXT_LEN);
1206         u_memset(parseError.postContext, 0x0000, U_PARSE_CONTEXT_LEN);
1207         /* open the rules and test */
1208         coll = ucol_openRules(rules, u_strlen(rules), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1209         (void)coll;   /* Suppress set but not used warning. */
1210         if(u_strcmp(parseError.preContext,preContextExp)!=0){
1211             log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1212                            aescstrdup(parseError.preContext, -1));
1213         }
1214         if(u_strcmp(parseError.postContext,postContextExp)!=0){
1215             log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1216                            aescstrdup(parseError.postContext, -1));
1217         }
1218     }
1219 }
1220 
1221 static void
TestJitterbug1098()1222 TestJitterbug1098(){
1223     UChar rule[1000];
1224     UCollator* c1 = NULL;
1225     UErrorCode status = U_ZERO_ERROR;
1226     UParseError parseError;
1227     char preContext[200]={0};
1228     char postContext[200]={0};
1229     int i=0;
1230     const char* rules[] = {
1231          "&''<\\\\",
1232          "&\\'<\\\\",
1233          "&\\\"<'\\'",
1234          "&'\"'<\\'",
1235          NULL
1236 
1237     };
1238     const UCollationResult results1098[] = {
1239         UCOL_LESS,
1240         UCOL_LESS,
1241         UCOL_LESS,
1242         UCOL_LESS,
1243     };
1244     const UChar input[][2]= {
1245         {0x0027,0x005c},
1246         {0x0027,0x005c},
1247         {0x0022,0x005c},
1248         {0x0022,0x0027},
1249     };
1250     UChar X[2] ={0};
1251     UChar Y[2] ={0};
1252     u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1253     u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1254     for(;rules[i]!=0;i++){
1255         u_uastrcpy(rule, rules[i]);
1256         c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1257         if(U_FAILURE(status)){
1258             log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status));
1259 
1260             if (status == U_PARSE_ERROR) {
1261                 u_UCharsToChars(parseError.preContext,preContext,20);
1262                 u_UCharsToChars(parseError.postContext,postContext,20);
1263                 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
1264             }
1265 
1266             return;
1267         }
1268         X[0] = input[i][0];
1269         Y[0] = input[i][1];
1270         doTest(c1,X,Y,results1098[i]);
1271         ucol_close(c1);
1272     }
1273 }
1274 
1275 static void
TestFCDCrash(void)1276 TestFCDCrash(void) {
1277     static const char *test[] = {
1278     "Gr\\u00F6\\u00DFe",
1279     "Grossist"
1280     };
1281 
1282     UErrorCode status = U_ZERO_ERROR;
1283     UCollator *coll = ucol_open("es", &status);
1284     if(U_FAILURE(status)) {
1285         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1286         return;
1287     }
1288     ucol_close(coll);
1289     coll = NULL;
1290     ctest_resetICU();
1291     coll = ucol_open("de_DE", &status);
1292     if(U_FAILURE(status)) {
1293         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1294         return;
1295     }
1296     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1297     genericOrderingTest(coll, test, 2);
1298     ucol_close(coll);
1299 }
1300 
1301 /*static UBool
1302 find(UEnumeration* list, const char* str, UErrorCode* status){
1303     const char* value = NULL;
1304     int32_t length=0;
1305     if(U_FAILURE(*status)){
1306         return false;
1307     }
1308     uenum_reset(list, status);
1309     while( (value= uenum_next(list, &length, status))!=NULL){
1310         if(strcmp(value, str)==0){
1311             return true;
1312         }
1313     }
1314     return false;
1315 }*/
1316 
TestJ5298(void)1317 static void TestJ5298(void)
1318 {
1319     UErrorCode status = U_ZERO_ERROR;
1320     char input[256], output[256];
1321     UBool isAvailable;
1322     int32_t i = 0;
1323     UEnumeration* values = NULL;
1324     const char *keywordValue = NULL;
1325     log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
1326     values = ucol_getKeywordValues("collation", &status);
1327     while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1328         if (strncmp(keywordValue, "private-", 8) == 0) {
1329             log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue);
1330         }
1331     }
1332     for (i = 0; i < ucol_countAvailable(); i++) {
1333         uenum_reset(values, &status);
1334         while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1335             strcpy(input, ucol_getAvailable(i));
1336             if (strcmp(keywordValue, "standard") != 0) {
1337                 strcat(input, "@collation=");
1338                 strcat(input, keywordValue);
1339             }
1340 
1341             ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status);
1342             if (strcmp(input, output) == 0) { /* Unique locale, print it out */
1343                 log_verbose("%s, \n", output);
1344             }
1345         }
1346     }
1347     uenum_close(values);
1348     log_verbose("\n");
1349 }
1350 
/*
 * NULL-terminated list of locale IDs with unusual or invalid keywords,
 * consumed by TestBadKey(). The trailing notes record the ucol_open()
 * outcome the original author observed for each entry.
 */
static const char* badKeyLocales[] = {
    "@calendar=japanese;collation=search", /* ucol_open OK */
    "@calendar=japanese",                  /* ucol_open OK */
    "en@calendar=x",                       /* ucol_open OK */
    "ja@calendar=x",                       /* ucol_open OK */
    "en@collation=x",                      /* ucol_open OK */
    "ja@collation=x",                      /* ucol_open OK */
    "ja@collation=private-kana",           /* ucol_open fails, verify it does not crash */
    "en@collation=\x80",                   /* (x80 undef in ASCII,EBCDIC) ucol_open fails, verify it does not crash */
    NULL
};
1362 
1363 // Mainly this is to check that we don't have a crash, but we check
1364 // for correct NULL return and FAILURE/SUCCESS status as a bonus.
TestBadKey(void)1365 static void TestBadKey(void)
1366 {
1367     const char* badLoc;
1368     const char** badLocsPtr = badKeyLocales;
1369     while ((badLoc = *badLocsPtr++) != NULL) {
1370         UErrorCode status = U_ZERO_ERROR;
1371         UCollator* uc = ucol_open(badLoc, &status);
1372         if ( U_SUCCESS(status) ) {
1373             if (uc == NULL) {
1374                 log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc);
1375             }
1376             ucol_close(uc);
1377         } else if (uc != NULL) {
1378             log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc);
1379         }
1380     }
1381 }
1382 #endif /* #if !UCONFIG_NO_COLLATION */
1383