1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File CALLCOLL.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 ********************************************************************************
16 */
17
18 /*
19 * Important: This file is included into intltest/allcoll.cpp so that the
20 * test data is shared. This makes it easier to maintain the test data,
21 * especially since the Unicode data must be portable and quoted character
22 * literals will not work.
23 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
24 * that must prevent the actual code in here from being part of the
25 * allcoll.cpp compilation.
26 */
27
/**
 * CollationDummyTest is a third level test class. This tests creation of
 * a customized collator object. For example, number 1 to be sorted
 * equivalent to word 'one'.
 */
33
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #include <string.h>
37
38 #include "unicode/utypes.h"
39
40 #if !UCONFIG_NO_COLLATION
41
42 #include "unicode/ucol.h"
43 #include "unicode/uloc.h"
44 #include "unicode/ures.h"
45 #include "unicode/udata.h"
46 #include "unicode/ucoleitr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uclean.h"
49 #include "unicode/putil.h"
50 #include "unicode/uenum.h"
51
52 #include "cintltst.h"
53 #include "ccolltst.h"
54 #include "callcoll.h"
55 #include "calldata.h"
56 #include "cstring.h"
57 #include "cmemory.h"
58
59 /* set to 1 to test offsets in backAndForth() */
60 #define TEST_OFFSETS 0
61
62 /* perform test with strength PRIMARY */
63 static void TestPrimary(void);
64
65 /* perform test with strength SECONDARY */
66 static void TestSecondary(void);
67
68 /* perform test with strength tertiary */
69 static void TestTertiary(void);
70
71 /*perform tests with strength Identical */
72 static void TestIdentical(void);
73
74 /* perform extra tests */
75 static void TestExtra(void);
76
77 /* Test jitterbug 581 */
78 static void TestJB581(void);
79
80 /* Test jitterbug 1401 */
81 static void TestJB1401(void);
82
83 /* Test [variable top] in the rule syntax */
84 static void TestVariableTop(void);
85
86 /* Test surrogates */
87 static void TestSurrogates(void);
88
89 static void TestInvalidRules(void);
90
91 static void TestJitterbug1098(void);
92
93 static void TestFCDCrash(void);
94
95 static void TestJ5298(void);
96
97 static void TestBadKey(void);
98
99 const UCollationResult results[] = {
100 UCOL_LESS,
101 UCOL_LESS, /*UCOL_GREATER,*/
102 UCOL_LESS,
103 UCOL_LESS,
104 UCOL_LESS,
105 UCOL_LESS,
106 UCOL_LESS,
107 UCOL_GREATER,
108 UCOL_GREATER,
109 UCOL_LESS, /* 10 */
110 UCOL_GREATER,
111 UCOL_LESS,
112 UCOL_GREATER,
113 UCOL_GREATER,
114 UCOL_LESS,
115 UCOL_LESS,
116 UCOL_LESS,
117 /* test primary > 17 */
118 UCOL_EQUAL,
119 UCOL_EQUAL,
120 UCOL_EQUAL, /* 20 */
121 UCOL_LESS,
122 UCOL_LESS,
123 UCOL_EQUAL,
124 UCOL_EQUAL,
125 UCOL_EQUAL,
126 UCOL_LESS,
127 /* test secondary > 26 */
128 UCOL_EQUAL,
129 UCOL_EQUAL,
130 UCOL_EQUAL,
131 UCOL_EQUAL,
132 UCOL_EQUAL, /* 30 */
133 UCOL_EQUAL,
134 UCOL_LESS,
135 UCOL_EQUAL, /* 34 */
136 UCOL_EQUAL,
137 UCOL_EQUAL,
138 UCOL_LESS /* 37 */
139 };
140
141
/*
 * Appends two characters describing val to the NUL-terminated string in dst:
 * first the high nibble, then the low nibble, each converted with
 * T_CString_itosOffset (presumably to a hex digit -- confirm in cstring.h).
 * The string is re-terminated afterwards, so dst must have room for three
 * more chars beyond its current length.
 */
static
void uprv_appendByteToHexString(char *dst, uint8_t val) {
    uint32_t used = (uint32_t)uprv_strlen(dst);
    char *tail = dst + used;

    tail[0] = T_CString_itosOffset((val >> 4));
    tail[1] = T_CString_itosOffset((val & 0xF));
    tail[2] = 0;
}
149
150 /* this function makes a string with representation of a sortkey */
sortKeyToString(const UCollator * coll,const uint8_t * sortkey,char * buffer,uint32_t * len)151 static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
152 int32_t strength = UCOL_PRIMARY;
153 uint32_t res_size = 0;
154 UBool doneCase = false;
155 UErrorCode errorCode = U_ZERO_ERROR;
156
157 char *current = buffer;
158 const uint8_t *currentSk = sortkey;
159
160 uprv_strcpy(current, "[");
161
162 while(strength <= UCOL_QUATERNARY && strength <= ucol_getStrength(coll)) {
163 if(strength > UCOL_PRIMARY) {
164 uprv_strcat(current, " . ");
165 }
166 while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
167 uprv_appendByteToHexString(current, *currentSk++);
168 uprv_strcat(current, " ");
169 }
170 if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_ON && strength == UCOL_SECONDARY && doneCase == false) {
171 doneCase = true;
172 } else if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, &errorCode) == UCOL_OFF || doneCase == true || strength != UCOL_SECONDARY) {
173 strength ++;
174 }
175 if (*currentSk) {
176 uprv_appendByteToHexString(current, *currentSk++); /* This should print '01' */
177 }
178 if(strength == UCOL_QUATERNARY && ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &errorCode) == UCOL_NON_IGNORABLE) {
179 break;
180 }
181 }
182
183 if(ucol_getStrength(coll) == UCOL_IDENTICAL) {
184 uprv_strcat(current, " . ");
185 while(*currentSk != 0) {
186 uprv_appendByteToHexString(current, *currentSk++);
187 uprv_strcat(current, " ");
188 }
189
190 uprv_appendByteToHexString(current, *currentSk++);
191 }
192 uprv_strcat(current, "]");
193
194 if(res_size > *len) {
195 return NULL;
196 }
197
198 return buffer;
199 }
200
/*
 * Registers every test of this file below "tscoll/callcoll/" in the test
 * tree rooted at *root. Registration order follows the table.
 */
void addAllCollTest(TestNode** root)
{
    static const struct {
        void (*run)(void);
        const char *path;
    } tests[] = {
        { TestPrimary,       "tscoll/callcoll/TestPrimary" },
        { TestSecondary,     "tscoll/callcoll/TestSecondary" },
        { TestTertiary,      "tscoll/callcoll/TestTertiary" },
        { TestIdentical,     "tscoll/callcoll/TestIdentical" },
        { TestExtra,         "tscoll/callcoll/TestExtra" },
        { TestJB581,         "tscoll/callcoll/TestJB581" },
        { TestVariableTop,   "tscoll/callcoll/TestVariableTop" },
        { TestSurrogates,    "tscoll/callcoll/TestSurrogates" },
        { TestInvalidRules,  "tscoll/callcoll/TestInvalidRules" },
        { TestJB1401,        "tscoll/callcoll/TestJB1401" },
        { TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098" },
        { TestFCDCrash,      "tscoll/callcoll/TestFCDCrash" },
        { TestJ5298,         "tscoll/callcoll/TestJ5298" },
        { TestBadKey,        "tscoll/callcoll/TestBadKey" }
    };
    size_t i;

    for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        addTest(root, tests[i].run, tests[i].path);
    }
}
218
hasCollationElements(const char * locName)219 UBool hasCollationElements(const char *locName) {
220
221 UErrorCode status = U_ZERO_ERROR;
222
223 UResourceBundle *loc = ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll", locName, &status);
224
225 if(U_SUCCESS(status)) {
226 status = U_ZERO_ERROR;
227 loc = ures_getByKey(loc, "collations", loc, &status);
228 ures_close(loc);
229 if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
230 return true;
231 }
232 }
233 return false;
234 }
235
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode * status)236 static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
237 int32_t partialSKResult = 0;
238 UCharIterator sIter, tIter;
239 uint32_t sState[2], tState[2];
240 int32_t sSize = pieceSize, tSize = pieceSize;
241 /*int32_t i = 0;*/
242 uint8_t sBuf[16384], tBuf[16384];
243 if(pieceSize > 16384) {
244 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
245 *status = U_BUFFER_OVERFLOW_ERROR;
246 return UCOL_EQUAL;
247 }
248 *status = U_ZERO_ERROR;
249 sState[0] = 0; sState[1] = 0;
250 tState[0] = 0; tState[1] = 0;
251 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
252 uiter_setString(&sIter, source, sLen);
253 uiter_setString(&tIter, target, tLen);
254 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
255 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
256
257 if(sState[0] != 0 || tState[0] != 0) {
258 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
259 }
260 /*log_verbose("%i ", i++);*/
261
262 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
263 }
264
265 if(partialSKResult < 0) {
266 return UCOL_LESS;
267 } else if(partialSKResult > 0) {
268 return UCOL_GREATER;
269 } else {
270 return UCOL_EQUAL;
271 }
272 }
273
/*
 * Compares source with target through every comparison path exercised by
 * this file and reports an error whenever a path disagrees with result:
 *
 *   - ucol_strcoll with explicit lengths and with NUL-terminated input
 *   - ucol_strcollIter over the UTF-16 strings
 *   - ucol_strcollUTF8 and ucol_strcollIter over UTF-8 copies of the
 *     strings (skipped when a copy does not fit into 256 bytes)
 *   - partial sort keys of several piece sizes, optionally with
 *     normalization switched on
 *   - full sort keys, whose comparison is handed to reportCResult
 *
 * myCollation - collator under test; its normalization mode is changed
 *               temporarily and restored to the value read on entry
 * source      - NUL-terminated first string
 * target      - NUL-terminated second string
 * result      - expected outcome of comparing source with target
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    /* sortKeyToString reads *len, so give it the real capacity of buffer */
    uint32_t len = (uint32_t)sizeof(buffer);
    UErrorCode status = U_ZERO_ERROR;
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;

    compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
    if (compareResult != result) {
        log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                compareResult, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }
    compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
    if (compareResulta != result) {
        /* report the value that was actually checked */
        log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
    }

    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
    {
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;

        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                {
                    /* ucol_strcollUTF8 */
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, utf8SourceLen, utf8Target, utf8TargetLen, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with explicit length\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
                                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    compareResulta = ucol_strcollUTF8(myCollation, utf8Source, -1, utf8Target, -1, &status);
                    if (U_FAILURE(status)) {
                        log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
                        status = U_ZERO_ERROR;
                    } else if (compareResulta != result) {
                        log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
                                compareResulta, result, aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }

                {
                    /* char iterator over UTF8 */
                    UCollationResult compareResultUTF8Iter = result, compareResultUTF8IterNorm = result;

                    uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                    uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                    compareResultUTF8Iter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    /* same comparison again with normalization forced on */
                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                    sIter.move(&sIter, 0, UITER_START);
                    tIter.move(&tIter, 0, UITER_START);
                    compareResultUTF8IterNorm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                    if(compareResultUTF8Iter != compareResultIter) {
                        log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                    if(compareResultUTF8Iter != compareResultUTF8IterNorm) {
                        log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                    }
                }
            } else {
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    {
        int32_t i = 0;
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
        int32_t partialSizesSize = 1;
        if(getTestOption(QUICK_OPTION) <= 0) {
            partialSizesSize = 7;
        }
        for(i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

            partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
            if(partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                        partialSKResult, result,
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            if(getTestOption(QUICK_OPTION) <= 0 && norm != UCOL_ON) {
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
    }

    /* ask for the required key sizes first, then build the keys */
    sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    if (sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Out of memory while allocating sort key buffers\n");
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
    ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);

    ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
    ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical */
    /* to that generated with a length specified. */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /* the keys are compared as C strings; their measured length includes the terminator */
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = (int)uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = (int)uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
455
/*
 * Checks the comparison of source with target in both directions:
 * (source, target) must give result and (target, source) the mirrored
 * outcome. Any result other than UCOL_LESS or UCOL_GREATER is mirrored to
 * UCOL_EQUAL.
 *
 * Without a collator nothing is compared and a data error is logged.
 */
void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    UCollationResult mirrored;

    if (!myCollation) {
        log_data_err("No collator! Any data around?\n");
        return;
    }

    switch (result) {
    case UCOL_LESS:
        mirrored = UCOL_GREATER;
        break;
    case UCOL_GREATER:
        mirrored = UCOL_LESS;
        break;
    default:
        mirrored = UCOL_EQUAL;
        break;
    }

    doTestVariant(myCollation, source, target, result);
    doTestVariant(myCollation, target, source, mirrored);
}
471
472
473 /**
474 * Return an integer array containing all of the collation orders
475 * returned by calls to next on the specified iterator
476 */
getOrders(UCollationElements * iter,int32_t * orderLength)477 OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
478 {
479 UErrorCode status;
480 int32_t order;
481 int32_t maxSize = 100;
482 int32_t size = 0;
483 int32_t offset = ucol_getOffset(iter);
484 OrderAndOffset *temp;
485 OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
486 status= U_ZERO_ERROR;
487
488
489 while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
490 {
491 if (size == maxSize)
492 {
493 maxSize *= 2;
494 temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
495
496 memcpy(temp, orders, size * sizeof(OrderAndOffset));
497 free(orders);
498 orders = temp;
499
500 }
501
502 orders[size].order = order;
503 orders[size].offset = offset;
504
505 offset = ucol_getOffset(iter);
506 size += 1;
507 }
508
509 if (maxSize > size && size > 0)
510 {
511 temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
512
513 memcpy(temp, orders, size * sizeof(OrderAndOffset));
514 free(orders);
515 orders = temp;
516
517
518 }
519
520 *orderLength = size;
521 return orders;
522 }
523
524
525 void
backAndForth(UCollationElements * iter)526 backAndForth(UCollationElements *iter)
527 {
528 /* Run through the iterator forwards and stick it into an array */
529 int32_t idx, o;
530 UErrorCode status = U_ZERO_ERROR;
531 int32_t orderLength = 0;
532 OrderAndOffset *orders = getOrders(iter, &orderLength);
533
534
535 /* Now go through it backwards and make sure we get the same values */
536 idx = orderLength;
537 ucol_reset(iter);
538
539 /* synwee : changed */
540 while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
541 #if TEST_OFFSETS
542 int32_t offset =
543 #endif
544 ucol_getOffset(iter);
545
546 idx -= 1;
547 if (o != orders[idx].order) {
548 if (o == 0)
549 idx ++;
550 else {
551 while (idx > 0 && orders[-- idx].order == 0) {
552 /* nothing... */
553 }
554
555 if (o != orders[idx].order) {
556 log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx,
557 orders[idx].order, o);
558 goto bail;
559 }
560 }
561 }
562
563 #if TEST_OFFSETS
564 if (offset != orders[idx].offset) {
565 log_err("Mismatched offset at index %d: %d vs. %d\n", idx,
566 orders[idx].offset, offset);
567 goto bail;
568 }
569 #endif
570
571 }
572
573 while (idx != 0 && orders[idx - 1].order == 0) {
574 idx -= 1;
575 }
576
577 if (idx != 0) {
578 log_err("Didn't get back to beginning - index is %d\n", idx);
579
580 ucol_reset(iter);
581 log_err("\nnext: ");
582
583 if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
584 log_err("Error at %x\n", o);
585 }
586
587 log_err("\nprev: ");
588
589 if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
590 log_err("Error at %x\n", o);
591 }
592
593 log_verbose("\n");
594 }
595
596 bail:
597 free(orders);
598 }
599
/*
 * Checks that every earlier string of s compares to every later string of
 * s with the given result, and runs the collation element iterator back
 * and forth over both strings of each pair.
 *
 * coll   - collator under test
 * s      - array of escaped strings (unescaped with u_unescape into
 *          2048-UChar buffers)
 * size   - number of entries in s; fewer than two entries means there is
 *          nothing to compare
 * result - outcome expected for each (earlier, later) pair
 *
 * If the element iterator cannot be created the failure is logged and the
 * test stops, since the iterator is needed for every pair.
 */
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result) {
    UChar t1[2048] = {0};
    UChar t2[2048] = {0};
    UCollationElements *iter;
    UErrorCode status = U_ZERO_ERROR;

    uint32_t i = 0, j = 0;
    log_verbose("testing sequence:\n");
    for(i = 0; i < size; i++) {
        log_verbose("%s\n", s[i]);
    }

    iter = ucol_openElements(coll, t1, u_strlen(t1), &status);
    if (U_FAILURE(status)) {
        log_err("Creation of iterator failed\n");
        return;
    }
    /* "i + 1 < size" rather than "i < size - 1": size is unsigned and may be 0 */
    for(i = 0; i + 1 < size; i++) {
        for(j = i+1; j < size; j++) {
            u_unescape(s[i], t1, 2048);
            u_unescape(s[j], t2, 2048);
            doTest(coll, t1, t2, result);
            /* synwee : added collation element iterator test */
            ucol_setText(iter, t1, u_strlen(t1), &status);
            backAndForth(iter);
            ucol_setText(iter, t2, u_strlen(t2), &status);
            backAndForth(iter);
        }
    }
    ucol_closeElements(iter);
}
630
/*
 * Runs genericOrderingTestWithResult expecting each string of s to compare
 * as UCOL_LESS against every string that follows it.
 */
void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size) {
    const UCollationResult expected = UCOL_LESS;

    genericOrderingTestWithResult(coll, s, size, expected);
}
634
genericLocaleStarter(const char * locale,const char * const s[],uint32_t size)635 void genericLocaleStarter(const char *locale, const char * const s[], uint32_t size) {
636 UErrorCode status = U_ZERO_ERROR;
637 UCollator *coll = ucol_open(locale, &status);
638
639 log_verbose("Locale starter for %s\n", locale);
640
641 if(U_SUCCESS(status)) {
642 genericOrderingTest(coll, s, size);
643 } else if(status == U_FILE_ACCESS_ERROR) {
644 log_data_err("Is your data around?\n");
645 return;
646 } else {
647 log_err("Unable to open collator for locale %s\n", locale);
648 }
649 ucol_close(coll);
650 }
651
/*
 * Opens the collator for locale and runs genericOrderingTestWithResult
 * over s, expecting result for every (earlier, later) pair.
 *
 * U_FILE_ACCESS_ERROR from ucol_open is logged as a data error and ends the
 * function at once; any other failure is logged as a test error.
 */
void genericLocaleStarterWithResult(const char *locale, const char * const s[], uint32_t size, UCollationResult result) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open(locale, &status);

    log_verbose("Locale starter for %s\n", locale);

    if (status == U_FILE_ACCESS_ERROR) {
        log_data_err("Is your data around?\n");
        return;
    }

    if (U_SUCCESS(status)) {
        genericOrderingTestWithResult(coll, s, size, result);
    } else {
        log_err("Unable to open collator for locale %s\n", locale);
    }
    ucol_close(coll);
}
668
669 /* currently not used with options */
genericRulesStarterWithOptionsAndResult(const char * rules,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize,UCollationResult result)670 void genericRulesStarterWithOptionsAndResult(const char *rules, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
671 UErrorCode status = U_ZERO_ERROR;
672 UChar rlz[RULE_BUFFER_LEN] = { 0 };
673 uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
674 uint32_t i;
675
676 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
677
678 log_verbose("Rules starter for %s\n", rules);
679
680 if(U_SUCCESS(status)) {
681 log_verbose("Setting attributes\n");
682 for(i = 0; i < attsize; i++) {
683 ucol_setAttribute(coll, attrs[i], values[i], &status);
684 }
685
686 genericOrderingTestWithResult(coll, s, size, result);
687 } else {
688 log_err_status(status, "Unable to open collator with rules %s\n", rules);
689 }
690 ucol_close(coll);
691 }
692
/*
 * Opens the collator for locale, applies the attsize (attrs[i], values[i])
 * attribute settings to it, and runs genericOrderingTestWithResult over s
 * with the expected result.
 *
 * A failure to open the collator is logged with its status.
 */
void genericLocaleStarterWithOptionsAndResult(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize, UCollationResult result) {
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = ucol_open(locale, &status);
    uint32_t attIdx = 0;

    log_verbose("Locale starter for %s\n", locale);

    if (!U_SUCCESS(status)) {
        log_err_status(status, "Unable to open collator for locale %s\n", locale);
    } else {
        log_verbose("Setting attributes\n");
        while (attIdx < attsize) {
            ucol_setAttribute(coll, attrs[attIdx], values[attIdx], &status);
            attIdx++;
        }

        genericOrderingTestWithResult(coll, s, size, result);
    }
    ucol_close(coll);
}
714
genericLocaleStarterWithOptions(const char * locale,const char * const s[],uint32_t size,const UColAttribute * attrs,const UColAttributeValue * values,uint32_t attsize)715 void genericLocaleStarterWithOptions(const char *locale, const char * const s[], uint32_t size, const UColAttribute *attrs, const UColAttributeValue *values, uint32_t attsize) {
716 genericLocaleStarterWithOptionsAndResult(locale, s, size, attrs, values, attsize, UCOL_LESS);
717 }
718
/*
 * Builds a collator from the escaped rule string and runs
 * genericOrderingTestWithResult over s with the expected result.
 *
 * U_FILE_ACCESS_ERROR is logged as a data error and any other failure as a
 * test error. The collator is closed only when it was built successfully.
 */
void genericRulesStarterWithResult(const char *rules, const char * const s[], uint32_t size, UCollationResult result) {
    UChar rlz[RULE_BUFFER_LEN] = { 0 };
    UErrorCode status = U_ZERO_ERROR;
    uint32_t rlen = u_unescape(rules, rlz, RULE_BUFFER_LEN);
    UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

    log_verbose("Rules starter for %s\n", rules);

    if (status == U_FILE_ACCESS_ERROR) {
        log_data_err("Is your data around?\n");
        return;
    }
    if (!U_SUCCESS(status)) {
        log_err("Unable to open collator with rules %s\n", rules);
        return;
    }

    genericOrderingTestWithResult(coll, s, size, result);
    ucol_close(coll);
}
737
genericRulesStarter(const char * rules,const char * const s[],uint32_t size)738 void genericRulesStarter(const char *rules, const char * const s[], uint32_t size) {
739 genericRulesStarterWithResult(rules, s, size, UCOL_LESS);
740 }
741
TestTertiary()742 static void TestTertiary()
743 {
744 int32_t len,i;
745 UCollator *myCollation;
746 UErrorCode status=U_ZERO_ERROR;
747 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
748 UChar rules[sizeof(str)];
749 len = (int32_t)strlen(str);
750 u_uastrcpy(rules, str);
751
752 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
753 if(U_FAILURE(status)){
754 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
755 return;
756 }
757
758 ucol_setStrength(myCollation, UCOL_TERTIARY);
759 for (i = 0; i < 17 ; i++)
760 {
761 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
762 }
763 ucol_close(myCollation);
764 myCollation = 0;
765 }
766
TestPrimary()767 static void TestPrimary( )
768 {
769 int32_t len,i;
770 UCollator *myCollation;
771 UErrorCode status=U_ZERO_ERROR;
772 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
773 UChar rules[sizeof(str)];
774 len = (int32_t)strlen(str);
775 u_uastrcpy(rules, str);
776
777 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
778 if(U_FAILURE(status)){
779 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
780 return;
781 }
782 ucol_setStrength(myCollation, UCOL_PRIMARY);
783
784 for (i = 17; i < 26 ; i++)
785 {
786
787 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
788 }
789 ucol_close(myCollation);
790 myCollation = 0;
791 }
792
TestSecondary()793 static void TestSecondary()
794 {
795 int32_t i;
796 int32_t len;
797 UCollator *myCollation;
798 UErrorCode status=U_ZERO_ERROR;
799 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
800 UChar rules[sizeof(str)];
801 len = (int32_t)strlen(str);
802 u_uastrcpy(rules, str);
803
804 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
805 if(U_FAILURE(status)){
806 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
807 return;
808 }
809 ucol_setStrength(myCollation, UCOL_SECONDARY);
810 for (i = 26; i < 34 ; i++)
811 {
812 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
813 }
814 ucol_close(myCollation);
815 myCollation = 0;
816 }
817
TestIdentical()818 static void TestIdentical()
819 {
820 int32_t i;
821 int32_t len;
822 UCollator *myCollation;
823 UErrorCode status=U_ZERO_ERROR;
824 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
825 UChar rules[sizeof(str)];
826 len = (int32_t)strlen(str);
827 u_uastrcpy(rules, str);
828
829 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
830 if(U_FAILURE(status)){
831 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
832 return;
833 }
834 for(i= 34; i<37; i++)
835 {
836 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
837 }
838 ucol_close(myCollation);
839 myCollation = 0;
840 }
841
TestExtra()842 static void TestExtra()
843 {
844 int32_t i, j;
845 int32_t len;
846 UCollator *myCollation;
847 UErrorCode status = U_ZERO_ERROR;
848 static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
849 UChar rules[sizeof(str)];
850 len = (int32_t)strlen(str);
851 u_uastrcpy(rules, str);
852
853 myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
854 if(U_FAILURE(status)){
855 log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
856 return;
857 }
858 ucol_setStrength(myCollation, UCOL_TERTIARY);
859 for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
860 {
861 for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
862 {
863
864 doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
865 }
866 }
867 ucol_close(myCollation);
868 myCollation = 0;
869 }
870
TestJB581(void)871 static void TestJB581(void)
872 {
873 int32_t bufferLen = 0;
874 UChar source [100];
875 UChar target [100];
876 UCollationResult result = UCOL_EQUAL;
877 uint8_t sourceKeyArray [100];
878 uint8_t targetKeyArray [100];
879 int32_t sourceKeyOut = 0,
880 targetKeyOut = 0;
881 UCollator *myCollator = 0;
882 UErrorCode status = U_ZERO_ERROR;
883
884 /*u_uastrcpy(source, "This is a test.");*/
885 /*u_uastrcpy(target, "THISISATEST.");*/
886 u_uastrcpy(source, "THISISATEST.");
887 u_uastrcpy(target, "Thisisatest.");
888
889 myCollator = ucol_open("en_US", &status);
890 if (U_FAILURE(status)){
891 log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
892 return;
893 }
894 result = ucol_strcoll(myCollator, source, -1, target, -1);
895 /* result is 1, secondary differences only for ignorable space characters*/
896 if (result != 1)
897 {
898 log_err("Comparing two strings with only secondary differences in C failed.\n");
899 }
900 /* To compare them with just primary differences */
901 ucol_setStrength(myCollator, UCOL_PRIMARY);
902 result = ucol_strcoll(myCollator, source, -1, target, -1);
903 /* result is 0 */
904 if (result != 0)
905 {
906 log_err("Comparing two strings with no differences in C failed.\n");
907 }
908 /* Now, do the same comparison with keys */
909 sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
910 (void)sourceKeyOut; /* Suppress set but not used warning. */
911 targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
912 bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
913 if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
914 {
915 log_err("Comparing two strings with sort keys in C failed.\n");
916 }
917 ucol_close(myCollator);
918 }
919
TestJB1401(void)920 static void TestJB1401(void)
921 {
922 UCollator *myCollator = 0;
923 UErrorCode status = U_ZERO_ERROR;
924 static UChar NFD_UnsafeStartChars[] = {
925 0x0f73, /* Tibetan Vowel Sign II */
926 0x0f75, /* Tibetan Vowel Sign UU */
927 0x0f81, /* Tibetan Vowel Sign Reversed II */
928 0
929 };
930 int i;
931
932
933 myCollator = ucol_open("en_US", &status);
934 if (U_FAILURE(status)){
935 log_err_status(status, "ERROR: Failed to create the collator : %s\n", u_errorName(status));
936 return;
937 }
938 ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
939 if (U_FAILURE(status)){
940 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
941 return;
942 }
943
944 for (i=0; ; i++) {
945 UChar c;
946 UChar X[4];
947 UChar Y[20];
948 UChar Z[20];
949
950 /* Get the next funny character to be tested, and set up the
951 * three test strings X, Y, Z, consisting of an A-grave + test char,
952 * in original form, NFD, and then NFC form.
953 */
954 c = NFD_UnsafeStartChars[i];
955 if (c==0) {break;}
956
957 X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
958
959 unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
960 unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
961 if (U_FAILURE(status)){
962 log_err("ERROR: Failed to normalize test of character %x\n", c);
963 return;
964 }
965
966 /* Collation test. All three strings should be equal.
967 * doTest does both strcoll and sort keys, with params in both orders.
968 */
969 doTest(myCollator, X, Y, UCOL_EQUAL);
970 doTest(myCollator, X, Z, UCOL_EQUAL);
971 doTest(myCollator, Y, Z, UCOL_EQUAL);
972
973 /* Run collation element iterators over the three strings. Results should be same for each.
974 */
975 {
976 UCollationElements *ceiX, *ceiY, *ceiZ;
977 int32_t ceX, ceY, ceZ;
978 int j;
979
980 ceiX = ucol_openElements(myCollator, X, -1, &status);
981 ceiY = ucol_openElements(myCollator, Y, -1, &status);
982 ceiZ = ucol_openElements(myCollator, Z, -1, &status);
983 if (U_FAILURE(status)) {
984 log_err("ERROR: uucol_openElements failed.\n");
985 return;
986 }
987
988 for (j=0;; j++) {
989 ceX = ucol_next(ceiX, &status);
990 ceY = ucol_next(ceiY, &status);
991 ceZ = ucol_next(ceiZ, &status);
992 if (U_FAILURE(status)) {
993 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
994 break;
995 }
996 if (ceX != ceY || ceY != ceZ) {
997 log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
998 break;
999 }
1000 if (ceX == UCOL_NULLORDER) {
1001 break;
1002 }
1003 }
1004 ucol_closeElements(ceiX);
1005 ucol_closeElements(ceiY);
1006 ucol_closeElements(ceiZ);
1007 }
1008 }
1009 ucol_close(myCollator);
1010 }
1011
1012
1013
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * NOTE: the whole body below is compiled out with "#if 0" (see the comment
 * inside for the reason), so calling this function currently does nothing.
 * The disabled code is kept for reference only.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    int len = strlen(str);
    UChar rules[sizeof(str)];
    UCollator *myCollation;
    UCollator *enCollation;
    UErrorCode status = U_ZERO_ERROR;
    UChar source[1];
    UChar ch;
    uint8_t result[20];
    uint8_t expected[20];

    u_uastrcpy(rules, str);

    /* Open the reference (en_US) collator and the tailored one. */
    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n",
                       myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF,
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n",
                myErrorName(status));
        return;
    }

    /* Both collators: shifted alternate handling. */
    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    /* Expected sort key bytes: all zero. */
    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result,
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Check the letters 'a' through 'y' ('z' itself is excluded by "<"). */
    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
                    ch);
        }
        ch ++;
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
1096
1097 /**
1098 * Tests surrogate support.
1099 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
1100 * Therefore, another (unassigned) code point was used for this test.
1101 */
TestSurrogates(void)1102 static void TestSurrogates(void)
1103 {
1104 static const char str[] =
1105 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
1106 int len = (int)strlen(str);
1107 int rlen = 0;
1108 UChar rules[sizeof(str)];
1109 UCollator *myCollation;
1110 UCollator *enCollation;
1111 UErrorCode status = U_ZERO_ERROR;
1112 UChar source[][4] =
1113 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1114 UChar target[][4] =
1115 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1116 int count = 0;
1117 uint8_t enresult[20], myresult[20];
1118 int enlen, mylen;
1119
1120 /* tests for open rules with surrogate rules */
1121 rlen = u_unescape(str, rules, len);
1122
1123 enCollation = ucol_open("en_US", &status);
1124 if (U_FAILURE(status)) {
1125 log_err_status(status, "ERROR: in creation of collator :%s\n",
1126 myErrorName(status));
1127 return;
1128 }
1129 myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
1130 UCOL_TERTIARY,NULL, &status);
1131 if (U_FAILURE(status)) {
1132 ucol_close(enCollation);
1133 log_err("ERROR: in creation of rule based collator :%s\n",
1134 myErrorName(status));
1135 return;
1136 }
1137
1138 /*
1139 this test is to verify the supplementary sort key order in the english
1140 collator
1141 */
1142 log_verbose("start of english collation supplementary characters test\n");
1143 while (count < 2) {
1144 doTest(enCollation, source[count], target[count], UCOL_LESS);
1145 count ++;
1146 }
1147 doTest(enCollation, source[count], target[count], UCOL_GREATER);
1148
1149 log_verbose("start of tailored collation supplementary characters test\n");
1150 count = 0;
1151 /* tests getting collation elements for surrogates for tailored rules */
1152 while (count < 4) {
1153 doTest(myCollation, source[count], target[count], UCOL_LESS);
1154 count ++;
1155 }
1156
1157 /* tests that \uD800\uDC02 still has the same value, not changed */
1158 enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
1159 mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
1160 if (enlen != mylen ||
1161 uprv_memcmp(enresult, myresult, enlen) != 0) {
1162 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1163 }
1164
1165 ucol_close(enCollation);
1166 ucol_close(myCollation);
1167 enCollation = NULL;
1168 myCollation = NULL;
1169 }
1170
1171 /*
1172 *### TODO: Add more invalid rules to test all different scenarios.
1173 *
1174 */
1175 static void
TestInvalidRules()1176 TestInvalidRules(){
1177 #define MAX_ERROR_STATES 2
1178
1179 static const char* rulesArr[MAX_ERROR_STATES] = {
1180 "& C < ch, cH, Ch[this should fail]<d",
1181 "& C < ch, cH, & Ch[variable top]"
1182 };
1183 static const char* preContextArr[MAX_ERROR_STATES] = {
1184 " C < ch, cH, Ch",
1185 "& C < ch, cH",
1186
1187 };
1188 static const char* postContextArr[MAX_ERROR_STATES] = {
1189 "[this should fa",
1190 ", & Ch[variable"
1191 };
1192 int i;
1193
1194 for(i = 0;i<MAX_ERROR_STATES;i++){
1195 UChar rules[1000] = { '\0' };
1196 UChar preContextExp[1000] = { '\0' };
1197 UChar postContextExp[1000] = { '\0' };
1198 UParseError parseError;
1199 UErrorCode status = U_ZERO_ERROR;
1200 UCollator* coll=0;
1201 u_charsToUChars(rulesArr[i], rules, (int32_t)uprv_strlen(rulesArr[i]) + 1);
1202 u_charsToUChars(preContextArr[i], preContextExp, (int32_t)uprv_strlen(preContextArr[i]) + 1);
1203 u_charsToUChars(postContextArr[i], postContextExp, (int32_t)uprv_strlen(postContextArr[i]) + 1);
1204 /* clean up stuff in parseError */
1205 u_memset(parseError.preContext, 0x0000, U_PARSE_CONTEXT_LEN);
1206 u_memset(parseError.postContext, 0x0000, U_PARSE_CONTEXT_LEN);
1207 /* open the rules and test */
1208 coll = ucol_openRules(rules, u_strlen(rules), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1209 (void)coll; /* Suppress set but not used warning. */
1210 if(u_strcmp(parseError.preContext,preContextExp)!=0){
1211 log_err_status(status, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1212 aescstrdup(parseError.preContext, -1));
1213 }
1214 if(u_strcmp(parseError.postContext,postContextExp)!=0){
1215 log_err_status(status, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1216 aescstrdup(parseError.postContext, -1));
1217 }
1218 }
1219 }
1220
1221 static void
TestJitterbug1098()1222 TestJitterbug1098(){
1223 UChar rule[1000];
1224 UCollator* c1 = NULL;
1225 UErrorCode status = U_ZERO_ERROR;
1226 UParseError parseError;
1227 char preContext[200]={0};
1228 char postContext[200]={0};
1229 int i=0;
1230 const char* rules[] = {
1231 "&''<\\\\",
1232 "&\\'<\\\\",
1233 "&\\\"<'\\'",
1234 "&'\"'<\\'",
1235 NULL
1236
1237 };
1238 const UCollationResult results1098[] = {
1239 UCOL_LESS,
1240 UCOL_LESS,
1241 UCOL_LESS,
1242 UCOL_LESS,
1243 };
1244 const UChar input[][2]= {
1245 {0x0027,0x005c},
1246 {0x0027,0x005c},
1247 {0x0022,0x005c},
1248 {0x0022,0x0027},
1249 };
1250 UChar X[2] ={0};
1251 UChar Y[2] ={0};
1252 u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
1253 u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
1254 for(;rules[i]!=0;i++){
1255 u_uastrcpy(rule, rules[i]);
1256 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
1257 if(U_FAILURE(status)){
1258 log_err_status(status, "Could not parse the rules syntax. Error: %s\n", u_errorName(status));
1259
1260 if (status == U_PARSE_ERROR) {
1261 u_UCharsToChars(parseError.preContext,preContext,20);
1262 u_UCharsToChars(parseError.postContext,postContext,20);
1263 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
1264 }
1265
1266 return;
1267 }
1268 X[0] = input[i][0];
1269 Y[0] = input[i][1];
1270 doTest(c1,X,Y,results1098[i]);
1271 ucol_close(c1);
1272 }
1273 }
1274
1275 static void
TestFCDCrash(void)1276 TestFCDCrash(void) {
1277 static const char *test[] = {
1278 "Gr\\u00F6\\u00DFe",
1279 "Grossist"
1280 };
1281
1282 UErrorCode status = U_ZERO_ERROR;
1283 UCollator *coll = ucol_open("es", &status);
1284 if(U_FAILURE(status)) {
1285 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1286 return;
1287 }
1288 ucol_close(coll);
1289 coll = NULL;
1290 ctest_resetICU();
1291 coll = ucol_open("de_DE", &status);
1292 if(U_FAILURE(status)) {
1293 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1294 return;
1295 }
1296 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1297 genericOrderingTest(coll, test, 2);
1298 ucol_close(coll);
1299 }
1300
1301 /*static UBool
1302 find(UEnumeration* list, const char* str, UErrorCode* status){
1303 const char* value = NULL;
1304 int32_t length=0;
1305 if(U_FAILURE(*status)){
1306 return false;
1307 }
1308 uenum_reset(list, status);
1309 while( (value= uenum_next(list, &length, status))!=NULL){
1310 if(strcmp(value, str)==0){
1311 return true;
1312 }
1313 }
1314 return false;
1315 }*/
1316
TestJ5298(void)1317 static void TestJ5298(void)
1318 {
1319 UErrorCode status = U_ZERO_ERROR;
1320 char input[256], output[256];
1321 UBool isAvailable;
1322 int32_t i = 0;
1323 UEnumeration* values = NULL;
1324 const char *keywordValue = NULL;
1325 log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
1326 values = ucol_getKeywordValues("collation", &status);
1327 while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1328 if (strncmp(keywordValue, "private-", 8) == 0) {
1329 log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue);
1330 }
1331 }
1332 for (i = 0; i < ucol_countAvailable(); i++) {
1333 uenum_reset(values, &status);
1334 while ((keywordValue = uenum_next(values, NULL, &status)) != NULL) {
1335 strcpy(input, ucol_getAvailable(i));
1336 if (strcmp(keywordValue, "standard") != 0) {
1337 strcat(input, "@collation=");
1338 strcat(input, keywordValue);
1339 }
1340
1341 ucol_getFunctionalEquivalent(output, 256, "collation", input, &isAvailable, &status);
1342 if (strcmp(input, output) == 0) { /* Unique locale, print it out */
1343 log_verbose("%s, \n", output);
1344 }
1345 }
1346 }
1347 uenum_close(values);
1348 log_verbose("\n");
1349 }
1350
/*
 * NULL-terminated list of locale IDs with unusual or invalid keywords.
 * TestBadKey() passes each entry to ucol_open(); the trailing comment on
 * each line records the outcome the original author expected.
 */
static const char* badKeyLocales[] = {
    "@calendar=japanese;collation=search", // ucol_open OK
    "@calendar=japanese", // ucol_open OK
    "en@calendar=x", // ucol_open OK
    "ja@calendar=x", // ucol_open OK
    "en@collation=x", // ucol_open OK
    "ja@collation=x", // ucol_open OK
    "ja@collation=private-kana", // ucol_open fails, verify it does not crash
    "en@collation=\x80", // (x80 undef in ASCII,EBCDIC) ucol_open fails, verify it does not crash
    NULL // end-of-list marker
};
1362
1363 // Mainly this is to check that we don't have a crash, but we check
1364 // for correct NULL return and FAILURE/SUCCESS status as a bonus.
TestBadKey(void)1365 static void TestBadKey(void)
1366 {
1367 const char* badLoc;
1368 const char** badLocsPtr = badKeyLocales;
1369 while ((badLoc = *badLocsPtr++) != NULL) {
1370 UErrorCode status = U_ZERO_ERROR;
1371 UCollator* uc = ucol_open(badLoc, &status);
1372 if ( U_SUCCESS(status) ) {
1373 if (uc == NULL) {
1374 log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc);
1375 }
1376 ucol_close(uc);
1377 } else if (uc != NULL) {
1378 log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc);
1379 }
1380 }
1381 }
1382 #endif /* #if !UCONFIG_NO_COLLATION */
1383