1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 /**
10 * IntlTestCollator is the medium level test class for everything in the directory "collate".
11 */
12
13 /***********************************************************************
14 * Modification history
15 * Date Name Description
16 * 02/14/2001 synwee Compare with cintltst and commented away tests
17 * that are not run.
18 ***********************************************************************/
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_COLLATION
23
24 #include "unicode/localpointer.h"
25 #include "unicode/sortkey.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28
29 #include "encoll.h"
30 #include "frcoll.h"
31 #include "decoll.h"
32 #include "escoll.h"
33 #include "ficoll.h"
34 #include "jacoll.h"
35 #include "trcoll.h"
36 #include "allcoll.h"
37 #include "g7coll.h"
38 #include "mnkytst.h"
39 #include "apicoll.h"
40 #include "regcoll.h"
41 #include "currcoll.h"
42 #include "itercoll.h"
43 #include "tstnorm.h"
44 #include "normconf.h"
45 #include "thcoll.h"
46 #include "srchtest.h"
47 #include "ssearch.h"
48 #include "lcukocol.h"
49 #include "ucaconf.h"
50 #include "svccoll.h"
51 #include "cmemory.h"
52 #include "alphaindextst.h"
53
54 // Set to 1 to test offsets in backAndForth()
55 #define TEST_OFFSETS 0
56
57 extern IntlTest *createCollationTest();
58
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)59 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
60 {
61 if (exec) {
62 logln("TestSuite Collator: ");
63 }
64
65 TESTCASE_AUTO_BEGIN;
66 TESTCASE_AUTO_CLASS(CollationEnglishTest);
67 TESTCASE_AUTO_CLASS(CollationFrenchTest);
68 TESTCASE_AUTO_CLASS(CollationGermanTest);
69 TESTCASE_AUTO_CLASS(CollationSpanishTest);
70 TESTCASE_AUTO_CLASS(CollationKanaTest);
71 TESTCASE_AUTO_CLASS(CollationTurkishTest);
72 TESTCASE_AUTO_CLASS(CollationDummyTest);
73 TESTCASE_AUTO_CLASS(G7CollationTest);
74 TESTCASE_AUTO_CLASS(CollationMonkeyTest);
75 TESTCASE_AUTO_CLASS(CollationAPITest);
76 TESTCASE_AUTO_CLASS(CollationRegressionTest);
77 TESTCASE_AUTO_CLASS(CollationCurrencyTest);
78 TESTCASE_AUTO_CLASS(CollationIteratorTest);
79 TESTCASE_AUTO_CLASS(CollationThaiTest);
80 TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
81 TESTCASE_AUTO_CLASS(StringSearchTest);
82 TESTCASE_AUTO_CLASS(UCAConformanceTest);
83 TESTCASE_AUTO_CLASS(CollationServiceTest);
84 TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation
85 TESTCASE_AUTO_CLASS(SSearchTest);
86 #if !UCONFIG_NO_NORMALIZATION
87 TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
88 #endif
89 TESTCASE_AUTO_CREATE_CLASS(CollationTest);
90 TESTCASE_AUTO_END;
91 }
92
93 UCollationResult
compareUsingPartials(UCollator * coll,const UChar source[],int32_t sLen,const UChar target[],int32_t tLen,int32_t pieceSize,UErrorCode & status)94 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
95 int32_t partialSKResult = 0;
96 uint8_t sBuf[512], tBuf[512];
97 UCharIterator sIter, tIter;
98 uint32_t sState[2], tState[2];
99 int32_t sSize = pieceSize, tSize = pieceSize;
100 int32_t i = 0;
101 status = U_ZERO_ERROR;
102 sState[0] = 0; sState[1] = 0;
103 tState[0] = 0; tState[1] = 0;
104 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
105 uiter_setString(&sIter, source, sLen);
106 uiter_setString(&tIter, target, tLen);
107 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
108 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
109
110 if(sState[0] != 0 || tState[0] != 0) {
111 log("State != 0 : %08X %08X\n", sState[0], tState[0]);
112 }
113 log("%i ", i++);
114
115 partialSKResult = memcmp(sBuf, tBuf, pieceSize);
116 }
117
118 if(partialSKResult < 0) {
119 return UCOL_LESS;
120 } else if(partialSKResult > 0) {
121 return UCOL_GREATER;
122 } else {
123 return UCOL_EQUAL;
124 }
125 }
126
127 void
doTestVariant(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)128 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
129 {
130 UErrorCode status = U_ZERO_ERROR;
131
132 UCollator *myCollation = col->toUCollator();
133
134 Collator::EComparisonResult compareResult = col->compare(source, target);
135
136 CollationKey srckey, tgtkey;
137 col->getCollationKey(source, srckey, status);
138 col->getCollationKey(target, tgtkey, status);
139 if (U_FAILURE(status)){
140 errln("Creation of collation keys failed\n");
141 }
142 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
143
144 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
145
146 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
147
148 int32_t sLen = source.length(), tLen = target.length();
149 const UChar* src = source.getBuffer();
150 const UChar* trg = target.getBuffer();
151 UCollationResult compareResultIter = (UCollationResult)result;
152
153 {
154 UCharIterator sIter, tIter;
155 uiter_setString(&sIter, src, sLen);
156 uiter_setString(&tIter, trg, tLen);
157 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
158 if(compareResultIter != (UCollationResult)result) {
159 errln("Different result for iterative comparison "+source+" "+target);
160 }
161 }
162 /* convert the strings to UTF-8 and do try comparing with char iterator */
163 if(!quick) { /*!QUICK*/
164 char utf8Source[256], utf8Target[256];
165 int32_t utf8SourceLen = 0, utf8TargetLen = 0;
166 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
167 if(U_FAILURE(status)) { /* probably buffer is not big enough */
168 log("Src UTF-8 buffer too small! Will not compare!\n");
169 } else {
170 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
171 if(U_SUCCESS(status)) { /* probably buffer is not big enough */
172 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
173 UCharIterator sIter, tIter;
174 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
175 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
176 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
177 /*uiter_setString(&sIter, source, sLen);
178 uiter_setString(&tIter, target, tLen);*/
179 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
180 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
181 sIter.move(&sIter, 0, UITER_START);
182 tIter.move(&tIter, 0, UITER_START);
183 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
184 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
185 if(compareResultUTF8 != compareResultIter) {
186 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
187 }
188 if(compareResultUTF8 != compareResultUTF8Norm) {
189 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
190 }
191 } else {
192 log("Target UTF-8 buffer too small! Did not compare!\n");
193 }
194 if(U_FAILURE(status)) {
195 log("UTF-8 strcoll failed! Ignoring result\n");
196 }
197 }
198 }
199
200 /* testing the partial sortkeys */
201 { /*!QUICK*/
202 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
203 int32_t partialSizesSize = 1;
204 if(!quick) {
205 partialSizesSize = 7;
206 }
207 int32_t i = 0;
208 log("partial sortkey test piecesize=");
209 for(i = 0; i < partialSizesSize; i++) {
210 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
211 log("%i ", partialSizes[i]);
212
213 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
214 if(partialSKResult != (UCollationResult)result) {
215 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
216 }
217
218 if(norm != UCOL_ON && !quick) {
219 log("N ");
220 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
221 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
222 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
223 if(partialSKResult != partialNormalizedSKResult) {
224 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
225 }
226 }
227 }
228 log("\n");
229 }
230 /*
231 if (compareResult != result) {
232 errln("String comparison failed in variant test\n");
233 }
234 if (keyResult != result) {
235 errln("Collation key comparison failed in variant test\n");
236 }
237 */
238 }
239
240 void
doTest(Collator * col,const UChar * source,const UChar * target,Collator::EComparisonResult result)241 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
242 doTest(col, UnicodeString(source), UnicodeString(target), result);
243 }
244
245 void
doTest(Collator * col,const UnicodeString & source,const UnicodeString & target,Collator::EComparisonResult result)246 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
247 {
248 if(col) {
249 doTestVariant(col, source, target, result);
250 if(result == Collator::LESS) {
251 doTestVariant(col, target, source, Collator::GREATER);
252 } else if (result == Collator::GREATER) {
253 doTestVariant(col, target, source, Collator::LESS);
254 }
255
256 UErrorCode status = U_ZERO_ERROR;
257 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
258 logln("Testing iterating source: "+source);
259 backAndForth(*c);
260 c->setText(target, status);
261 logln("Testing iterating target: "+target);
262 backAndForth(*c);
263 }
264 }
265
266
267 // used for collation result reporting, defined here for convenience
268 // (maybe moved later)
269 void
reportCResult(const UnicodeString & source,const UnicodeString & target,CollationKey & sourceKey,CollationKey & targetKey,Collator::EComparisonResult compareResult,Collator::EComparisonResult keyResult,Collator::EComparisonResult incResult,Collator::EComparisonResult expectedResult)270 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
271 CollationKey &sourceKey, CollationKey &targetKey,
272 Collator::EComparisonResult compareResult,
273 Collator::EComparisonResult keyResult,
274 Collator::EComparisonResult incResult,
275 Collator::EComparisonResult expectedResult )
276 {
277 if (expectedResult < -1 || expectedResult > 1)
278 {
279 errln("***** invalid call to reportCResult ****");
280 return;
281 }
282
283 UBool ok1 = (compareResult == expectedResult);
284 UBool ok2 = (keyResult == expectedResult);
285 UBool ok3 = (incResult == expectedResult);
286
287
288 if (ok1 && ok2 && ok3 && !verbose) {
289 // Keep non-verbose, passing tests fast
290 return;
291 } else {
292 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
293 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
294 UnicodeString prettySource, prettyTarget, sExpect, sResult;
295
296 IntlTest::prettify(source, prettySource);
297 IntlTest::prettify(target, prettyTarget);
298 appendCompareResult(compareResult, sResult);
299 appendCompareResult(expectedResult, sExpect);
300
301 if (ok1) {
302 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
303 } else {
304 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
305 }
306
307 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
308 msg2 = ").compareTo(key(";
309 msg3 = ")) returned ";
310
311 appendCompareResult(keyResult, sResult);
312
313 if (ok2) {
314 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
315 } else {
316 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
317
318 msg1 = " ";
319 msg2 = " vs. ";
320
321 prettify(sourceKey, prettySource);
322 prettify(targetKey, prettyTarget);
323
324 errln(msg1 + prettySource + msg2 + prettyTarget);
325 }
326 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
327 msg2 = ", ";
328 msg3 = ") returned ";
329
330 appendCompareResult(incResult, sResult);
331
332 if (ok3) {
333 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
334 } else {
335 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
336 }
337 }
338 }
339
340 UnicodeString&
appendCompareResult(Collator::EComparisonResult result,UnicodeString & target)341 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
342 UnicodeString& target)
343 {
344 if (result == Collator::LESS)
345 {
346 target += "LESS";
347 }
348 else if (result == Collator::EQUAL)
349 {
350 target += "EQUAL";
351 }
352 else if (result == Collator::GREATER)
353 {
354 target += "GREATER";
355 }
356 else
357 {
358 UnicodeString huh = "?";
359
360 target += (huh + (int32_t)result);
361 }
362
363 return target;
364 }
365
366 // Produce a printable representation of a CollationKey
prettify(const CollationKey & source,UnicodeString & target)367 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
368 {
369 int32_t i, byteCount;
370 const uint8_t *bytes = source.getByteArray(byteCount);
371
372 target.remove();
373 target += "[";
374
375 for (i = 0; i < byteCount; i += 1)
376 {
377 if (i != 0) {
378 target += " ";
379 }
380 appendHex(bytes[i], 2, target);
381 }
382
383 target += "]";
384
385 return target;
386 }
387
backAndForth(CollationElementIterator & iter)388 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
389 {
390 // Run through the iterator forwards and stick it into an array
391 int32_t orderLength = 0;
392 LocalArray<Order> orders(getOrders(iter, orderLength));
393 UErrorCode status = U_ZERO_ERROR;
394
395 // Now go through it backwards and make sure we get the same values
396 int32_t index = orderLength;
397 int32_t o;
398
399 // reset the iterator
400 iter.reset();
401
402 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
403 {
404 /*int32_t offset = */iter.getOffset();
405
406 if (index == 0) {
407 if(o == 0) {
408 continue;
409 } else { // this is an error, orders exhausted but there are non-ignorable CEs from
410 // going backwards
411 errln("Backward iteration returned a non ignorable after orders are exhausted");
412 break;
413 }
414 }
415
416 index -= 1;
417 if (o != orders[index].order) {
418 if (o == 0)
419 index += 1;
420 else {
421 while (index > 0 && orders[--index].order == 0) {
422 // nothing...
423 }
424
425 if (o != orders[index].order) {
426 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
427 orders[index].order, o);
428 //break;
429 return;
430 }
431 }
432 }
433
434 #if TEST_OFFSETS
435 if (offset != orders[index].offset) {
436 errln("Mismatched offset at index %d: %d vs. %d", index,
437 orders[index].offset, offset);
438 //break;
439 return;
440 }
441 #endif
442
443 }
444
445 while (index != 0 && orders[index - 1].order == 0)
446 {
447 index --;
448 }
449
450 if (index != 0)
451 {
452 UnicodeString msg("Didn't get back to beginning - index is ");
453 errln(msg + index);
454
455 iter.reset();
456 err("next: ");
457 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
458 {
459 UnicodeString hexString("0x");
460
461 appendHex(o, 8, hexString);
462 hexString += " ";
463 err(hexString);
464 }
465 errln("");
466
467 err("prev: ");
468 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
469 {
470 UnicodeString hexString("0x");
471
472 appendHex(o, 8, hexString);
473 hexString += " ";
474 err(hexString);
475 }
476 errln("");
477 }
478 }
479
480
481 /**
482 * Return an integer array containing all of the collation orders
483 * returned by calls to next on the specified iterator
484 */
getOrders(CollationElementIterator & iter,int32_t & orderLength)485 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
486 {
487 int32_t maxSize = 100;
488 int32_t size = 0;
489 LocalArray<Order> orders(new Order[maxSize]);
490 UErrorCode status = U_ZERO_ERROR;
491 int32_t offset = iter.getOffset();
492
493 int32_t order;
494 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
495 {
496 if (size == maxSize)
497 {
498 maxSize *= 2;
499 Order *temp = new Order[maxSize];
500
501 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
502 orders.adoptInstead(temp);
503 }
504
505 orders[size].order = order;
506 orders[size].offset = offset;
507
508 offset = iter.getOffset();
509 size += 1;
510 }
511 if (U_FAILURE(status)) {
512 errln("CollationElementIterator.next() failed - %s",
513 u_errorName(status));
514 }
515
516 if (maxSize > size)
517 {
518 Order *temp = new Order[size];
519
520 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
521 orders.adoptInstead(temp);
522 }
523
524 orderLength = size;
525 return orders.orphan();
526 }
527
528 #endif /* #if !UCONFIG_NO_COLLATION */
529