1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 //===============================================================================
9 //
10 // File apicoll.cpp
11 //
12 //
13 //
14 // Created by: Helena Shih
15 //
16 // Modification History:
17 //
18 // Date Name Description
19 // 2/5/97 aliu Added streamIn and streamOut methods. Added
20 // constructor which reads RuleBasedCollator object from
21 // a binary file. Added writeToFile method which streams
22 // RuleBasedCollator out to a binary file. The streamIn
23 // and streamOut methods use istream and ostream objects
24 // in binary mode.
25 // 6/30/97 helena Added tests for CollationElementIterator::setText, getOffset
26 // setOffset and DecompositionIterator::getOffset, setOffset.
27 // DecompositionIterator is made public so add class scope
28 // testing.
29 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t)
30 //===============================================================================
31
32 #include "unicode/utypes.h"
33
34 #if !UCONFIG_NO_COLLATION
35
36 #include "unicode/localpointer.h"
37 #include "unicode/coll.h"
38 #include "unicode/tblcoll.h"
39 #include "unicode/coleitr.h"
40 #include "unicode/sortkey.h"
41 #include "apicoll.h"
42 #include "unicode/chariter.h"
43 #include "unicode/schriter.h"
44 #include "unicode/strenum.h"
45 #include "unicode/ustring.h"
46 #include "unicode/ucol.h"
47
48 #include "sfwdchit.h"
49 #include "cmemory.h"
50 #include <stdlib.h>
51
52 void
doAssert(UBool condition,const char * message)53 CollationAPITest::doAssert(UBool condition, const char *message)
54 {
55 if (!condition) {
56 errln(UnicodeString("ERROR : ") + message);
57 }
58 }
59
60 // Collator Class Properties
61 // ctor, dtor, createInstance, compare, getStrength/setStrength
62 // getDecomposition/setDecomposition, getDisplayName
63 void
TestProperty()64 CollationAPITest::TestProperty(/* char* par */)
65 {
66 UErrorCode success = U_ZERO_ERROR;
67 Collator *col = 0;
68 /*
69 * Expected version of the English collator.
70 * Currently, the major/minor version numbers change when the builder code
71 * changes,
72 * number 2 is from the tailoring data version and
73 * number 3 is the UCA version.
74 * This changes with every UCA version change, and the expected value
75 * needs to be adjusted.
76 * Same in cintltst/capitst.c.
77 */
78 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
79 UVersionInfo versionArray;
80
81 logln("The property tests begin : ");
82 logln("Test ctors : ");
83 col = Collator::createInstance(Locale::getEnglish(), success);
84 if (U_FAILURE(success)){
85 errcheckln(success, "English Collator creation failed. - %s", u_errorName(success));
86 return;
87 }
88
89 col->getVersion(versionArray);
90 // Check for a version greater than some value rather than equality
91 // so that we need not update the expected version each time.
92 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
93 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
94 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
95 } else {
96 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
97 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
98 }
99
100 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
101 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
102 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
103 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
104 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
105 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
106
107 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
108 success = U_ZERO_ERROR;
109 {
110 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
111 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
112 UCharIterator abauIter, abssIter;
113 uiter_setReplaceable(&abauIter, &abau);
114 uiter_setReplaceable(&abssIter, &abss);
115 doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
116 success = U_ZERO_ERROR;
117 }
118
119 /*start of update [Bertrand A. D. 02/10/98]*/
120 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
121 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed");
122 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed");
123 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed");
124 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
125 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
126 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
127 /*end of update [Bertrand A. D. 02/10/98]*/
128
129
130 logln("Test ctors ends.");
131 logln("testing Collator::getStrength() method ...");
132 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
133 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
134
135
136 logln("testing Collator::setStrength() method ...");
137 col->setStrength(Collator::SECONDARY);
138 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
139 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
141
142 UnicodeString name;
143
144 logln("Get display name for the US English collation in German : ");
145 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
146 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
147
148 logln("Get display name for the US English collation in English : ");
149 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
150 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
151 #if 0
152 // weiv : this test is bogus if we're running on any machine that has different default locale than English.
153 // Therefore, it is banned!
154 logln("Get display name for the US English in default locale language : ");
155 logln(Collator::getDisplayName(Locale::US, name));
156 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
157 #endif
158 delete col; col = 0;
159 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
160 success);
161 if (U_FAILURE(success)) {
162 errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
163 return;
164 }
165 const UnicodeString &daRules = rcol->getRules();
166 if(daRules.isEmpty()) {
167 dataerrln("missing da_DK tailoring rule string");
168 } else {
169 doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
170 }
171 delete rcol;
172
173 col = Collator::createInstance(Locale::getFrench(), success);
174 if (U_FAILURE(success))
175 {
176 errln("Creating French collation failed.");
177 return;
178 }
179
180 col->setStrength(Collator::PRIMARY);
181 logln("testing Collator::getStrength() method again ...");
182 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
183 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
184
185 logln("testing French Collator::setStrength() method ...");
186 col->setStrength(Collator::TERTIARY);
187 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
188 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
189 doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
190 delete col;
191
192 logln("Create junk collation: ");
193 Locale abcd("ab", "CD", "");
194 success = U_ZERO_ERROR;
195 Collator *junk = 0;
196 junk = Collator::createInstance(abcd, success);
197
198 if (U_FAILURE(success))
199 {
200 errln("Junk collation creation failed, should at least return default.");
201 return;
202 }
203
204 doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(),
205 "The root collation should be returned for an unsupported language.");
206 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
207 if (U_FAILURE(success))
208 {
209 errln("Creating fr_CA collator failed.");
210 delete junk;
211 return;
212 }
213
214 // If the default locale isn't French, the French and non-French collators
215 // should be different
216 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
217 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
218 }
219 Collator *aFrCol = frCol->clone();
220 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
221 logln("Collator property test ended.");
222
223 delete frCol;
224 delete aFrCol;
225 delete junk;
226 }
227
TestKeywordValues()228 void CollationAPITest::TestKeywordValues() {
229 IcuTestErrorCode errorCode(*this, "TestKeywordValues");
230 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
231 if (errorCode.errIfFailureAndReset("English Collator creation failed")) {
232 return;
233 }
234
235 LocalPointer<StringEnumeration> kwEnum(
236 col->getKeywordValuesForLocale("collation", Locale::getEnglish(), TRUE, errorCode));
237 if (errorCode.errIfFailureAndReset("Get Keyword Values for English Collator failed")) {
238 return;
239 }
240 assertTrue("expect at least one collation tailoring for English", kwEnum->count(errorCode) > 0);
241 const char *kw;
242 UBool hasStandard = FALSE;
243 while ((kw = kwEnum->next(NULL, errorCode)) != NULL) {
244 if (strcmp(kw, "standard") == 0) {
245 hasStandard = TRUE;
246 }
247 }
248 assertTrue("expect at least the 'standard' collation tailoring for English", hasStandard);
249 }
250
251 void
TestRuleBasedColl()252 CollationAPITest::TestRuleBasedColl()
253 {
254 RuleBasedCollator *col1, *col2, *col3, *col4;
255 UErrorCode status = U_ZERO_ERROR;
256
257 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
258 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
259
260 col1 = new RuleBasedCollator(ruleset1, status);
261 if (U_FAILURE(status)) {
262 errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
263 return;
264 }
265 else {
266 logln("PASS: RuleBased Collator creation passed\n");
267 }
268
269 status = U_ZERO_ERROR;
270 col2 = new RuleBasedCollator(ruleset2, status);
271 if (U_FAILURE(status)) {
272 errln("RuleBased Collator creation failed.\n");
273 return;
274 }
275 else {
276 logln("PASS: RuleBased Collator creation passed\n");
277 }
278
279 status = U_ZERO_ERROR;
280 Locale locale("aa", "AA");
281 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
282 if (U_FAILURE(status)) {
283 errln("Fallback Collator creation failed.: %s\n");
284 return;
285 }
286 else {
287 logln("PASS: Fallback Collator creation passed\n");
288 }
289 delete col3;
290
291 status = U_ZERO_ERROR;
292 col3 = (RuleBasedCollator *)Collator::createInstance(status);
293 if (U_FAILURE(status)) {
294 errln("Default Collator creation failed.: %s\n");
295 return;
296 }
297 else {
298 logln("PASS: Default Collator creation passed\n");
299 }
300
301 UnicodeString rule1 = col1->getRules();
302 UnicodeString rule2 = col2->getRules();
303 UnicodeString rule3 = col3->getRules();
304
305 doAssert(rule1 != rule2, "Default collator getRules failed");
306 doAssert(rule2 != rule3, "Default collator getRules failed");
307 doAssert(rule1 != rule3, "Default collator getRules failed");
308
309 col4 = new RuleBasedCollator(rule2, status);
310 if (U_FAILURE(status)) {
311 errln("RuleBased Collator creation failed.\n");
312 return;
313 }
314
315 UnicodeString rule4 = col4->getRules();
316 doAssert(rule2 == rule4, "Default collator getRules failed");
317 int32_t length4 = 0;
318 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
319 if (U_FAILURE(status)) {
320 errln("Cloned rule data failed.\n");
321 return;
322 }
323
324 // free(clonedrule4); BAD API!!!!
325 uprv_free(clonedrule4);
326
327
328 delete col1;
329 delete col2;
330 delete col3;
331 delete col4;
332 }
333
334 void
TestRules()335 CollationAPITest::TestRules()
336 {
337 RuleBasedCollator *coll;
338 UErrorCode status = U_ZERO_ERROR;
339 UnicodeString rules;
340
341 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
342 if (U_FAILURE(status)) {
343 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
344 return;
345 }
346 else {
347 logln("PASS: RuleBased Collator creation passed\n");
348 }
349
350 coll->getRules(UCOL_TAILORING_ONLY, rules);
351 if (rules.length() != 0x00) {
352 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
353 }
354
355 coll->getRules(UCOL_FULL_RULES, rules);
356 if (rules.length() < 0) {
357 errln("English full rules failed");
358 }
359 delete coll;
360 }
361
362 void
TestDecomposition()363 CollationAPITest::TestDecomposition() {
364 UErrorCode status = U_ZERO_ERROR;
365 Collator *en_US = Collator::createInstance("en_US", status),
366 *el_GR = Collator::createInstance("el_GR", status),
367 *vi_VN = Collator::createInstance("vi_VN", status);
368
369 if (U_FAILURE(status)) {
370 errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
371 return;
372 }
373
374 /* there is no reason to have canonical decomposition in en_US OR default locale */
375 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
376 {
377 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
378 }
379
380 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
381 {
382 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
383 }
384
385 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
386 {
387 errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
388 }
389
390 delete en_US;
391 delete el_GR;
392 delete vi_VN;
393 }
394
395 void
TestSafeClone()396 CollationAPITest::TestSafeClone() {
397 static const int CLONETEST_COLLATOR_COUNT = 3;
398 Collator *someCollators [CLONETEST_COLLATOR_COUNT];
399 Collator *col;
400 UErrorCode err = U_ZERO_ERROR;
401 int index;
402
403 UnicodeString test1("abCda");
404 UnicodeString test2("abcda");
405
406 /* one default collator & two complex ones */
407 someCollators[0] = Collator::createInstance("en_US", err);
408 someCollators[1] = Collator::createInstance("ko", err);
409 someCollators[2] = Collator::createInstance("ja_JP", err);
410 if(U_FAILURE(err)) {
411 errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
412 delete someCollators[0];
413 delete someCollators[1];
414 delete someCollators[2];
415 return;
416 }
417
418 /* change orig & clone & make sure they are independent */
419
420 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
421 {
422 col = someCollators[index]->safeClone();
423 if (col == 0) {
424 errln("SafeClone of collator should not return null\n");
425 break;
426 }
427 col->setStrength(Collator::TERTIARY);
428 someCollators[index]->setStrength(Collator::PRIMARY);
429 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
430 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
431
432 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
433 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
434 delete col;
435 delete someCollators[index];
436 }
437 }
438
439 void
TestHashCode()440 CollationAPITest::TestHashCode(/* char* par */)
441 {
442 logln("hashCode tests begin.");
443 UErrorCode success = U_ZERO_ERROR;
444 Collator *col1 = 0;
445 col1 = Collator::createInstance(Locale::getEnglish(), success);
446 if (U_FAILURE(success))
447 {
448 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
449 return;
450 }
451
452 Collator *col2 = 0;
453 Locale dk("da", "DK", "");
454 col2 = Collator::createInstance(dk, success);
455 if (U_FAILURE(success))
456 {
457 errln("Danish collation creation failed.");
458 return;
459 }
460
461 Collator *col3 = 0;
462 col3 = Collator::createInstance(Locale::getEnglish(), success);
463 if (U_FAILURE(success))
464 {
465 errln("2nd default collation creation failed.");
466 return;
467 }
468
469 logln("Collator::hashCode() testing ...");
470
471 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
472 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
473 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
474
475 logln("hashCode tests end.");
476 delete col1;
477 delete col2;
478
479 UnicodeString test1("Abcda");
480 UnicodeString test2("abcda");
481
482 CollationKey sortk1, sortk2, sortk3;
483 UErrorCode status = U_ZERO_ERROR;
484
485 col3->getCollationKey(test1, sortk1, status);
486 col3->getCollationKey(test2, sortk2, status);
487 col3->getCollationKey(test2, sortk3, status);
488
489 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
490 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
491
492 delete col3;
493 }
494
495 //----------------------------------------------------------------------------
496 // CollationKey -- Tests the CollationKey methods
497 //
498 void
TestCollationKey()499 CollationAPITest::TestCollationKey(/* char* par */)
500 {
501 logln("testing CollationKey begins...");
502 Collator *col = 0;
503 UErrorCode success=U_ZERO_ERROR;
504 col = Collator::createInstance(Locale::getEnglish(), success);
505 if (U_FAILURE(success))
506 {
507 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
508 return;
509 }
510 col->setStrength(Collator::TERTIARY);
511
512 CollationKey sortk1, sortk2;
513 UnicodeString test1("Abcda"), test2("abcda");
514 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
515
516 logln("Testing weird arguments");
517 // No string vs. empty string vs. completely-ignorable string:
518 // See ICU ticket #10495.
519 CollationKey sortkNone;
520 int32_t length;
521 sortkNone.getByteArray(length);
522 doAssert(!sortkNone.isBogus() && length == 0,
523 "Default-constructed collation key should be empty");
524 CollationKey sortkEmpty;
525 col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
526 // key gets reset here
527 const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
528 doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
529 byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
530 "Empty string should return a collation key with empty levels");
531 doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
532 "Expected no collation key < collation key for empty string");
533 doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
534 "Expected collation key for empty string > no collation key");
535
536 CollationKey sortkIgnorable;
537 // Most control codes and CGJ are completely ignorable.
538 // A string with only completely ignorables must compare equal to an empty string.
539 col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
540 sortkIgnorable.getByteArray(length);
541 doAssert(!sortkIgnorable.isBogus() && length == 3,
542 "Completely ignorable string should return a collation key with empty levels");
543 doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
544 "Completely ignorable string should compare equal to empty string");
545
546 // bogus key returned here
547 key1Status = U_ILLEGAL_ARGUMENT_ERROR;
548 col->getCollationKey(NULL, 0, sortk1, key1Status);
549 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
550 "Error code should return bogus collation key");
551
552 key1Status = U_ZERO_ERROR;
553 logln("Use tertiary comparison level testing ....");
554
555 col->getCollationKey(test1, sortk1, key1Status);
556 if (U_FAILURE(key1Status)) {
557 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
558 return;
559 }
560 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
561 == Collator::GREATER,
562 "Result should be \"Abcda\" >>> \"abcda\"");
563
564 CollationKey sortk3(sortk2), sortkNew;
565
566 sortkNew = sortk1;
567 doAssert((sortk1 != sortk2), "The sort keys should be different");
568 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
569 doAssert((sortk2 == sortk3), "The sort keys should be the same");
570 doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
571 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
572 doAssert((sortkNew != sortk3), "The sort keys should be different");
573 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
574 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
575 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
576 doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
577 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
578 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
579 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
580 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
581 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
582 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
583
584 int32_t cnt1, cnt2, cnt3, cnt4;
585
586 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
587 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
588
589 const uint8_t* byteArray3 = 0;
590 byteArray3 = sortk1.getByteArray(cnt3);
591
592 const uint8_t* byteArray4 = 0;
593 byteArray4 = sortk2.getByteArray(cnt4);
594
595 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
596 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
597
598 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
599 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
600 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
601 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
602 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
603 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
604
605 logln("Equality tests : ");
606 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
607 doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
608 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
609 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
610 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
611 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
612
613 byteArray1 = 0;
614 byteArray2 = 0;
615
616 sortk3 = sortk1;
617 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
618 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
619 logln("testing sortkey ends...");
620
621 col->setStrength(Collator::SECONDARY);
622 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
623 col->getCollationKey(test2, sortk2, key2Status))
624 == Collator::EQUAL,
625 "Result should be \"Abcda\" == \"abcda\"");
626 delete col;
627 }
628
629 //----------------------------------------------------------------------------
630 // Tests the CollatorElementIterator class.
631 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
632 //
633 void
TestElemIter()634 CollationAPITest::TestElemIter(/* char* par */)
635 {
636 logln("testing sortkey begins...");
637 Collator *col = 0;
638 UErrorCode success = U_ZERO_ERROR;
639 col = Collator::createInstance(Locale::getEnglish(), success);
640 if (U_FAILURE(success))
641 {
642 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
643 return;
644 }
645
646 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
647 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
648 logln("Constructors and comparison testing....");
649 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
650
651 CharacterIterator *chariter=new StringCharacterIterator(testString1);
652 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
653
654 // copy ctor
655 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
656 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
657
658 int32_t offset = iterator1->getOffset();
659 if (offset != 0) {
660 errln("Error in getOffset for collation element iterator\n");
661 return;
662 }
663 iterator1->setOffset(6, success);
664 if (U_FAILURE(success)) {
665 errln("Error in setOffset for collation element iterator\n");
666 return;
667 }
668 iterator1->setOffset(0, success);
669 int32_t order1, order2, order3;
670 doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
671 doAssert((*iterator1 != *iterator3), "The two iterators should be different");
672
673 doAssert((*coliter == *iterator1), "The two iterators should be the same");
674 doAssert((*coliter == *iterator2), "The two iterators should be the same");
675 doAssert((*coliter != *iterator3), "The two iterators should be different");
676
677 order1 = iterator1->next(success);
678 if (U_FAILURE(success))
679 {
680 errln("Somehow ran out of memory stepping through the iterator.");
681 return;
682 }
683
684 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
685 order2 = iterator2->getOffset();
686 doAssert((order1 != order2), "The order result should not be the same");
687 order2 = iterator2->next(success);
688 if (U_FAILURE(success))
689 {
690 errln("Somehow ran out of memory stepping through the iterator.");
691 return;
692 }
693
694 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
695 doAssert((order1 == order2), "The order result should be the same");
696 order3 = iterator3->next(success);
697 if (U_FAILURE(success))
698 {
699 errln("Somehow ran out of memory stepping through the iterator.");
700 return;
701 }
702
703 doAssert((CollationElementIterator::primaryOrder(order1) ==
704 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
705 doAssert((CollationElementIterator::secondaryOrder(order1) ==
706 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
707 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
708 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
709
710 order1 = iterator1->next(success); order3 = iterator3->next(success);
711 if (U_FAILURE(success))
712 {
713 errln("Somehow ran out of memory stepping through the iterator.");
714 return;
715 }
716
717 doAssert((CollationElementIterator::primaryOrder(order1) ==
718 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
719 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
720 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
721
722 order1 = iterator1->next(success);
723 order3 = iterator3->next(success);
724 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
725 /*
726 doAssert((CollationElementIterator::secondaryOrder(order1) !=
727 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
728 */
729 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
730
731 iterator1->reset(); iterator2->reset(); iterator3->reset();
732 order1 = iterator1->next(success);
733 if (U_FAILURE(success))
734 {
735 errln("Somehow ran out of memory stepping through the iterator.");
736 return;
737 }
738
739 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
740
741 order2 = iterator2->next(success);
742 if (U_FAILURE(success))
743 {
744 errln("Somehow ran out of memory stepping through the iterator.");
745 return;
746 }
747
748 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
749 doAssert((order1 == order2), "The order result should be the same");
750
751 order3 = iterator3->next(success);
752 if (U_FAILURE(success))
753 {
754 errln("Somehow ran out of memory stepping through the iterator.");
755 return;
756 }
757
758 doAssert((CollationElementIterator::primaryOrder(order1) ==
759 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
760 doAssert((CollationElementIterator::secondaryOrder(order1) ==
761 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
762 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
763 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
764
765 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
766 if (U_FAILURE(success))
767 {
768 errln("Somehow ran out of memory stepping through the iterator.");
769 return;
770 }
771
772 doAssert((CollationElementIterator::primaryOrder(order1) ==
773 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
774 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
775 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
776
777 order1 = iterator1->next(success); order3 = iterator3->next(success);
778 if (U_FAILURE(success))
779 {
780 errln("Somehow ran out of memory stepping through the iterator.");
781 return;
782 }
783
784 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
785 /*
786 doAssert((CollationElementIterator::secondaryOrder(order1) !=
787 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
788 */
789 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
790 doAssert((*iterator2 != *iterator3), "The iterators should be different");
791
792
793 //test error values
794 success=U_UNSUPPORTED_ERROR;
795 Collator *colerror=NULL;
796 colerror=Collator::createInstance(Locale::getEnglish(), success);
797 if (colerror != 0 || success == U_ZERO_ERROR){
798 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
799 }
800 int32_t position=coliter->previous(success);
801 if(position != CollationElementIterator::NULLORDER){
802 errln((UnicodeString)"Expected NULLORDER got" + position);
803 }
804 coliter->reset();
805 coliter->setText(*chariter, success);
806 if(!U_FAILURE(success)){
807 errln("Expeceted error");
808 }
809 iterator1->setText((UnicodeString)"hello there", success);
810 if(!U_FAILURE(success)){
811 errln("Expeceted error");
812 }
813
814 delete chariter;
815 delete coliter;
816 delete iterator1;
817 delete iterator2;
818 delete iterator3;
819 delete col;
820
821
822
823 logln("testing CollationElementIterator ends...");
824 }
825
826 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
827 void
TestOperators()828 CollationAPITest::TestOperators(/* char* par */)
829 {
830 UErrorCode success = U_ZERO_ERROR;
831 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
832 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
833 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
834 if (U_FAILURE(success)) {
835 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
836 return;
837 }
838 success = U_ZERO_ERROR;
839 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
840 if (U_FAILURE(success)) {
841 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
842 return;
843 }
844 logln("The operator tests begin : ");
845 logln("testing operator==, operator!=, clone methods ...");
846 doAssert((*col1 != *col2), "The two different table collations compared equal");
847 *col1 = *col2;
848 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
849
850 success = U_ZERO_ERROR;
851 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
852 if (U_FAILURE(success)) {
853 errln("Default collation creation failed.");
854 return;
855 }
856 doAssert((*col1 != *col3), "The two different table collations compared equal");
857 Collator* col4 = col1->clone();
858 Collator* col5 = col3->clone();
859 doAssert((*col1 == *col4), "Cloned collation objects not equal");
860 doAssert((*col3 != *col4), "Two different table collations compared equal");
861 doAssert((*col3 == *col5), "Cloned collation objects not equal");
862 doAssert((*col4 != *col5), "Two cloned collations compared equal");
863
864 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
865 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
866 if (U_FAILURE(success)) {
867 errln("Creating default collation with rules failed.");
868 return;
869 }
870 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
871
872 success = U_ZERO_ERROR;
873 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
874 if (U_FAILURE(success)) {
875 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
876 return;
877 }
878 success = U_ZERO_ERROR;
879 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
880 if (U_FAILURE(success)) {
881 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
882 return;
883 }
884 success = U_ZERO_ERROR;
885 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
886 if (U_FAILURE(success)) {
887 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
888 return;
889 }
890 // doAssert((*col7 == *col8), "The two equal table collations compared different");
891 doAssert((*col7 != *col9), "The two different table collations compared equal");
892 doAssert((*col8 != *col9), "The two different table collations compared equal");
893
894 logln("operator tests ended.");
895 delete col1;
896 delete col2;
897 delete col3;
898 delete col4;
899 delete col5;
900 delete col6;
901 delete col7;
902 delete col8;
903 delete col9;
904 }
905
906 // test clone and copy
907 void
TestDuplicate()908 CollationAPITest::TestDuplicate(/* char* par */)
909 {
910 UErrorCode status = U_ZERO_ERROR;
911 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
912 if (U_FAILURE(status)) {
913 logln("Default collator creation failed.");
914 return;
915 }
916 Collator *col2 = col1->clone();
917 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
918 UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
919 RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
920 if (U_FAILURE(status)) {
921 logln("Collation tailoring failed.");
922 return;
923 }
924 doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
925 *col3 = *((RuleBasedCollator*)col1);
926 doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
927
928 UCollationResult res;
929 UnicodeString first((UChar)0x0061);
930 UnicodeString second((UChar)0x0062);
931 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
932
933 delete col1;
934
935 // Try using the cloned collators after deleting the original data
936 res = col2->compare(first, second, status);
937 if(res != UCOL_LESS) {
938 errln("a should be less then b after tailoring");
939 }
940 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
941 errln(UnicodeString("English rule difference. ")
942 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
943 }
944 res = col3->compare(first, second, status);
945 if(res != UCOL_LESS) {
946 errln("a should be less then b after tailoring");
947 }
948 if (col3->getRules() != copiedEnglishRules) {
949 errln(UnicodeString("English rule difference. ")
950 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
951 }
952
953 delete col2;
954 delete col3;
955 }
956
957 void
TestCompare()958 CollationAPITest::TestCompare(/* char* par */)
959 {
960 logln("The compare tests begin : ");
961 Collator *col = 0;
962 UErrorCode success = U_ZERO_ERROR;
963 col = Collator::createInstance(Locale::getEnglish(), success);
964 if (U_FAILURE(success)) {
965 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
966 return;
967 }
968 UnicodeString test1("Abcda"), test2("abcda");
969 logln("Use tertiary comparison level testing ....");
970
971 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
972 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
973 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
974
975 col->setStrength(Collator::SECONDARY);
976 logln("Use secondary comparison level testing ....");
977
978 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
979 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
980 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
981
982 col->setStrength(Collator::PRIMARY);
983 logln("Use primary comparison level testing ....");
984
985 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
986 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
987 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
988
989 // Test different APIs
990 const UChar* t1 = test1.getBuffer();
991 int32_t t1Len = test1.length();
992 const UChar* t2 = test2.getBuffer();
993 int32_t t2Len = test2.length();
994
995 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
996 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
997 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
998 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
999 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
1000 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
1001
1002 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
1003 doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
1004 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
1005 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
1006 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
1007 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
1008 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
1009
1010
1011
1012 logln("The compare tests end.");
1013 delete col;
1014 }
1015
1016 void
TestGetAll()1017 CollationAPITest::TestGetAll(/* char* par */)
1018 {
1019 int32_t count1, count2;
1020 UErrorCode status = U_ZERO_ERROR;
1021
1022 logln("Trying Collator::getAvailableLocales(int&)");
1023
1024 const Locale* list = Collator::getAvailableLocales(count1);
1025 for (int32_t i = 0; i < count1; ++i) {
1026 UnicodeString dispName;
1027 logln(UnicodeString("Locale name: ")
1028 + UnicodeString(list[i].getName())
1029 + UnicodeString(" , the display name is : ")
1030 + UnicodeString(list[i].getDisplayName(dispName)));
1031 }
1032
1033 if (count1 == 0 || list == NULL) {
1034 dataerrln("getAvailableLocales(int&) returned an empty list");
1035 }
1036
1037 logln("Trying Collator::getAvailableLocales()");
1038 StringEnumeration* localeEnum = Collator::getAvailableLocales();
1039 const UnicodeString* locStr;
1040 const char *locCStr;
1041 count2 = 0;
1042
1043 if (localeEnum == NULL) {
1044 dataerrln("getAvailableLocales() returned NULL");
1045 return;
1046 }
1047
1048 while ((locStr = localeEnum->snext(status)) != NULL)
1049 {
1050 logln(UnicodeString("Locale name is: ") + *locStr);
1051 count2++;
1052 }
1053 if (count1 != count2) {
1054 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1055 }
1056
1057 logln("Trying Collator::getAvailableLocales() clone");
1058 count1 = 0;
1059 StringEnumeration* localeEnum2 = localeEnum->clone();
1060 localeEnum2->reset(status);
1061 while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1062 {
1063 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1064 count1++;
1065 }
1066 if (count1 != count2) {
1067 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1068 }
1069 if (localeEnum->count(status) != count1) {
1070 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1071 }
1072 delete localeEnum;
1073 delete localeEnum2;
1074 }
1075
TestSortKey()1076 void CollationAPITest::TestSortKey()
1077 {
1078 UErrorCode status = U_ZERO_ERROR;
1079 /*
1080 this is supposed to open default date format, but later on it treats
1081 it like it is "en_US"
1082 - very bad if you try to run the tests on machine where default
1083 locale is NOT "en_US"
1084 */
1085 Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1086 if (U_FAILURE(status)) {
1087 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1088 return;
1089 }
1090
1091 if (col->getStrength() != Collator::TERTIARY)
1092 {
1093 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1094 }
1095
1096 /* Need to use identical strength */
1097 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1098
1099 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1100 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1101 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1102
1103 uint8_t sortkey1[64];
1104 uint8_t sortkey2[64];
1105 uint8_t sortkey3[64];
1106
1107 logln("Use tertiary comparison level testing ....\n");
1108
1109 CollationKey key1;
1110 col->getCollationKey(test1, u_strlen(test1), key1, status);
1111
1112 CollationKey key2;
1113 col->getCollationKey(test2, u_strlen(test2), key2, status);
1114
1115 CollationKey key3;
1116 col->getCollationKey(test3, u_strlen(test3), key3, status);
1117
1118 doAssert(key1.compareTo(key2) == Collator::GREATER,
1119 "Result should be \"Abcda\" > \"abcda\"");
1120 doAssert(key2.compareTo(key1) == Collator::LESS,
1121 "Result should be \"abcda\" < \"Abcda\"");
1122 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1123 "Result should be \"abcda\" == \"abcda\"");
1124
1125 // Clone the key2 sortkey for later.
1126 int32_t keylength = 0;
1127 const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1128 LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1129 memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1130
1131 col->getSortKey(test1, sortkey1, 64);
1132 col->getSortKey(test2, sortkey2, 64);
1133 col->getSortKey(test3, sortkey3, 64);
1134
1135 const uint8_t *tempkey = key1.getByteArray(keylength);
1136 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1137 "Test1 string should have the same collation key and sort key");
1138 tempkey = key2.getByteArray(keylength);
1139 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1140 "Test2 string should have the same collation key and sort key");
1141 tempkey = key3.getByteArray(keylength);
1142 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1143 "Test3 string should have the same collation key and sort key");
1144
1145 col->getSortKey(test1, 5, sortkey1, 64);
1146 col->getSortKey(test2, 5, sortkey2, 64);
1147 col->getSortKey(test3, 5, sortkey3, 64);
1148
1149 tempkey = key1.getByteArray(keylength);
1150 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1151 "Test1 string should have the same collation key and sort key");
1152 tempkey = key2.getByteArray(keylength);
1153 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1154 "Test2 string should have the same collation key and sort key");
1155 tempkey = key3.getByteArray(keylength);
1156 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1157 "Test3 string should have the same collation key and sort key");
1158
1159 UnicodeString strtest1(test1);
1160 col->getSortKey(strtest1, sortkey1, 64);
1161 UnicodeString strtest2(test2);
1162 col->getSortKey(strtest2, sortkey2, 64);
1163 UnicodeString strtest3(test3);
1164 col->getSortKey(strtest3, sortkey3, 64);
1165
1166 tempkey = key1.getByteArray(keylength);
1167 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1168 "Test1 string should have the same collation key and sort key");
1169 tempkey = key2.getByteArray(keylength);
1170 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1171 "Test2 string should have the same collation key and sort key");
1172 tempkey = key3.getByteArray(keylength);
1173 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1174 "Test3 string should have the same collation key and sort key");
1175
1176 logln("Use secondary comparison level testing ...\n");
1177 col->setStrength(Collator::SECONDARY);
1178
1179 col->getCollationKey(test1, u_strlen(test1), key1, status);
1180 col->getCollationKey(test2, u_strlen(test2), key2, status);
1181 col->getCollationKey(test3, u_strlen(test3), key3, status);
1182
1183 doAssert(key1.compareTo(key2) == Collator::EQUAL,
1184 "Result should be \"Abcda\" == \"abcda\"");
1185 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1186 "Result should be \"abcda\" == \"abcda\"");
1187
1188 tempkey = key2.getByteArray(keylength);
1189 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1190 "Binary format for 'abcda' sortkey different for secondary strength!");
1191
1192 col->getSortKey(test1, sortkey1, 64);
1193 col->getSortKey(test2, sortkey2, 64);
1194 col->getSortKey(test3, sortkey3, 64);
1195
1196 tempkey = key1.getByteArray(keylength);
1197 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1198 "Test1 string should have the same collation key and sort key");
1199 tempkey = key2.getByteArray(keylength);
1200 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1201 "Test2 string should have the same collation key and sort key");
1202 tempkey = key3.getByteArray(keylength);
1203 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1204 "Test3 string should have the same collation key and sort key");
1205
1206 col->getSortKey(test1, 5, sortkey1, 64);
1207 col->getSortKey(test2, 5, sortkey2, 64);
1208 col->getSortKey(test3, 5, sortkey3, 64);
1209
1210 tempkey = key1.getByteArray(keylength);
1211 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1212 "Test1 string should have the same collation key and sort key");
1213 tempkey = key2.getByteArray(keylength);
1214 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1215 "Test2 string should have the same collation key and sort key");
1216 tempkey = key3.getByteArray(keylength);
1217 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1218 "Test3 string should have the same collation key and sort key");
1219
1220 col->getSortKey(strtest1, sortkey1, 64);
1221 col->getSortKey(strtest2, sortkey2, 64);
1222 col->getSortKey(strtest3, sortkey3, 64);
1223
1224 tempkey = key1.getByteArray(keylength);
1225 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1226 "Test1 string should have the same collation key and sort key");
1227 tempkey = key2.getByteArray(keylength);
1228 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1229 "Test2 string should have the same collation key and sort key");
1230 tempkey = key3.getByteArray(keylength);
1231 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1232 "Test3 string should have the same collation key and sort key");
1233
1234 logln("testing sortkey ends...");
1235 delete col;
1236 }
1237
TestSortKeyOverflow()1238 void CollationAPITest::TestSortKeyOverflow() {
1239 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1240 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1241 if (errorCode.errDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1242 return;
1243 }
1244 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1245 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi.
1246 // The sort key should be 6 bytes:
1247 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1248 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1249 uint8_t sortKey[12];
1250 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey));
1251 uint8_t sortKey2[12];
1252 for (int32_t capacity = 0; capacity < length; ++capacity) {
1253 uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2));
1254 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1255 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1256 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1257 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1258 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1259 }
1260 }
1261
1262 // Now try to break getCollationKey().
1263 // Internally, it always starts with a large stack buffer.
1264 // Since we cannot control the initial capacity, we throw an increasing number
1265 // of characters at it, with the problematic part at the end.
1266 const int32_t longCapacity = 2000;
1267 // Each 'a' in the prefix should result in one primary sort key byte.
1268 // For i_and_phi we expect 6 bytes, then the NUL terminator.
1269 const int32_t maxPrefixLength = longCapacity - 6 - 1;
1270 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1271 UnicodeString s(FALSE, i_and_phi, 2);
1272 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1273 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1274 CollationKey collKey;
1275 col->getCollationKey(s, collKey, errorCode);
1276 int32_t collKeyLength;
1277 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1278 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1279 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1280 }
1281
1282 // Insert an 'a' to match ++prefixLength.
1283 s.insert(prefixLength, (UChar)0x61);
1284 }
1285 }
1286
TestMaxExpansion()1287 void CollationAPITest::TestMaxExpansion()
1288 {
1289 UErrorCode status = U_ZERO_ERROR;
1290 UChar ch = 0;
1291 UChar32 unassigned = 0xEFFFD;
1292 uint32_t sorder = 0;
1293 uint32_t temporder = 0;
1294
1295 UnicodeString rule("&a < ab < c/aba < d < z < ch");
1296 RuleBasedCollator coll(rule, status);
1297 if(U_FAILURE(status)) {
1298 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1299 return;
1300 }
1301 UnicodeString str(ch);
1302 CollationElementIterator *iter =
1303 coll.createCollationElementIterator(str);
1304
1305 while (ch < 0xFFFF && U_SUCCESS(status)) {
1306 int count = 1;
1307 uint32_t order;
1308 int32_t size = 0;
1309
1310 ch ++;
1311
1312 str.setCharAt(0, ch);
1313 iter->setText(str, status);
1314 order = iter->previous(status);
1315
1316 /* thai management */
1317 if (order == 0)
1318 order = iter->previous(status);
1319
1320 while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1321 count ++;
1322 }
1323
1324 size = coll.getMaxExpansion(order);
1325 if (U_FAILURE(status) || size < count) {
1326 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1327 ch, size, count);
1328 }
1329 }
1330
1331 /* testing for exact max expansion */
1332 int32_t size;
1333 ch = 0;
1334 while (ch < 0x61) {
1335 uint32_t order;
1336 str.setCharAt(0, ch);
1337 iter->setText(str, status);
1338 order = iter->previous(status);
1339 size = coll.getMaxExpansion(order);
1340 if (U_FAILURE(status) || size != 1) {
1341 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1342 ch, size, 1);
1343 }
1344 ch ++;
1345 }
1346
1347 ch = 0x63;
1348 str.setTo(ch);
1349 iter->setText(str, status);
1350 temporder = iter->previous(status);
1351 size = coll.getMaxExpansion(temporder);
1352 if (U_FAILURE(status) || size != 3) {
1353 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1354 ch, temporder, size, 3);
1355 }
1356
1357 ch = 0x64;
1358 str.setTo(ch);
1359 iter->setText(str, status);
1360 temporder = iter->previous(status);
1361 size = coll.getMaxExpansion(temporder);
1362 if (U_FAILURE(status) || size != 1) {
1363 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1364 ch, temporder, size, 1);
1365 }
1366
1367 str.setTo(unassigned);
1368 iter->setText(str, status);
1369 sorder = iter->previous(status);
1370 size = coll.getMaxExpansion(sorder);
1371 if (U_FAILURE(status) || size != 2) {
1372 errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1373 size, 2);
1374 }
1375
1376 /* testing jamo */
1377 ch = 0x1165;
1378 str.setTo(ch);
1379 iter->setText(str, status);
1380 temporder = iter->previous(status);
1381 size = coll.getMaxExpansion(temporder);
1382 if (U_FAILURE(status) || size > 3) {
1383 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1384 ch, size, 3);
1385 }
1386
1387 delete iter;
1388
1389 /* testing special jamo &a<\u1160 */
1390 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1391
1392 RuleBasedCollator jamocoll(rule, status);
1393 iter = jamocoll.createCollationElementIterator(str);
1394 temporder = iter->previous(status);
1395 size = iter->getMaxExpansion(temporder);
1396 if (U_FAILURE(status) || size != 6) {
1397 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1398 ch, size, 5);
1399 }
1400
1401 delete iter;
1402 }
1403
TestDisplayName()1404 void CollationAPITest::TestDisplayName()
1405 {
1406 UErrorCode error = U_ZERO_ERROR;
1407 Collator *coll = Collator::createInstance("en_US", error);
1408 if (U_FAILURE(error)) {
1409 errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1410 return;
1411 }
1412 UnicodeString name;
1413 UnicodeString result;
1414 coll->getDisplayName(Locale::getCanadaFrench(), result);
1415 Locale::getCanadaFrench().getDisplayName(name);
1416 if (result.compare(name)) {
1417 errln("Failure getting the correct name for locale en_US");
1418 }
1419
1420 coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1421 Locale::getSimplifiedChinese().getDisplayName(name);
1422 if (result.compare(name)) {
1423 errln("Failure getting the correct name for locale zh_SG");
1424 }
1425 delete coll;
1426 }
1427
TestAttribute()1428 void CollationAPITest::TestAttribute()
1429 {
1430 UErrorCode error = U_ZERO_ERROR;
1431 Collator *coll = Collator::createInstance(error);
1432
1433 if (U_FAILURE(error)) {
1434 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1435 return;
1436 }
1437
1438 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1439 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1440 U_FAILURE(error)) {
1441 errln("Setting and retrieving of the french collation failed");
1442 }
1443
1444 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1445 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1446 U_FAILURE(error)) {
1447 errln("Setting and retrieving of the french collation failed");
1448 }
1449
1450 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1451 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1452 U_FAILURE(error)) {
1453 errln("Setting and retrieving of the alternate handling failed");
1454 }
1455
1456 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1457 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1458 U_FAILURE(error)) {
1459 errln("Setting and retrieving of the alternate handling failed");
1460 }
1461
1462 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1463 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1464 U_FAILURE(error)) {
1465 errln("Setting and retrieving of the case first attribute failed");
1466 }
1467
1468 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1469 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1470 U_FAILURE(error)) {
1471 errln("Setting and retrieving of the case first attribute failed");
1472 }
1473
1474 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1475 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1476 U_FAILURE(error)) {
1477 errln("Setting and retrieving of the case level attribute failed");
1478 }
1479
1480 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1481 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1482 U_FAILURE(error)) {
1483 errln("Setting and retrieving of the case level attribute failed");
1484 }
1485
1486 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1487 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1488 U_FAILURE(error)) {
1489 errln("Setting and retrieving of the normalization on/off attribute failed");
1490 }
1491
1492 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1493 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1494 U_FAILURE(error)) {
1495 errln("Setting and retrieving of the normalization on/off attribute failed");
1496 }
1497
1498 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1499 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1500 U_FAILURE(error)) {
1501 errln("Setting and retrieving of the collation strength failed");
1502 }
1503
1504 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1505 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1506 U_FAILURE(error)) {
1507 errln("Setting and retrieving of the collation strength failed");
1508 }
1509
1510 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1511 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1512 U_FAILURE(error)) {
1513 errln("Setting and retrieving of the collation strength failed");
1514 }
1515
1516 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1517 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1518 U_FAILURE(error)) {
1519 errln("Setting and retrieving of the collation strength failed");
1520 }
1521
1522 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1523 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1524 U_FAILURE(error)) {
1525 errln("Setting and retrieving of the collation strength failed");
1526 }
1527
1528 delete coll;
1529 }
1530
TestVariableTopSetting()1531 void CollationAPITest::TestVariableTopSetting() {
1532 UErrorCode status = U_ZERO_ERROR;
1533
1534 UChar vt[256] = { 0 };
1535
1536 // Use the root collator, not the default collator.
1537 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1538 Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1539 if(U_FAILURE(status)) {
1540 delete coll;
1541 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1542 return;
1543 }
1544
1545 uint32_t oldVarTop = coll->getVariableTop(status);
1546
1547 // ICU 53+: The character must be in a supported reordering group,
1548 // and the variable top is pinned to the end of that group.
1549 vt[0] = 0x0041;
1550
1551 (void)coll->setVariableTop(vt, 1, status);
1552 if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1553 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1554 }
1555
1556 status = U_ZERO_ERROR;
1557 vt[0] = 0x24; // dollar sign (currency symbol)
1558 uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1559 if(U_FAILURE(status)) {
1560 errln("setVariableTop(dollar sign) failed: %s", u_errorName(status));
1561 return;
1562 }
1563 if(newVarTop != coll->getVariableTop(status)) {
1564 errln("setVariableTop(dollar sign) != following getVariableTop()");
1565 }
1566
1567 UnicodeString dollar((UChar)0x24);
1568 UnicodeString euro((UChar)0x20AC);
1569 uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1570 assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1571 (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1572 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1573 (int64_t)newVarTop2, (int64_t)newVarTop);
1574
1575 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1576 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar));
1577 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro));
1578 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30)));
1579
1580 coll->setVariableTop(oldVarTop, status);
1581
1582 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1583
1584 if(newVarTop != newerVarTop) {
1585 errln("Didn't set vartop properly from UnicodeString!\n");
1586 }
1587
1588 delete coll;
1589
1590 }
1591
TestMaxVariable()1592 void CollationAPITest::TestMaxVariable() {
1593 UErrorCode errorCode = U_ZERO_ERROR;
1594 LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1595 if(U_FAILURE(errorCode)) {
1596 errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1597 return;
1598 }
1599
1600 (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1601 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1602 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1603 }
1604
1605 errorCode = U_ZERO_ERROR;
1606 (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1607
1608 if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1609 errln("setMaxVariable(currency) != following getMaxVariable()");
1610 }
1611
1612 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1613 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1614 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1615 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1616 }
1617
TestGetLocale()1618 void CollationAPITest::TestGetLocale() {
1619 UErrorCode status = U_ZERO_ERROR;
1620 const char *rules = "&a<x<y<z";
1621 UChar rlz[256] = {0};
1622
1623 Collator *coll = Collator::createInstance("root", status);
1624 if(U_FAILURE(status)) {
1625 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1626 return;
1627 }
1628 Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1629 if(locale != Locale::getRoot()) {
1630 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1631 "getLocale().getName() = \"%s\"",
1632 locale.getName());
1633 }
1634 delete coll;
1635
1636 coll = Collator::createInstance("", status);
1637 if(U_FAILURE(status)) {
1638 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1639 return;
1640 }
1641 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1642 if(locale != Locale::getRoot()) {
1643 errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1644 "getLocale().getName() = \"%s\"",
1645 locale.getName());
1646 }
1647 delete coll;
1648
1649 int32_t i = 0;
1650
1651 static const struct {
1652 const char* requestedLocale;
1653 const char* validLocale;
1654 const char* actualLocale;
1655 } testStruct[] = {
1656 // Note: Locale::getRoot().getName() == "" not "root".
1657 { "de_DE", "de", "" },
1658 { "sr_RS", "sr_Cyrl_RS", "sr" },
1659 { "en_US_CALIFORNIA", "en_US", "" },
1660 { "fr_FR_NONEXISTANT", "fr", "" },
1661 // pinyin is the default, therefore suppressed.
1662 { "zh_CN", "zh_Hans_CN", "zh" },
1663 // zh_Hant has default=stroke but the data is in zh.
1664 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1665 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1666 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" },
1667 // yue/yue_Hant aliased to zh_Hant, yue_Hans aliased to zh_Hans.
1668 { "yue", "zh_Hant", "zh@collation=stroke" },
1669 { "yue_HK", "zh_Hant", "zh@collation=stroke" },
1670 { "yue_Hant", "zh_Hant", "zh@collation=stroke" },
1671 { "yue_Hant_HK", "zh_Hant", "zh@collation=stroke" },
1672 { "yue@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1673 { "yue_HK@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1674 { "yue_CN", "zh_Hans", "zh" },
1675 { "yue_Hans", "zh_Hans", "zh" },
1676 { "yue_Hans_CN", "zh_Hans", "zh" },
1677 { "yue_Hans@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" },
1678 { "yue_CN@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" }
1679 };
1680
1681 u_unescape(rules, rlz, 256);
1682
1683 /* test opening collators for different locales */
1684 for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) {
1685 status = U_ZERO_ERROR;
1686 coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1687 if(U_FAILURE(status)) {
1688 errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1689 delete coll;
1690 continue;
1691 }
1692 // The requested locale may be the same as the valid locale,
1693 // or may not be supported at all. See ticket #10477.
1694 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1695 if(U_SUCCESS(status) &&
1696 locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1697 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1698 testStruct[i].requestedLocale,
1699 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1700 }
1701 status = U_ZERO_ERROR;
1702 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1703 if(locale != testStruct[i].validLocale) {
1704 errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1705 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1706 }
1707 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1708 if(locale != testStruct[i].actualLocale) {
1709 errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1710 testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1711 }
1712 // If we open a collator for the actual locale, we should get an equivalent one again.
1713 LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1714 if(U_FAILURE(status)) {
1715 errln("Failed to open collator for actual locale \"%s\" with %s",
1716 locale.getName(), u_errorName(status));
1717 } else {
1718 Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1719 if(actual2 != locale) {
1720 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1721 locale.getName(), actual2.getName());
1722 }
1723 if(*coll2 != *coll) {
1724 errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1725 }
1726 }
1727 delete coll;
1728 }
1729
1730 /* completely non-existent locale for collator should get a root collator */
1731 {
1732 LocalPointer<Collator> coll(Collator::createInstance("blahaha", status));
1733 if(U_FAILURE(status)) {
1734 errln("Failed to open collator with %s", u_errorName(status));
1735 return;
1736 }
1737 Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status);
1738 const char *name = valid.getName();
1739 if(*name != 0 && strcmp(name, "root") != 0) {
1740 errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name);
1741 }
1742 Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1743 name = actual.getName();
1744 if(*name != 0 && strcmp(name, "root") != 0) {
1745 errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name);
1746 }
1747 }
1748
1749
1750
1751 /* collator instantiated from rules should have all three locales NULL */
1752 coll = new RuleBasedCollator(rlz, status);
1753 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1754 if(U_SUCCESS(status) && !locale.isBogus()) {
1755 errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1756 }
1757 status = U_ZERO_ERROR;
1758 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1759 if(!locale.isBogus()) {
1760 errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1761 }
1762 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1763 if(!locale.isBogus()) {
1764 errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1765 }
1766 delete coll;
1767 }
1768
// One TestBounds() record: an escaped source string together with the buffer
// that receives its sort key. The field order is relied upon by the aggregate
// initializers in TestBounds().
struct teststruct {
    const char *original;   // source text, with \uXXXX escapes
    uint8_t key[256];       // sort key bytes, compared as a C string
};
1773
1774
1775
1776 U_CDECL_BEGIN
1777 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1778 compare_teststruct(const void *string1, const void *string2) {
1779 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1780 }
1781 U_CDECL_END
1782
1783
TestBounds(void)1784 void CollationAPITest::TestBounds(void) {
1785 UErrorCode status = U_ZERO_ERROR;
1786
1787 Collator *coll = Collator::createInstance(Locale("sh"), status);
1788 if(U_FAILURE(status)) {
1789 delete coll;
1790 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1791 return;
1792 }
1793
1794 uint8_t sortkey[512], lower[512], upper[512];
1795 UChar buffer[512];
1796
1797 static const char * const test[] = {
1798 "John Smith",
1799 "JOHN SMITH",
1800 "john SMITH",
1801 "j\\u00F6hn sm\\u00EFth",
1802 "J\\u00F6hn Sm\\u00EFth",
1803 "J\\u00D6HN SM\\u00CFTH",
1804 "john smithsonian",
1805 "John Smithsonian"
1806 };
1807
1808 struct teststruct tests[] = {
1809 {"\\u010CAKI MIHALJ", {0}},
1810 {"\\u010CAKI MIHALJ", {0}},
1811 {"\\u010CAKI PIRO\\u0160KA", {0}},
1812 {"\\u010CABAI ANDRIJA", {0}},
1813 {"\\u010CABAI LAJO\\u0160", {0}},
1814 {"\\u010CABAI MARIJA", {0}},
1815 {"\\u010CABAI STEVAN", {0}},
1816 {"\\u010CABAI STEVAN", {0}},
1817 {"\\u010CABARKAPA BRANKO", {0}},
1818 {"\\u010CABARKAPA MILENKO", {0}},
1819 {"\\u010CABARKAPA MIROSLAV", {0}},
1820 {"\\u010CABARKAPA SIMO", {0}},
1821 {"\\u010CABARKAPA STANKO", {0}},
1822 {"\\u010CABARKAPA TAMARA", {0}},
1823 {"\\u010CABARKAPA TOMA\\u0160", {0}},
1824 {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1825 {"\\u010CABDARI\\u0106 ZORICA", {0}},
1826 {"\\u010CABI NANDOR", {0}},
1827 {"\\u010CABOVI\\u0106 MILAN", {0}},
1828 {"\\u010CABRADI AGNEZIJA", {0}},
1829 {"\\u010CABRADI IVAN", {0}},
1830 {"\\u010CABRADI JELENA", {0}},
1831 {"\\u010CABRADI LJUBICA", {0}},
1832 {"\\u010CABRADI STEVAN", {0}},
1833 {"\\u010CABRDA MARTIN", {0}},
1834 {"\\u010CABRILO BOGDAN", {0}},
1835 {"\\u010CABRILO BRANISLAV", {0}},
1836 {"\\u010CABRILO LAZAR", {0}},
1837 {"\\u010CABRILO LJUBICA", {0}},
1838 {"\\u010CABRILO SPASOJA", {0}},
1839 {"\\u010CADE\\u0160 ZDENKA", {0}},
1840 {"\\u010CADESKI BLAGOJE", {0}},
1841 {"\\u010CADOVSKI VLADIMIR", {0}},
1842 {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1843 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1844 {"\\u010CAJA VANKA", {0}},
1845 {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1846 {"\\u010CAJI\\u0106 BORISLAV", {0}},
1847 {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1848 {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1849 {"\\u010CAKAN EUGEN", {0}},
1850 {"\\u010CAKAN EVGENIJE", {0}},
1851 {"\\u010CAKAN IVAN", {0}},
1852 {"\\u010CAKAN JULIJAN", {0}},
1853 {"\\u010CAKAN MIHAJLO", {0}},
1854 {"\\u010CAKAN STEVAN", {0}},
1855 {"\\u010CAKAN VLADIMIR", {0}},
1856 {"\\u010CAKAN VLADIMIR", {0}},
1857 {"\\u010CAKAN VLADIMIR", {0}},
1858 {"\\u010CAKARA ANA", {0}},
1859 {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1860 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1861 {"\\u010CAKI \\u0160ANDOR", {0}},
1862 {"\\u010CAKI AMALIJA", {0}},
1863 {"\\u010CAKI ANDRA\\u0160", {0}},
1864 {"\\u010CAKI LADISLAV", {0}},
1865 {"\\u010CAKI LAJO\\u0160", {0}},
1866 {"\\u010CAKI LASLO", {0}}
1867 };
1868
1869
1870
1871 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1872 int32_t arraySize = UPRV_LENGTHOF(tests);
1873
1874 (void)lowerSize; // Suppress unused variable warnings.
1875 (void)upperSize;
1876
1877 for(i = 0; i<arraySize; i++) {
1878 buffSize = u_unescape(tests[i].original, buffer, 512);
1879 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1880 }
1881
1882 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1883
1884 for(i = 0; i < arraySize-1; i++) {
1885 for(j = i+1; j < arraySize; j++) {
1886 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1887 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1888 for(k = i; k <= j; k++) {
1889 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1890 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1891 }
1892 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1893 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1894 }
1895 }
1896 }
1897 }
1898
1899
1900 for(i = 0; i<UPRV_LENGTHOF(test); i++) {
1901 buffSize = u_unescape(test[i], buffer, 512);
1902 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1903 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1904 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1905 for(j = i+1; j<UPRV_LENGTHOF(test); j++) {
1906 buffSize = u_unescape(test[j], buffer, 512);
1907 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1908 if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1909 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1910 }
1911 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1912 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1913 }
1914 }
1915 }
1916 delete coll;
1917 }
1918
1919
TestGetTailoredSet()1920 void CollationAPITest::TestGetTailoredSet()
1921 {
1922 struct {
1923 const char *rules;
1924 const char *tests[20];
1925 int32_t testsize;
1926 } setTest[] = {
1927 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1928 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1929 };
1930
1931 int32_t i = 0, j = 0;
1932 UErrorCode status = U_ZERO_ERROR;
1933
1934 UnicodeString buff;
1935 UnicodeSet *set = NULL;
1936
1937 for(i = 0; i < UPRV_LENGTHOF(setTest); i++) {
1938 buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1939 RuleBasedCollator coll(buff, status);
1940 if(U_SUCCESS(status)) {
1941 set = coll.getTailoredSet(status);
1942 if(set->size() < setTest[i].testsize) {
1943 errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1944 }
1945 for(j = 0; j < setTest[i].testsize; j++) {
1946 buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1947 if(!set->contains(buff)) {
1948 errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1949 }
1950 }
1951 delete set;
1952 } else {
1953 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1954 }
1955 }
1956 }
1957
TestUClassID()1958 void CollationAPITest::TestUClassID()
1959 {
1960 char id = *((char *)RuleBasedCollator::getStaticClassID());
1961 if (id != 0) {
1962 errln("Static class id for RuleBasedCollator should be 0");
1963 }
1964 UErrorCode status = U_ZERO_ERROR;
1965 RuleBasedCollator *coll
1966 = (RuleBasedCollator *)Collator::createInstance(status);
1967 if(U_FAILURE(status)) {
1968 delete coll;
1969 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1970 return;
1971 }
1972 id = *((char *)coll->getDynamicClassID());
1973 if (id != 0) {
1974 errln("Dynamic class id for RuleBasedCollator should be 0");
1975 }
1976 id = *((char *)CollationKey::getStaticClassID());
1977 if (id != 0) {
1978 errln("Static class id for CollationKey should be 0");
1979 }
1980 CollationKey *key = new CollationKey();
1981 id = *((char *)key->getDynamicClassID());
1982 if (id != 0) {
1983 errln("Dynamic class id for CollationKey should be 0");
1984 }
1985 id = *((char *)CollationElementIterator::getStaticClassID());
1986 if (id != 0) {
1987 errln("Static class id for CollationElementIterator should be 0");
1988 }
1989 UnicodeString str("testing");
1990 CollationElementIterator *iter = coll->createCollationElementIterator(str);
1991 id = *((char *)iter->getDynamicClassID());
1992 if (id != 0) {
1993 errln("Dynamic class id for CollationElementIterator should be 0");
1994 }
1995 delete key;
1996 delete iter;
1997 delete coll;
1998 }
1999
2000 class TestCollator : public Collator
2001 {
2002 public:
2003 virtual TestCollator* clone() const;
2004
2005 using Collator::compare;
2006
2007 virtual UCollationResult compare(const UnicodeString& source,
2008 const UnicodeString& target,
2009 UErrorCode& status) const;
2010 virtual UCollationResult compare(const UnicodeString& source,
2011 const UnicodeString& target,
2012 int32_t length,
2013 UErrorCode& status) const;
2014 virtual UCollationResult compare(const UChar* source,
2015 int32_t sourceLength,
2016 const UChar* target,
2017 int32_t targetLength,
2018 UErrorCode& status) const;
2019 virtual CollationKey& getCollationKey(const UnicodeString& source,
2020 CollationKey& key,
2021 UErrorCode& status) const;
2022 virtual CollationKey& getCollationKey(const UChar*source,
2023 int32_t sourceLength,
2024 CollationKey& key,
2025 UErrorCode& status) const;
2026 virtual int32_t hashCode(void) const;
2027 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
2028 virtual ECollationStrength getStrength(void) const;
2029 virtual void setStrength(ECollationStrength newStrength);
2030 virtual UClassID getDynamicClassID(void) const;
2031 virtual void getVersion(UVersionInfo info) const;
2032 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2033 UErrorCode &status);
2034 virtual UColAttributeValue getAttribute(UColAttribute attr,
2035 UErrorCode &status) const;
2036 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2037 UErrorCode &status);
2038 virtual uint32_t setVariableTop(const UnicodeString &varTop,
2039 UErrorCode &status);
2040 virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2041 virtual uint32_t getVariableTop(UErrorCode &status) const;
2042 virtual int32_t getSortKey(const UnicodeString& source,
2043 uint8_t* result,
2044 int32_t resultLength) const;
2045 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2046 uint8_t*result, int32_t resultLength) const;
2047 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2048 virtual UBool operator==(const Collator& other) const;
2049 // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2050 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2051 TestCollator() : Collator() {}
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2052 TestCollator(UCollationStrength collationStrength,
2053 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {}
2054 };
2055
operator ==(const Collator & other) const2056 inline UBool TestCollator::operator==(const Collator& other) const {
2057 // TestCollator has no fields, so we test for identity.
2058 return this == &other;
2059
2060 // Normally, subclasses should do something like the following:
2061 // if (this == &other) { return TRUE; }
2062 // if (!Collator::operator==(other)) { return FALSE; } // not the same class
2063 //
2064 // const TestCollator &o = (const TestCollator&)other;
2065 // (compare this vs. o's subclass fields)
2066 }
2067
clone() const2068 TestCollator* TestCollator::clone() const
2069 {
2070 return new TestCollator();
2071 }
2072
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2073 UCollationResult TestCollator::compare(const UnicodeString& source,
2074 const UnicodeString& target,
2075 UErrorCode& status) const
2076 {
2077 if(U_SUCCESS(status)) {
2078 return UCollationResult(source.compare(target));
2079 } else {
2080 return UCOL_EQUAL;
2081 }
2082 }
2083
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2084 UCollationResult TestCollator::compare(const UnicodeString& source,
2085 const UnicodeString& target,
2086 int32_t length,
2087 UErrorCode& status) const
2088 {
2089 if(U_SUCCESS(status)) {
2090 return UCollationResult(source.compare(0, length, target));
2091 } else {
2092 return UCOL_EQUAL;
2093 }
2094 }
2095
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2096 UCollationResult TestCollator::compare(const UChar* source,
2097 int32_t sourceLength,
2098 const UChar* target,
2099 int32_t targetLength,
2100 UErrorCode& status) const
2101 {
2102 UnicodeString s(source, sourceLength);
2103 UnicodeString t(target, targetLength);
2104 return compare(s, t, status);
2105 }
2106
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2107 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2108 CollationKey& key,
2109 UErrorCode& status) const
2110 {
2111 char temp[100];
2112 int length = 100;
2113 length = source.extract(temp, length, NULL, status);
2114 temp[length] = 0;
2115 CollationKey tempkey((uint8_t*)temp, length);
2116 key = tempkey;
2117 return key;
2118 }
2119
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2120 CollationKey& TestCollator::getCollationKey(const UChar*source,
2121 int32_t sourceLength,
2122 CollationKey& key,
2123 UErrorCode& status) const
2124 {
2125 //s tack allocation used since collationkey does not keep the unicodestring
2126 UnicodeString str(source, sourceLength);
2127 return getCollationKey(str, key, status);
2128 }
2129
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2130 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2131 int32_t resultLength) const
2132 {
2133 UErrorCode status = U_ZERO_ERROR;
2134 int32_t length = source.extract((char *)result, resultLength, NULL,
2135 status);
2136 result[length] = 0;
2137 return length;
2138 }
2139
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2140 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2141 uint8_t*result, int32_t resultLength) const
2142 {
2143 UnicodeString str(source, sourceLength);
2144 return getSortKey(str, result, resultLength);
2145 }
2146
hashCode() const2147 int32_t TestCollator::hashCode() const
2148 {
2149 return 0;
2150 }
2151
getLocale(ULocDataLocaleType type,UErrorCode & status) const2152 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2153 {
2154 // api not used, this is to make the compiler happy
2155 if (U_FAILURE(status)) {
2156 (void)type;
2157 }
2158 return NULL;
2159 }
2160
getStrength() const2161 Collator::ECollationStrength TestCollator::getStrength() const
2162 {
2163 return TERTIARY;
2164 }
2165
setStrength(Collator::ECollationStrength newStrength)2166 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2167 {
2168 // api not used, this is to make the compiler happy
2169 (void)newStrength;
2170 }
2171
getDynamicClassID(void) const2172 UClassID TestCollator::getDynamicClassID(void) const
2173 {
2174 return 0;
2175 }
2176
getVersion(UVersionInfo info) const2177 void TestCollator::getVersion(UVersionInfo info) const
2178 {
2179 // api not used, this is to make the compiler happy
2180 memset(info, 0, U_MAX_VERSION_LENGTH);
2181 }
2182
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2183 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2184 UErrorCode & /*status*/)
2185 {
2186 }
2187
getAttribute(UColAttribute attr,UErrorCode & status) const2188 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2189 UErrorCode &status) const
2190 {
2191 // api not used, this is to make the compiler happy
2192 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2193 return UCOL_OFF;
2194 }
2195 return UCOL_DEFAULT;
2196 }
2197
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2198 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2199 UErrorCode &status)
2200 {
2201 // api not used, this is to make the compiler happy
2202 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2203 status = U_ILLEGAL_ARGUMENT_ERROR;
2204 }
2205 return 0;
2206 }
2207
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2208 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2209 UErrorCode &status)
2210 {
2211 // api not used, this is to make the compiler happy
2212 if (U_SUCCESS(status) && varTop.length() == 0) {
2213 status = U_ILLEGAL_ARGUMENT_ERROR;
2214 }
2215 return 0;
2216 }
2217
setVariableTop(uint32_t varTop,UErrorCode & status)2218 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2219 {
2220 // api not used, this is to make the compiler happy
2221 if (U_SUCCESS(status) && varTop == 0) {
2222 status = U_ILLEGAL_ARGUMENT_ERROR;
2223 }
2224 }
2225
getVariableTop(UErrorCode & status) const2226 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2227 {
2228
2229 // api not used, this is to make the compiler happy
2230 if (U_SUCCESS(status)) {
2231 return 0;
2232 }
2233 return (uint32_t)(0xFFFFFFFFu);
2234 }
2235
getTailoredSet(UErrorCode & status) const2236 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2237 {
2238 return Collator::getTailoredSet(status);
2239 }
2240
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2241 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2242 {
2243 Collator::setLocales(requestedLocale, validLocale, actualLocale);
2244 }
2245
2246
TestSubclass()2247 void CollationAPITest::TestSubclass()
2248 {
2249 TestCollator col1;
2250 TestCollator col2;
2251 doAssert(col1 != col2, "2 instances of TestCollator should be different");
2252 if (col1.hashCode() != col2.hashCode()) {
2253 errln("Every TestCollator has the same hashcode");
2254 }
2255 UnicodeString abc("abc", 3);
2256 UnicodeString bcd("bcd", 3);
2257 if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2258 errln("TestCollator compare should be the same as the default "
2259 "string comparison");
2260 }
2261 CollationKey key;
2262 UErrorCode status = U_ZERO_ERROR;
2263 col1.getCollationKey(abc, key, status);
2264 int32_t length = 0;
2265 const char* bytes = (const char *)key.getByteArray(length);
2266 UnicodeString keyarray(bytes, length, NULL, status);
2267 if (abc != keyarray) {
2268 errln("TestCollator collationkey API is returning wrong values");
2269 }
2270
2271 UnicodeSet expectedset(0, 0x10FFFF);
2272 UnicodeSet *defaultset = col1.getTailoredSet(status);
2273 if (!defaultset->containsAll(expectedset)
2274 || !expectedset.containsAll(*defaultset)) {
2275 errln("Error: expected default tailoring to be 0 to 0x10ffff");
2276 }
2277 delete defaultset;
2278
2279 // use base class implementation
2280 Locale loc1 = Locale::getGermany();
2281 Locale loc2 = Locale::getFrance();
2282 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2283
2284 UnicodeString displayName;
2285 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2286
2287 TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2288 UnicodeString a("a");
2289 UnicodeString b("b");
2290 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2291 if(col1.compare(a, b) != result) {
2292 errln("Collator doesn't give default result");
2293 }
2294 if(col1.compare(a, b, 1) != result) {
2295 errln("Collator doesn't give default result");
2296 }
2297 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2298 errln("Collator doesn't give default result");
2299 }
2300 }
2301
TestNULLCharTailoring()2302 void CollationAPITest::TestNULLCharTailoring()
2303 {
2304 UErrorCode status = U_ZERO_ERROR;
2305 UChar buf[256] = {0};
2306 int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2307 UnicodeString first((UChar)0x0061);
2308 UnicodeString second((UChar)0);
2309 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2310 if(U_FAILURE(status)) {
2311 delete coll;
2312 errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2313 return;
2314 }
2315 UCollationResult res = coll->compare(first, second, status);
2316 if(res != UCOL_LESS) {
2317 errln("a should be less then NULL after tailoring");
2318 }
2319 delete coll;
2320 }
2321
TestClone()2322 void CollationAPITest::TestClone() {
2323 logln("\ninit c0");
2324 UErrorCode status = U_ZERO_ERROR;
2325 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2326
2327 if (U_FAILURE(status)) {
2328 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2329 return;
2330 }
2331
2332 c0->setStrength(Collator::TERTIARY);
2333 dump("c0", c0, status);
2334
2335 logln("\ninit c1");
2336 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2337 c1->setStrength(Collator::TERTIARY);
2338 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2339 if(val == UCOL_LOWER_FIRST){
2340 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2341 }else{
2342 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2343 }
2344 dump("c0", c0, status);
2345 dump("c1", c1, status);
2346
2347 logln("\ninit c2");
2348 RuleBasedCollator* c2 = c1->clone();
2349 val = c2->getAttribute(UCOL_CASE_FIRST, status);
2350 if(val == UCOL_LOWER_FIRST){
2351 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2352 }else{
2353 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2354 }
2355 if(U_FAILURE(status)){
2356 errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2357 return;
2358 }
2359 dump("c0", c0, status);
2360 dump("c1", c1, status);
2361 dump("c2", c2, status);
2362 if(*c1 == *c2){
2363 errln("The cloned objects refer to same data");
2364 }
2365 delete c0;
2366 delete c1;
2367 delete c2;
2368 }
2369
TestCloneBinary()2370 void CollationAPITest::TestCloneBinary() {
2371 IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2372 LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2373 LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2374 if(errorCode.errDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2375 return;
2376 }
2377 RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2378 RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2379 if(rbRoot == NULL || rbc == NULL) {
2380 infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2381 return;
2382 }
2383 rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2384 UnicodeString uUmlaut((UChar)0xfc);
2385 UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2386 assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2387 uint8_t bin[25000];
2388 int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode);
2389 if(errorCode.errDataIfFailureAndReset("rbc->cloneBinary()")) {
2390 return;
2391 }
2392 logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2393
2394 RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2395 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2396 return;
2397 }
2398 assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2399 assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2400 assertTrue("rbc==rbc2", *rbc == rbc2);
2401 uint8_t bin2[25000];
2402 int32_t bin2Length = rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode);
2403 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2404 assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2405
2406 RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode);
2407 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2408 return;
2409 }
2410 assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode));
2411 assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode));
2412 assertTrue("rbc==rbc3", *rbc == rbc3);
2413 }
2414
TestIterNumeric()2415 void CollationAPITest::TestIterNumeric() {
2416 // Regression test for ticket #9915.
2417 // The collation code sometimes masked the continuation marker away
2418 // but later tested the result for isContinuation().
2419 // This test case failed because the third bytes of the computed numeric-collation primaries
2420 // were permutated with the script reordering table.
2421 // It should have been possible to reproduce this with the root collator
2422 // and characters with appropriate 3-byte primary weights.
2423 // The effectiveness of this test depends completely on the collation elements
2424 // and on the implementation code.
2425 IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2426 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2427 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator constructor")) {
2428 return;
2429 }
2430 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2431 UCharIterator iter40, iter72;
2432 uiter_setUTF8(&iter40, "\x34\x30", 2);
2433 uiter_setUTF8(&iter72, "\x37\x32", 2);
2434 UCollationResult result = coll.compare(iter40, iter72, errorCode);
2435 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2436 }
2437
TestBadKeywords()2438 void CollationAPITest::TestBadKeywords() {
2439 // Test locale IDs with errors.
2440 // Valid locale IDs are tested via data-driven tests.
2441 UErrorCode errorCode = U_ZERO_ERROR;
2442 Locale bogusLocale(Locale::getRoot());
2443 bogusLocale.setToBogus();
2444 LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2445 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2446 errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2447 u_errorName(errorCode));
2448 }
2449
2450 // Unknown value.
2451 const char *localeID = "it-u-ks-xyz";
2452 errorCode = U_ZERO_ERROR;
2453 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2454 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2455 dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2456 localeID, u_errorName(errorCode));
2457 }
2458
2459 // Unsupported attributes.
2460 localeID = "it@colHiraganaQuaternary=true";
2461 errorCode = U_ZERO_ERROR;
2462 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2463 if(errorCode != U_UNSUPPORTED_ERROR) {
2464 if (errorCode == U_FILE_ACCESS_ERROR) {
2465 dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode));
2466 } else {
2467 errln("Collator::createInstance(%s) did not fail as expected - %s",
2468 localeID, u_errorName(errorCode));
2469 }
2470 }
2471
2472 localeID = "it-u-vt-u24";
2473 errorCode = U_ZERO_ERROR;
2474 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2475 if(errorCode != U_UNSUPPORTED_ERROR) {
2476 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) {
2477 dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode));
2478 } else {
2479 errln("Collator::createInstance(%s) did not fail as expected - %s",
2480 localeID, u_errorName(errorCode));
2481 }
2482 }
2483 }
2484
TestGapTooSmall()2485 void CollationAPITest::TestGapTooSmall() {
2486 IcuTestErrorCode errorCode(*this, "TestGapTooSmall");
2487 // Try to tailor >20k characters into a too-small primary gap between symbols
2488 // that have 3-byte primary weights.
2489 // In FractionalUCA.txt:
2490 // 263A; [0C BA D0, 05, 05] # Zyyy So [084A.0020.0002] * WHITE SMILING FACE
2491 // 263B; [0C BA D7, 05, 05] # Zyyy So [084B.0020.0002] * BLACK SMILING FACE
2492 {
2493 RuleBasedCollator(u"&☺<*\u4E00-\u9FFF", errorCode);
2494 if(errorCode.isSuccess()) {
2495 errln("no exception for primary-gap overflow");
2496 } else if(errorCode.get() == U_BUFFER_OVERFLOW_ERROR) {
2497 // This is the expected error.
2498 // assertTrue("exception message mentions 'gap'", e.getMessage().contains("gap"));
2499 } else {
2500 errln("unexpected error for primary-gap overflow: %s", errorCode.errorName());
2501 }
2502 errorCode.reset();
2503 }
2504
2505 // CLDR 32/ICU 60 FractionalUCA.txt makes room at the end of the symbols range
2506 // for several 2-byte primaries, or a large number of 3-byters.
2507 // The reset point is primary-before what should be
2508 // the special currency-first-primary contraction,
2509 // which is hopefully fairly stable, but not guaranteed stable.
2510 // In FractionalUCA.txt:
2511 // FDD1 20AC; [0D 70 02, 05, 05] # CURRENCY first primary
2512 {
2513 RuleBasedCollator coll(u"&[before 1]\uFDD1€<*\u4E00-\u9FFF", errorCode);
2514 assertTrue("tailored Han before currency", coll.compare(u"\u4E00", u"$", errorCode) < 0);
2515 errorCode.errIfFailureAndReset(
2516 "unexpected exception for tailoring many characters at the end of symbols");
2517 }
2518 }
2519
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2520 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2521 const char* bigone = "One";
2522 const char* littleone = "one";
2523
2524 logln(msg + " " + c->compare(bigone, littleone) +
2525 " s: " + c->getStrength() +
2526 " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2527 }
// Test-suite dispatcher for CollationAPITest.
//
// Parameters:
//   index - selects one of the test cases registered below.
//   exec  - when true, a suite banner is logged before dispatching.
//   name  - test-case name reference passed through to the TESTCASE_AUTO machinery.
//   par   - unused (unnamed parameter).
//
// NOTE(review): the TESTCASE_AUTO_BEGIN / TESTCASE_AUTO / TESTCASE_AUTO_END macros are
// defined outside this file; presumably they map `index` to the listed tests in
// listing order, so reordering these lines would renumber the tests — confirm
// against the test framework header before rearranging.
void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
{
    if (exec) logln("TestSuite CollationAPITest: ");
    TESTCASE_AUTO_BEGIN;
    // One entry per test method of this class.
    TESTCASE_AUTO(TestProperty);
    TESTCASE_AUTO(TestKeywordValues);
    TESTCASE_AUTO(TestOperators);
    TESTCASE_AUTO(TestDuplicate);
    TESTCASE_AUTO(TestCompare);
    TESTCASE_AUTO(TestHashCode);
    TESTCASE_AUTO(TestCollationKey);
    TESTCASE_AUTO(TestElemIter);
    TESTCASE_AUTO(TestGetAll);
    TESTCASE_AUTO(TestRuleBasedColl);
    TESTCASE_AUTO(TestDecomposition);
    TESTCASE_AUTO(TestSafeClone);
    TESTCASE_AUTO(TestSortKey);
    TESTCASE_AUTO(TestSortKeyOverflow);
    TESTCASE_AUTO(TestMaxExpansion);
    TESTCASE_AUTO(TestDisplayName);
    TESTCASE_AUTO(TestAttribute);
    TESTCASE_AUTO(TestVariableTopSetting);
    TESTCASE_AUTO(TestMaxVariable);
    TESTCASE_AUTO(TestRules);
    TESTCASE_AUTO(TestGetLocale);
    TESTCASE_AUTO(TestBounds);
    TESTCASE_AUTO(TestGetTailoredSet);
    TESTCASE_AUTO(TestUClassID);
    TESTCASE_AUTO(TestSubclass);
    TESTCASE_AUTO(TestNULLCharTailoring);
    TESTCASE_AUTO(TestClone);
    TESTCASE_AUTO(TestCloneBinary);
    TESTCASE_AUTO(TestIterNumeric);
    TESTCASE_AUTO(TestBadKeywords);
    TESTCASE_AUTO(TestGapTooSmall);
    TESTCASE_AUTO_END;
}
2565
2566 #endif /* #if !UCONFIG_NO_COLLATION */
2567