1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 //===============================================================================
7 //
8 // File apicoll.cpp
9 //
10 //
11 //
12 // Created by: Helena Shih
13 //
14 // Modification History:
15 //
16 // Date Name Description
17 // 2/5/97 aliu Added streamIn and streamOut methods. Added
18 // constructor which reads RuleBasedCollator object from
19 // a binary file. Added writeToFile method which streams
20 // RuleBasedCollator out to a binary file. The streamIn
21 // and streamOut methods use istream and ostream objects
22 // in binary mode.
23 // 6/30/97 helena Added tests for CollationElementIterator::setText, getOffset
24 // setOffset and DecompositionIterator::getOffset, setOffset.
25 // DecompositionIterator is made public so add class scope
26 // testing.
27 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t)
28 //===============================================================================
29
30 #include "unicode/utypes.h"
31
32 #if !UCONFIG_NO_COLLATION
33
34 #include "unicode/localpointer.h"
35 #include "unicode/coll.h"
36 #include "unicode/tblcoll.h"
37 #include "unicode/coleitr.h"
38 #include "unicode/sortkey.h"
39 #include "apicoll.h"
40 #include "unicode/chariter.h"
41 #include "unicode/schriter.h"
42 #include "unicode/ustring.h"
43 #include "unicode/ucol.h"
44
45 #include "sfwdchit.h"
46 #include "cmemory.h"
47 #include <stdlib.h>
48
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so that the cast cannot bind
// unexpectedly when the macro is used inside a larger expression.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
50
51 void
doAssert(UBool condition,const char * message)52 CollationAPITest::doAssert(UBool condition, const char *message)
53 {
54 if (!condition) {
55 errln(UnicodeString("ERROR : ") + message);
56 }
57 }
58
59 // Collator Class Properties
60 // ctor, dtor, createInstance, compare, getStrength/setStrength
61 // getDecomposition/setDecomposition, getDisplayName
62 void
TestProperty()63 CollationAPITest::TestProperty(/* char* par */)
64 {
65 UErrorCode success = U_ZERO_ERROR;
66 Collator *col = 0;
67 /*
68 * Expected version of the English collator.
69 * Currently, the major/minor version numbers change when the builder code
70 * changes,
71 * number 2 is from the tailoring data version and
72 * number 3 is the UCA version.
73 * This changes with every UCA version change, and the expected value
74 * needs to be adjusted.
75 * Same in cintltst/capitst.c.
76 */
77 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
78 UVersionInfo versionArray;
79
80 logln("The property tests begin : ");
81 logln("Test ctors : ");
82 col = Collator::createInstance(Locale::getEnglish(), success);
83 if (U_FAILURE(success)){
84 errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
85 return;
86 }
87
88 StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
89 if (U_FAILURE(success)){
90 errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
91 return;
92 }
93 delete kwEnum;
94
95 col->getVersion(versionArray);
96 // Check for a version greater than some value rather than equality
97 // so that we need not update the expected version each time.
98 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
99 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
100 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
101 } else {
102 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
103 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
104 }
105
106 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
107 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
108 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
109 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
110 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
111 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
112
113 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
114 success = U_ZERO_ERROR;
115 {
116 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
117 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
118 UCharIterator abauIter, abssIter;
119 uiter_setReplaceable(&abauIter, &abau);
120 uiter_setReplaceable(&abssIter, &abss);
121 doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
122 success = U_ZERO_ERROR;
123 }
124
125 /*start of update [Bertrand A. D. 02/10/98]*/
126 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
127 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed");
128 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed");
129 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed");
130 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
131 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
132 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
133 /*end of update [Bertrand A. D. 02/10/98]*/
134
135
136 logln("Test ctors ends.");
137 logln("testing Collator::getStrength() method ...");
138 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
139 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140
141
142 logln("testing Collator::setStrength() method ...");
143 col->setStrength(Collator::SECONDARY);
144 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
145 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
146 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
147
148 UnicodeString name;
149
150 logln("Get display name for the US English collation in German : ");
151 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
152 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
153
154 logln("Get display name for the US English collation in English : ");
155 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
156 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
157 #if 0
158 // weiv : this test is bogus if we're running on any machine that has different default locale than English.
159 // Therefore, it is banned!
160 logln("Get display name for the US English in default locale language : ");
161 logln(Collator::getDisplayName(Locale::US, name));
162 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
163 #endif
164 delete col; col = 0;
165 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
166 success);
167 if (U_FAILURE(success)) {
168 errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
169 return;
170 }
171 const UnicodeString &daRules = rcol->getRules();
172 if(daRules.isEmpty()) {
173 dataerrln("missing da_DK tailoring rule string");
174 } else {
175 doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
176 }
177 delete rcol;
178
179 col = Collator::createInstance(Locale::getFrench(), success);
180 if (U_FAILURE(success))
181 {
182 errln("Creating French collation failed.");
183 return;
184 }
185
186 col->setStrength(Collator::PRIMARY);
187 logln("testing Collator::getStrength() method again ...");
188 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
189 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
190
191 logln("testing French Collator::setStrength() method ...");
192 col->setStrength(Collator::TERTIARY);
193 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
194 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
195 doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
196
197 logln("Create junk collation: ");
198 Locale abcd("ab", "CD", "");
199 success = U_ZERO_ERROR;
200 Collator *junk = 0;
201 junk = Collator::createInstance(abcd, success);
202
203 if (U_FAILURE(success))
204 {
205 errln("Junk collation creation failed, should at least return default.");
206 delete col;
207 return;
208 }
209
210 delete col;
211 col = Collator::createInstance(success);
212 if (U_FAILURE(success))
213 {
214 errln("Creating default collator failed.");
215 delete junk;
216 return;
217 }
218
219 doAssert(((RuleBasedCollator *)col)->getRules() == ((RuleBasedCollator *)junk)->getRules(),
220 "The default collation should be returned.");
221 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
222 if (U_FAILURE(success))
223 {
224 errln("Creating fr_CA collator failed.");
225 delete col;
226 delete junk;
227 return;
228 }
229
230 // If the default locale isn't French, the French and non-French collators
231 // should be different
232 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
233 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
234 }
235 Collator *aFrCol = frCol->clone();
236 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
237 logln("Collator property test ended.");
238
239 delete col;
240 delete frCol;
241 delete aFrCol;
242 delete junk;
243 }
244
245 void
TestRuleBasedColl()246 CollationAPITest::TestRuleBasedColl()
247 {
248 RuleBasedCollator *col1, *col2, *col3, *col4;
249 UErrorCode status = U_ZERO_ERROR;
250
251 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
252 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
253
254 col1 = new RuleBasedCollator(ruleset1, status);
255 if (U_FAILURE(status)) {
256 errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
257 return;
258 }
259 else {
260 logln("PASS: RuleBased Collator creation passed\n");
261 }
262
263 status = U_ZERO_ERROR;
264 col2 = new RuleBasedCollator(ruleset2, status);
265 if (U_FAILURE(status)) {
266 errln("RuleBased Collator creation failed.\n");
267 return;
268 }
269 else {
270 logln("PASS: RuleBased Collator creation passed\n");
271 }
272
273 status = U_ZERO_ERROR;
274 Locale locale("aa", "AA");
275 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
276 if (U_FAILURE(status)) {
277 errln("Fallback Collator creation failed.: %s\n");
278 return;
279 }
280 else {
281 logln("PASS: Fallback Collator creation passed\n");
282 }
283 delete col3;
284
285 status = U_ZERO_ERROR;
286 col3 = (RuleBasedCollator *)Collator::createInstance(status);
287 if (U_FAILURE(status)) {
288 errln("Default Collator creation failed.: %s\n");
289 return;
290 }
291 else {
292 logln("PASS: Default Collator creation passed\n");
293 }
294
295 UnicodeString rule1 = col1->getRules();
296 UnicodeString rule2 = col2->getRules();
297 UnicodeString rule3 = col3->getRules();
298
299 doAssert(rule1 != rule2, "Default collator getRules failed");
300 doAssert(rule2 != rule3, "Default collator getRules failed");
301 doAssert(rule1 != rule3, "Default collator getRules failed");
302
303 col4 = new RuleBasedCollator(rule2, status);
304 if (U_FAILURE(status)) {
305 errln("RuleBased Collator creation failed.\n");
306 return;
307 }
308
309 UnicodeString rule4 = col4->getRules();
310 doAssert(rule2 == rule4, "Default collator getRules failed");
311 int32_t length4 = 0;
312 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
313 if (U_FAILURE(status)) {
314 errln("Cloned rule data failed.\n");
315 return;
316 }
317
318 // free(clonedrule4); BAD API!!!!
319 uprv_free(clonedrule4);
320
321
322 delete col1;
323 delete col2;
324 delete col3;
325 delete col4;
326 }
327
328 void
TestRules()329 CollationAPITest::TestRules()
330 {
331 RuleBasedCollator *coll;
332 UErrorCode status = U_ZERO_ERROR;
333 UnicodeString rules;
334
335 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
336 if (U_FAILURE(status)) {
337 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
338 return;
339 }
340 else {
341 logln("PASS: RuleBased Collator creation passed\n");
342 }
343
344 coll->getRules(UCOL_TAILORING_ONLY, rules);
345 if (rules.length() != 0x00) {
346 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
347 }
348
349 coll->getRules(UCOL_FULL_RULES, rules);
350 if (rules.length() < 0) {
351 errln("English full rules failed");
352 }
353 delete coll;
354 }
355
356 void
TestDecomposition()357 CollationAPITest::TestDecomposition() {
358 UErrorCode status = U_ZERO_ERROR;
359 Collator *en_US = Collator::createInstance("en_US", status),
360 *el_GR = Collator::createInstance("el_GR", status),
361 *vi_VN = Collator::createInstance("vi_VN", status);
362
363 if (U_FAILURE(status)) {
364 errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
365 return;
366 }
367
368 /* there is no reason to have canonical decomposition in en_US OR default locale */
369 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
370 {
371 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
372 }
373
374 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
375 {
376 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
377 }
378
379 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
380 {
381 errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
382 }
383
384 delete en_US;
385 delete el_GR;
386 delete vi_VN;
387 }
388
389 void
TestSafeClone()390 CollationAPITest::TestSafeClone() {
391 static const int CLONETEST_COLLATOR_COUNT = 3;
392 Collator *someCollators [CLONETEST_COLLATOR_COUNT];
393 Collator *col;
394 UErrorCode err = U_ZERO_ERROR;
395 int index;
396
397 UnicodeString test1("abCda");
398 UnicodeString test2("abcda");
399
400 /* one default collator & two complex ones */
401 someCollators[0] = Collator::createInstance("en_US", err);
402 someCollators[1] = Collator::createInstance("ko", err);
403 someCollators[2] = Collator::createInstance("ja_JP", err);
404 if(U_FAILURE(err)) {
405 errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
406 delete someCollators[0];
407 delete someCollators[1];
408 delete someCollators[2];
409 return;
410 }
411
412 /* change orig & clone & make sure they are independent */
413
414 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
415 {
416 col = someCollators[index]->safeClone();
417 if (col == 0) {
418 errln("SafeClone of collator should not return null\n");
419 break;
420 }
421 col->setStrength(Collator::TERTIARY);
422 someCollators[index]->setStrength(Collator::PRIMARY);
423 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
424 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
425
426 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
427 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
428 delete col;
429 delete someCollators[index];
430 }
431 }
432
433 void
TestHashCode()434 CollationAPITest::TestHashCode(/* char* par */)
435 {
436 logln("hashCode tests begin.");
437 UErrorCode success = U_ZERO_ERROR;
438 Collator *col1 = 0;
439 col1 = Collator::createInstance(Locale::getEnglish(), success);
440 if (U_FAILURE(success))
441 {
442 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
443 return;
444 }
445
446 Collator *col2 = 0;
447 Locale dk("da", "DK", "");
448 col2 = Collator::createInstance(dk, success);
449 if (U_FAILURE(success))
450 {
451 errln("Danish collation creation failed.");
452 return;
453 }
454
455 Collator *col3 = 0;
456 col3 = Collator::createInstance(Locale::getEnglish(), success);
457 if (U_FAILURE(success))
458 {
459 errln("2nd default collation creation failed.");
460 return;
461 }
462
463 logln("Collator::hashCode() testing ...");
464
465 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
466 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
467 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
468
469 logln("hashCode tests end.");
470 delete col1;
471 delete col2;
472
473 UnicodeString test1("Abcda");
474 UnicodeString test2("abcda");
475
476 CollationKey sortk1, sortk2, sortk3;
477 UErrorCode status = U_ZERO_ERROR;
478
479 col3->getCollationKey(test1, sortk1, status);
480 col3->getCollationKey(test2, sortk2, status);
481 col3->getCollationKey(test2, sortk3, status);
482
483 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
484 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
485
486 delete col3;
487 }
488
489 //----------------------------------------------------------------------------
490 // CollationKey -- Tests the CollationKey methods
491 //
492 void
TestCollationKey()493 CollationAPITest::TestCollationKey(/* char* par */)
494 {
495 logln("testing CollationKey begins...");
496 Collator *col = 0;
497 UErrorCode success=U_ZERO_ERROR;
498 col = Collator::createInstance(Locale::getEnglish(), success);
499 if (U_FAILURE(success))
500 {
501 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
502 return;
503 }
504 col->setStrength(Collator::TERTIARY);
505
506 CollationKey sortk1, sortk2;
507 UnicodeString test1("Abcda"), test2("abcda");
508 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
509
510 logln("Testing weird arguments");
511 // No string vs. empty string vs. completely-ignorable string:
512 // See ICU ticket #10495.
513 CollationKey sortkNone;
514 int32_t length;
515 sortkNone.getByteArray(length);
516 doAssert(!sortkNone.isBogus() && length == 0,
517 "Default-constructed collation key should be empty");
518 CollationKey sortkEmpty;
519 col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
520 // key gets reset here
521 const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
522 doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
523 byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
524 "Empty string should return a collation key with empty levels");
525 doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
526 "Expected no collation key < collation key for empty string");
527 doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
528 "Expected collation key for empty string > no collation key");
529
530 CollationKey sortkIgnorable;
531 // Most control codes and CGJ are completely ignorable.
532 // A string with only completely ignorables must compare equal to an empty string.
533 col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
534 sortkIgnorable.getByteArray(length);
535 doAssert(!sortkIgnorable.isBogus() && length == 3,
536 "Completely ignorable string should return a collation key with empty levels");
537 doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
538 "Completely ignorable string should compare equal to empty string");
539
540 // bogus key returned here
541 key1Status = U_ILLEGAL_ARGUMENT_ERROR;
542 col->getCollationKey(NULL, 0, sortk1, key1Status);
543 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
544 "Error code should return bogus collation key");
545
546 key1Status = U_ZERO_ERROR;
547 logln("Use tertiary comparison level testing ....");
548
549 col->getCollationKey(test1, sortk1, key1Status);
550 if (U_FAILURE(key1Status)) {
551 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
552 return;
553 }
554 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
555 == Collator::GREATER,
556 "Result should be \"Abcda\" >>> \"abcda\"");
557
558 CollationKey sortk3(sortk2), sortkNew;
559
560 sortkNew = sortk1;
561 doAssert((sortk1 != sortk2), "The sort keys should be different");
562 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
563 doAssert((sortk2 == sortk3), "The sort keys should be the same");
564 doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
565 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
566 doAssert((sortkNew != sortk3), "The sort keys should be different");
567 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
568 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
569 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
570 doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
571 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
572 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
573 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
574 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
575 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
576 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
577
578 int32_t cnt1, cnt2, cnt3, cnt4;
579
580 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
581 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
582
583 const uint8_t* byteArray3 = 0;
584 byteArray3 = sortk1.getByteArray(cnt3);
585
586 const uint8_t* byteArray4 = 0;
587 byteArray4 = sortk2.getByteArray(cnt4);
588
589 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
590 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
591
592 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
593 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
594 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
595 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
596 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
597 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
598
599 logln("Equality tests : ");
600 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
601 doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
602 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
603 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
604 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
605 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
606
607 byteArray1 = 0;
608 byteArray2 = 0;
609
610 sortk3 = sortk1;
611 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
612 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
613 logln("testing sortkey ends...");
614
615 col->setStrength(Collator::SECONDARY);
616 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
617 col->getCollationKey(test2, sortk2, key2Status))
618 == Collator::EQUAL,
619 "Result should be \"Abcda\" == \"abcda\"");
620 delete col;
621 }
622
623 //----------------------------------------------------------------------------
624 // Tests the CollatorElementIterator class.
625 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
626 //
627 void
TestElemIter()628 CollationAPITest::TestElemIter(/* char* par */)
629 {
630 logln("testing sortkey begins...");
631 Collator *col = 0;
632 UErrorCode success = U_ZERO_ERROR;
633 col = Collator::createInstance(Locale::getEnglish(), success);
634 if (U_FAILURE(success))
635 {
636 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
637 return;
638 }
639
640 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
641 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
642 logln("Constructors and comparison testing....");
643 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
644
645 CharacterIterator *chariter=new StringCharacterIterator(testString1);
646 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
647
648 // copy ctor
649 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
650 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
651
652 int32_t offset = iterator1->getOffset();
653 if (offset != 0) {
654 errln("Error in getOffset for collation element iterator\n");
655 return;
656 }
657 iterator1->setOffset(6, success);
658 if (U_FAILURE(success)) {
659 errln("Error in setOffset for collation element iterator\n");
660 return;
661 }
662 iterator1->setOffset(0, success);
663 int32_t order1, order2, order3;
664 doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
665 doAssert((*iterator1 != *iterator3), "The two iterators should be different");
666
667 doAssert((*coliter == *iterator1), "The two iterators should be the same");
668 doAssert((*coliter == *iterator2), "The two iterators should be the same");
669 doAssert((*coliter != *iterator3), "The two iterators should be different");
670
671 order1 = iterator1->next(success);
672 if (U_FAILURE(success))
673 {
674 errln("Somehow ran out of memory stepping through the iterator.");
675 return;
676 }
677
678 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
679 order2 = iterator2->getOffset();
680 doAssert((order1 != order2), "The order result should not be the same");
681 order2 = iterator2->next(success);
682 if (U_FAILURE(success))
683 {
684 errln("Somehow ran out of memory stepping through the iterator.");
685 return;
686 }
687
688 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
689 doAssert((order1 == order2), "The order result should be the same");
690 order3 = iterator3->next(success);
691 if (U_FAILURE(success))
692 {
693 errln("Somehow ran out of memory stepping through the iterator.");
694 return;
695 }
696
697 doAssert((CollationElementIterator::primaryOrder(order1) ==
698 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
699 doAssert((CollationElementIterator::secondaryOrder(order1) ==
700 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
701 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
702 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
703
704 order1 = iterator1->next(success); order3 = iterator3->next(success);
705 if (U_FAILURE(success))
706 {
707 errln("Somehow ran out of memory stepping through the iterator.");
708 return;
709 }
710
711 doAssert((CollationElementIterator::primaryOrder(order1) ==
712 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
713 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
714 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
715
716 order1 = iterator1->next(success);
717 order3 = iterator3->next(success);
718 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
719 /*
720 doAssert((CollationElementIterator::secondaryOrder(order1) !=
721 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
722 */
723 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
724
725 iterator1->reset(); iterator2->reset(); iterator3->reset();
726 order1 = iterator1->next(success);
727 if (U_FAILURE(success))
728 {
729 errln("Somehow ran out of memory stepping through the iterator.");
730 return;
731 }
732
733 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
734
735 order2 = iterator2->next(success);
736 if (U_FAILURE(success))
737 {
738 errln("Somehow ran out of memory stepping through the iterator.");
739 return;
740 }
741
742 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
743 doAssert((order1 == order2), "The order result should be the same");
744
745 order3 = iterator3->next(success);
746 if (U_FAILURE(success))
747 {
748 errln("Somehow ran out of memory stepping through the iterator.");
749 return;
750 }
751
752 doAssert((CollationElementIterator::primaryOrder(order1) ==
753 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
754 doAssert((CollationElementIterator::secondaryOrder(order1) ==
755 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
756 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
757 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
758
759 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
760 if (U_FAILURE(success))
761 {
762 errln("Somehow ran out of memory stepping through the iterator.");
763 return;
764 }
765
766 doAssert((CollationElementIterator::primaryOrder(order1) ==
767 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
768 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
769 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
770
771 order1 = iterator1->next(success); order3 = iterator3->next(success);
772 if (U_FAILURE(success))
773 {
774 errln("Somehow ran out of memory stepping through the iterator.");
775 return;
776 }
777
778 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
779 /*
780 doAssert((CollationElementIterator::secondaryOrder(order1) !=
781 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
782 */
783 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
784 doAssert((*iterator2 != *iterator3), "The iterators should be different");
785
786
787 //test error values
788 success=U_UNSUPPORTED_ERROR;
789 Collator *colerror=NULL;
790 colerror=Collator::createInstance(Locale::getEnglish(), success);
791 if (colerror != 0 || success == U_ZERO_ERROR){
792 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
793 }
794 int32_t position=coliter->previous(success);
795 if(position != CollationElementIterator::NULLORDER){
796 errln((UnicodeString)"Expected NULLORDER got" + position);
797 }
798 coliter->reset();
799 coliter->setText(*chariter, success);
800 if(!U_FAILURE(success)){
801 errln("Expeceted error");
802 }
803 iterator1->setText((UnicodeString)"hello there", success);
804 if(!U_FAILURE(success)){
805 errln("Expeceted error");
806 }
807
808 delete chariter;
809 delete coliter;
810 delete iterator1;
811 delete iterator2;
812 delete iterator3;
813 delete col;
814
815
816
817 logln("testing CollationElementIterator ends...");
818 }
819
820 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
821 void
TestOperators()822 CollationAPITest::TestOperators(/* char* par */)
823 {
824 UErrorCode success = U_ZERO_ERROR;
825 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
826 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
827 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
828 if (U_FAILURE(success)) {
829 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
830 return;
831 }
832 success = U_ZERO_ERROR;
833 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
834 if (U_FAILURE(success)) {
835 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
836 return;
837 }
838 logln("The operator tests begin : ");
839 logln("testing operator==, operator!=, clone methods ...");
840 doAssert((*col1 != *col2), "The two different table collations compared equal");
841 *col1 = *col2;
842 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
843
844 success = U_ZERO_ERROR;
845 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
846 if (U_FAILURE(success)) {
847 errln("Default collation creation failed.");
848 return;
849 }
850 doAssert((*col1 != *col3), "The two different table collations compared equal");
851 Collator* col4 = col1->clone();
852 Collator* col5 = col3->clone();
853 doAssert((*col1 == *col4), "Cloned collation objects not equal");
854 doAssert((*col3 != *col4), "Two different table collations compared equal");
855 doAssert((*col3 == *col5), "Cloned collation objects not equal");
856 doAssert((*col4 != *col5), "Two cloned collations compared equal");
857
858 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
859 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
860 if (U_FAILURE(success)) {
861 errln("Creating default collation with rules failed.");
862 return;
863 }
864 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
865
866 success = U_ZERO_ERROR;
867 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
868 if (U_FAILURE(success)) {
869 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
870 return;
871 }
872 success = U_ZERO_ERROR;
873 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
874 if (U_FAILURE(success)) {
875 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
876 return;
877 }
878 success = U_ZERO_ERROR;
879 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
880 if (U_FAILURE(success)) {
881 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
882 return;
883 }
884 // doAssert((*col7 == *col8), "The two equal table collations compared different");
885 doAssert((*col7 != *col9), "The two different table collations compared equal");
886 doAssert((*col8 != *col9), "The two different table collations compared equal");
887
888 logln("operator tests ended.");
889 delete col1;
890 delete col2;
891 delete col3;
892 delete col4;
893 delete col5;
894 delete col6;
895 delete col7;
896 delete col8;
897 delete col9;
898 }
899
900 // test clone and copy
901 void
TestDuplicate()902 CollationAPITest::TestDuplicate(/* char* par */)
903 {
904 UErrorCode status = U_ZERO_ERROR;
905 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
906 if (U_FAILURE(status)) {
907 logln("Default collator creation failed.");
908 return;
909 }
910 Collator *col2 = col1->clone();
911 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
912 UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
913 RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
914 if (U_FAILURE(status)) {
915 logln("Collation tailoring failed.");
916 return;
917 }
918 doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
919 *col3 = *((RuleBasedCollator*)col1);
920 doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
921
922 UCollationResult res;
923 UnicodeString first((UChar)0x0061);
924 UnicodeString second((UChar)0x0062);
925 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
926
927 delete col1;
928
929 // Try using the cloned collators after deleting the original data
930 res = col2->compare(first, second, status);
931 if(res != UCOL_LESS) {
932 errln("a should be less then b after tailoring");
933 }
934 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
935 errln(UnicodeString("English rule difference. ")
936 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
937 }
938 res = col3->compare(first, second, status);
939 if(res != UCOL_LESS) {
940 errln("a should be less then b after tailoring");
941 }
942 if (col3->getRules() != copiedEnglishRules) {
943 errln(UnicodeString("English rule difference. ")
944 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
945 }
946
947 delete col2;
948 delete col3;
949 }
950
951 void
TestCompare()952 CollationAPITest::TestCompare(/* char* par */)
953 {
954 logln("The compare tests begin : ");
955 Collator *col = 0;
956 UErrorCode success = U_ZERO_ERROR;
957 col = Collator::createInstance(Locale::getEnglish(), success);
958 if (U_FAILURE(success)) {
959 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
960 return;
961 }
962 UnicodeString test1("Abcda"), test2("abcda");
963 logln("Use tertiary comparison level testing ....");
964
965 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
966 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
967 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
968
969 col->setStrength(Collator::SECONDARY);
970 logln("Use secondary comparison level testing ....");
971
972 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
973 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
974 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
975
976 col->setStrength(Collator::PRIMARY);
977 logln("Use primary comparison level testing ....");
978
979 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
980 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
981 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
982
983 // Test different APIs
984 const UChar* t1 = test1.getBuffer();
985 int32_t t1Len = test1.length();
986 const UChar* t2 = test2.getBuffer();
987 int32_t t2Len = test2.length();
988
989 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
990 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
991 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
992 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
993 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
994 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
995
996 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
997 doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
998 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
999 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
1000 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
1001 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
1002 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
1003
1004
1005
1006 logln("The compare tests end.");
1007 delete col;
1008 }
1009
1010 void
TestGetAll()1011 CollationAPITest::TestGetAll(/* char* par */)
1012 {
1013 if (logKnownIssue("10774","Side effects from utility/LocaleTest/TestGetLocale")) {
1014 return;
1015 }
1016 int32_t count1, count2;
1017 UErrorCode status = U_ZERO_ERROR;
1018
1019 logln("Trying Collator::getAvailableLocales(int&)");
1020
1021 const Locale* list = Collator::getAvailableLocales(count1);
1022 for (int32_t i = 0; i < count1; ++i) {
1023 UnicodeString dispName;
1024 logln(UnicodeString("Locale name: ")
1025 + UnicodeString(list[i].getName())
1026 + UnicodeString(" , the display name is : ")
1027 + UnicodeString(list[i].getDisplayName(dispName)));
1028 }
1029
1030 if (count1 == 0 || list == NULL) {
1031 dataerrln("getAvailableLocales(int&) returned an empty list");
1032 }
1033
1034 logln("Trying Collator::getAvailableLocales()");
1035 StringEnumeration* localeEnum = Collator::getAvailableLocales();
1036 const UnicodeString* locStr;
1037 const char *locCStr;
1038 count2 = 0;
1039
1040 if (localeEnum == NULL) {
1041 dataerrln("getAvailableLocales() returned NULL");
1042 return;
1043 }
1044
1045 while ((locStr = localeEnum->snext(status)) != NULL)
1046 {
1047 logln(UnicodeString("Locale name is: ") + *locStr);
1048 count2++;
1049 }
1050 if (count1 != count2) {
1051 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1052 }
1053
1054 logln("Trying Collator::getAvailableLocales() clone");
1055 count1 = 0;
1056 StringEnumeration* localeEnum2 = localeEnum->clone();
1057 localeEnum2->reset(status);
1058 while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1059 {
1060 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1061 count1++;
1062 }
1063 if (count1 != count2) {
1064 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1065 }
1066 if (localeEnum->count(status) != count1) {
1067 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1068 }
1069 delete localeEnum;
1070 delete localeEnum2;
1071 }
1072
TestSortKey()1073 void CollationAPITest::TestSortKey()
1074 {
1075 UErrorCode status = U_ZERO_ERROR;
1076 /*
1077 this is supposed to open default date format, but later on it treats
1078 it like it is "en_US"
1079 - very bad if you try to run the tests on machine where default
1080 locale is NOT "en_US"
1081 */
1082 Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1083 if (U_FAILURE(status)) {
1084 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1085 return;
1086 }
1087
1088 if (col->getStrength() != Collator::TERTIARY)
1089 {
1090 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1091 }
1092
1093 /* Need to use identical strength */
1094 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1095
1096 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1097 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1098 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1099
1100 uint8_t sortkey1[64];
1101 uint8_t sortkey2[64];
1102 uint8_t sortkey3[64];
1103
1104 logln("Use tertiary comparison level testing ....\n");
1105
1106 CollationKey key1;
1107 col->getCollationKey(test1, u_strlen(test1), key1, status);
1108
1109 CollationKey key2;
1110 col->getCollationKey(test2, u_strlen(test2), key2, status);
1111
1112 CollationKey key3;
1113 col->getCollationKey(test3, u_strlen(test3), key3, status);
1114
1115 doAssert(key1.compareTo(key2) == Collator::GREATER,
1116 "Result should be \"Abcda\" > \"abcda\"");
1117 doAssert(key2.compareTo(key1) == Collator::LESS,
1118 "Result should be \"abcda\" < \"Abcda\"");
1119 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1120 "Result should be \"abcda\" == \"abcda\"");
1121
1122 // Clone the key2 sortkey for later.
1123 int32_t keylength = 0;
1124 const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1125 LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1126 memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1127
1128 col->getSortKey(test1, sortkey1, 64);
1129 col->getSortKey(test2, sortkey2, 64);
1130 col->getSortKey(test3, sortkey3, 64);
1131
1132 const uint8_t *tempkey = key1.getByteArray(keylength);
1133 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1134 "Test1 string should have the same collation key and sort key");
1135 tempkey = key2.getByteArray(keylength);
1136 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1137 "Test2 string should have the same collation key and sort key");
1138 tempkey = key3.getByteArray(keylength);
1139 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1140 "Test3 string should have the same collation key and sort key");
1141
1142 col->getSortKey(test1, 5, sortkey1, 64);
1143 col->getSortKey(test2, 5, sortkey2, 64);
1144 col->getSortKey(test3, 5, sortkey3, 64);
1145
1146 tempkey = key1.getByteArray(keylength);
1147 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1148 "Test1 string should have the same collation key and sort key");
1149 tempkey = key2.getByteArray(keylength);
1150 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1151 "Test2 string should have the same collation key and sort key");
1152 tempkey = key3.getByteArray(keylength);
1153 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1154 "Test3 string should have the same collation key and sort key");
1155
1156 UnicodeString strtest1(test1);
1157 col->getSortKey(strtest1, sortkey1, 64);
1158 UnicodeString strtest2(test2);
1159 col->getSortKey(strtest2, sortkey2, 64);
1160 UnicodeString strtest3(test3);
1161 col->getSortKey(strtest3, sortkey3, 64);
1162
1163 tempkey = key1.getByteArray(keylength);
1164 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1165 "Test1 string should have the same collation key and sort key");
1166 tempkey = key2.getByteArray(keylength);
1167 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1168 "Test2 string should have the same collation key and sort key");
1169 tempkey = key3.getByteArray(keylength);
1170 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1171 "Test3 string should have the same collation key and sort key");
1172
1173 logln("Use secondary comparision level testing ...\n");
1174 col->setStrength(Collator::SECONDARY);
1175
1176 col->getCollationKey(test1, u_strlen(test1), key1, status);
1177 col->getCollationKey(test2, u_strlen(test2), key2, status);
1178 col->getCollationKey(test3, u_strlen(test3), key3, status);
1179
1180 doAssert(key1.compareTo(key2) == Collator::EQUAL,
1181 "Result should be \"Abcda\" == \"abcda\"");
1182 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1183 "Result should be \"abcda\" == \"abcda\"");
1184
1185 tempkey = key2.getByteArray(keylength);
1186 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1187 "Binary format for 'abcda' sortkey different for secondary strength!");
1188
1189 col->getSortKey(test1, sortkey1, 64);
1190 col->getSortKey(test2, sortkey2, 64);
1191 col->getSortKey(test3, sortkey3, 64);
1192
1193 tempkey = key1.getByteArray(keylength);
1194 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1195 "Test1 string should have the same collation key and sort key");
1196 tempkey = key2.getByteArray(keylength);
1197 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1198 "Test2 string should have the same collation key and sort key");
1199 tempkey = key3.getByteArray(keylength);
1200 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1201 "Test3 string should have the same collation key and sort key");
1202
1203 col->getSortKey(test1, 5, sortkey1, 64);
1204 col->getSortKey(test2, 5, sortkey2, 64);
1205 col->getSortKey(test3, 5, sortkey3, 64);
1206
1207 tempkey = key1.getByteArray(keylength);
1208 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1209 "Test1 string should have the same collation key and sort key");
1210 tempkey = key2.getByteArray(keylength);
1211 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1212 "Test2 string should have the same collation key and sort key");
1213 tempkey = key3.getByteArray(keylength);
1214 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1215 "Test3 string should have the same collation key and sort key");
1216
1217 col->getSortKey(strtest1, sortkey1, 64);
1218 col->getSortKey(strtest2, sortkey2, 64);
1219 col->getSortKey(strtest3, sortkey3, 64);
1220
1221 tempkey = key1.getByteArray(keylength);
1222 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1223 "Test1 string should have the same collation key and sort key");
1224 tempkey = key2.getByteArray(keylength);
1225 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1226 "Test2 string should have the same collation key and sort key");
1227 tempkey = key3.getByteArray(keylength);
1228 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1229 "Test3 string should have the same collation key and sort key");
1230
1231 logln("testing sortkey ends...");
1232 delete col;
1233 }
1234
TestSortKeyOverflow()1235 void CollationAPITest::TestSortKeyOverflow() {
1236 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1237 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1238 if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1239 return;
1240 }
1241 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1242 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi.
1243 // The sort key should be 6 bytes:
1244 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1245 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1246 uint8_t sortKey[12];
1247 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, LENGTHOF(sortKey));
1248 uint8_t sortKey2[12];
1249 for (int32_t capacity = 0; capacity < length; ++capacity) {
1250 uprv_memset(sortKey2, 2, LENGTHOF(sortKey2));
1251 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1252 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1253 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1254 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1255 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1256 }
1257 }
1258
1259 // Now try to break getCollationKey().
1260 // Internally, it always starts with a large stack buffer.
1261 // Since we cannot control the initial capacity, we throw an increasing number
1262 // of characters at it, with the problematic part at the end.
1263 const int32_t longCapacity = 2000;
1264 // Each 'a' in the prefix should result in one primary sort key byte.
1265 // For i_and_phi we expect 6 bytes, then the NUL terminator.
1266 const int32_t maxPrefixLength = longCapacity - 6 - 1;
1267 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1268 UnicodeString s(FALSE, i_and_phi, 2);
1269 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1270 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1271 CollationKey collKey;
1272 col->getCollationKey(s, collKey, errorCode);
1273 int32_t collKeyLength;
1274 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1275 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1276 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1277 }
1278
1279 // Insert an 'a' to match ++prefixLength.
1280 s.insert(prefixLength, (UChar)0x61);
1281 }
1282 }
1283
TestMaxExpansion()1284 void CollationAPITest::TestMaxExpansion()
1285 {
1286 UErrorCode status = U_ZERO_ERROR;
1287 UChar ch = 0;
1288 UChar32 unassigned = 0xEFFFD;
1289 uint32_t sorder = 0;
1290 uint32_t temporder = 0;
1291
1292 UnicodeString rule("&a < ab < c/aba < d < z < ch");
1293 RuleBasedCollator coll(rule, status);
1294 if(U_FAILURE(status)) {
1295 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1296 return;
1297 }
1298 UnicodeString str(ch);
1299 CollationElementIterator *iter =
1300 coll.createCollationElementIterator(str);
1301
1302 while (ch < 0xFFFF && U_SUCCESS(status)) {
1303 int count = 1;
1304 uint32_t order;
1305 int32_t size = 0;
1306
1307 ch ++;
1308
1309 str.setCharAt(0, ch);
1310 iter->setText(str, status);
1311 order = iter->previous(status);
1312
1313 /* thai management */
1314 if (order == 0)
1315 order = iter->previous(status);
1316
1317 while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1318 count ++;
1319 }
1320
1321 size = coll.getMaxExpansion(order);
1322 if (U_FAILURE(status) || size < count) {
1323 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1324 ch, size, count);
1325 }
1326 }
1327
1328 /* testing for exact max expansion */
1329 int32_t size;
1330 ch = 0;
1331 while (ch < 0x61) {
1332 uint32_t order;
1333 str.setCharAt(0, ch);
1334 iter->setText(str, status);
1335 order = iter->previous(status);
1336 size = coll.getMaxExpansion(order);
1337 if (U_FAILURE(status) || size != 1) {
1338 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1339 ch, size, 1);
1340 }
1341 ch ++;
1342 }
1343
1344 ch = 0x63;
1345 str.setTo(ch);
1346 iter->setText(str, status);
1347 temporder = iter->previous(status);
1348 size = coll.getMaxExpansion(temporder);
1349 if (U_FAILURE(status) || size != 3) {
1350 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1351 ch, temporder, size, 3);
1352 }
1353
1354 ch = 0x64;
1355 str.setTo(ch);
1356 iter->setText(str, status);
1357 temporder = iter->previous(status);
1358 size = coll.getMaxExpansion(temporder);
1359 if (U_FAILURE(status) || size != 1) {
1360 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1361 ch, temporder, size, 1);
1362 }
1363
1364 str.setTo(unassigned);
1365 iter->setText(str, status);
1366 sorder = iter->previous(status);
1367 size = coll.getMaxExpansion(sorder);
1368 if (U_FAILURE(status) || size != 2) {
1369 errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1370 size, 2);
1371 }
1372
1373 /* testing jamo */
1374 ch = 0x1165;
1375 str.setTo(ch);
1376 iter->setText(str, status);
1377 temporder = iter->previous(status);
1378 size = coll.getMaxExpansion(temporder);
1379 if (U_FAILURE(status) || size > 3) {
1380 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1381 ch, size, 3);
1382 }
1383
1384 delete iter;
1385
1386 /* testing special jamo &a<\u1160 */
1387 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1388
1389 RuleBasedCollator jamocoll(rule, status);
1390 iter = jamocoll.createCollationElementIterator(str);
1391 temporder = iter->previous(status);
1392 size = iter->getMaxExpansion(temporder);
1393 if (U_FAILURE(status) || size != 6) {
1394 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1395 ch, size, 5);
1396 }
1397
1398 delete iter;
1399 }
1400
TestDisplayName()1401 void CollationAPITest::TestDisplayName()
1402 {
1403 UErrorCode error = U_ZERO_ERROR;
1404 Collator *coll = Collator::createInstance("en_US", error);
1405 if (U_FAILURE(error)) {
1406 errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1407 return;
1408 }
1409 UnicodeString name;
1410 UnicodeString result;
1411 coll->getDisplayName(Locale::getCanadaFrench(), result);
1412 Locale::getCanadaFrench().getDisplayName(name);
1413 if (result.compare(name)) {
1414 errln("Failure getting the correct name for locale en_US");
1415 }
1416
1417 coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1418 Locale::getSimplifiedChinese().getDisplayName(name);
1419 if (result.compare(name)) {
1420 errln("Failure getting the correct name for locale zh_SG");
1421 }
1422 delete coll;
1423 }
1424
TestAttribute()1425 void CollationAPITest::TestAttribute()
1426 {
1427 UErrorCode error = U_ZERO_ERROR;
1428 Collator *coll = Collator::createInstance(error);
1429
1430 if (U_FAILURE(error)) {
1431 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1432 return;
1433 }
1434
1435 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1436 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1437 U_FAILURE(error)) {
1438 errln("Setting and retrieving of the french collation failed");
1439 }
1440
1441 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1442 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1443 U_FAILURE(error)) {
1444 errln("Setting and retrieving of the french collation failed");
1445 }
1446
1447 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1448 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1449 U_FAILURE(error)) {
1450 errln("Setting and retrieving of the alternate handling failed");
1451 }
1452
1453 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1454 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1455 U_FAILURE(error)) {
1456 errln("Setting and retrieving of the alternate handling failed");
1457 }
1458
1459 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1460 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1461 U_FAILURE(error)) {
1462 errln("Setting and retrieving of the case first attribute failed");
1463 }
1464
1465 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1466 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1467 U_FAILURE(error)) {
1468 errln("Setting and retrieving of the case first attribute failed");
1469 }
1470
1471 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1472 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1473 U_FAILURE(error)) {
1474 errln("Setting and retrieving of the case level attribute failed");
1475 }
1476
1477 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1478 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1479 U_FAILURE(error)) {
1480 errln("Setting and retrieving of the case level attribute failed");
1481 }
1482
1483 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1484 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1485 U_FAILURE(error)) {
1486 errln("Setting and retrieving of the normalization on/off attribute failed");
1487 }
1488
1489 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1490 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1491 U_FAILURE(error)) {
1492 errln("Setting and retrieving of the normalization on/off attribute failed");
1493 }
1494
1495 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1496 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1497 U_FAILURE(error)) {
1498 errln("Setting and retrieving of the collation strength failed");
1499 }
1500
1501 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1502 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1503 U_FAILURE(error)) {
1504 errln("Setting and retrieving of the collation strength failed");
1505 }
1506
1507 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1508 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1509 U_FAILURE(error)) {
1510 errln("Setting and retrieving of the collation strength failed");
1511 }
1512
1513 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1514 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1515 U_FAILURE(error)) {
1516 errln("Setting and retrieving of the collation strength failed");
1517 }
1518
1519 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1520 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1521 U_FAILURE(error)) {
1522 errln("Setting and retrieving of the collation strength failed");
1523 }
1524
1525 delete coll;
1526 }
1527
TestVariableTopSetting()1528 void CollationAPITest::TestVariableTopSetting() {
1529 UErrorCode status = U_ZERO_ERROR;
1530
1531 UChar vt[256] = { 0 };
1532
1533 // Use the root collator, not the default collator.
1534 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1535 Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1536 if(U_FAILURE(status)) {
1537 delete coll;
1538 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1539 return;
1540 }
1541
1542 uint32_t oldVarTop = coll->getVariableTop(status);
1543
1544 // ICU 53+: The character must be in a supported reordering group,
1545 // and the variable top is pinned to the end of that group.
1546 vt[0] = 0x0041;
1547
1548 (void)coll->setVariableTop(vt, 1, status);
1549 if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1550 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1551 }
1552
1553 status = U_ZERO_ERROR;
1554 vt[0] = 0x24; // dollar sign (currency symbol)
1555 uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1556
1557 if(newVarTop != coll->getVariableTop(status)) {
1558 errln("setVariableTop(dollar sign) != following getVariableTop()");
1559 }
1560
1561 UnicodeString dollar((UChar)0x24);
1562 UnicodeString euro((UChar)0x20AC);
1563 uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1564 assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1565 (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1566 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1567 (int64_t)newVarTop2, (int64_t)newVarTop);
1568
1569 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1570 assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), dollar));
1571 assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), euro));
1572 assertEquals("dollar<zero", UCOL_LESS, coll->compare(dollar, UnicodeString((UChar)0x30)));
1573
1574 coll->setVariableTop(oldVarTop, status);
1575
1576 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1577
1578 if(newVarTop != newerVarTop) {
1579 errln("Didn't set vartop properly from UnicodeString!\n");
1580 }
1581
1582 delete coll;
1583
1584 }
1585
TestMaxVariable()1586 void CollationAPITest::TestMaxVariable() {
1587 UErrorCode errorCode = U_ZERO_ERROR;
1588 LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1589 if(U_FAILURE(errorCode)) {
1590 errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1591 return;
1592 }
1593
1594 (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1595 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1596 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1597 }
1598
1599 errorCode = U_ZERO_ERROR;
1600 (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1601
1602 if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1603 errln("setMaxVariable(currency) != following getMaxVariable()");
1604 }
1605
1606 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1607 assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1608 assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1609 assertEquals("dollar<zero", UCOL_LESS, coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1610 }
1611
TestGetLocale()1612 void CollationAPITest::TestGetLocale() {
1613 UErrorCode status = U_ZERO_ERROR;
1614 const char *rules = "&a<x<y<z";
1615 UChar rlz[256] = {0};
1616
1617 Collator *coll = Collator::createInstance("root", status);
1618 if(U_FAILURE(status)) {
1619 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1620 return;
1621 }
1622 Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1623 if(locale != Locale::getRoot()) {
1624 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1625 "getLocale().getName() = \"%s\"",
1626 locale.getName());
1627 }
1628 delete coll;
1629
1630 coll = Collator::createInstance("", status);
1631 if(U_FAILURE(status)) {
1632 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1633 return;
1634 }
1635 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1636 if(locale != Locale::getRoot()) {
1637 errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1638 "getLocale().getName() = \"%s\"",
1639 locale.getName());
1640 }
1641 delete coll;
1642
1643 int32_t i = 0;
1644
1645 static const struct {
1646 const char* requestedLocale;
1647 const char* validLocale;
1648 const char* actualLocale;
1649 } testStruct[] = {
1650 // Note: Locale::getRoot().getName() == "" not "root".
1651 { "de_DE", "de", "" },
1652 { "sr_RS", "sr_Cyrl_RS", "sr" },
1653 { "en_US_CALIFORNIA", "en_US", "" },
1654 { "fr_FR_NONEXISTANT", "fr", "" },
1655 // pinyin is the default, therefore suppressed.
1656 { "zh_CN", "zh_Hans_CN", "zh" },
1657 // zh_Hant has default=stroke but the data is in zh.
1658 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1659 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1660 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
1661 };
1662
1663 u_unescape(rules, rlz, 256);
1664
1665 /* test opening collators for different locales */
1666 for(i = 0; i<(int32_t)LENGTHOF(testStruct); i++) {
1667 status = U_ZERO_ERROR;
1668 coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1669 if(U_FAILURE(status)) {
1670 errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1671 delete coll;
1672 continue;
1673 }
1674 // The requested locale may be the same as the valid locale,
1675 // or may not be supported at all. See ticket #10477.
1676 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1677 if(locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1678 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1679 testStruct[i].requestedLocale,
1680 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1681 }
1682 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1683 if(locale != testStruct[i].validLocale) {
1684 errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1685 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1686 }
1687 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1688 if(locale != testStruct[i].actualLocale) {
1689 errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1690 testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1691 }
1692 // If we open a collator for the actual locale, we should get an equivalent one again.
1693 LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1694 if(U_FAILURE(status)) {
1695 errln("Failed to open collator for actual locale \"%s\" with %s",
1696 locale.getName(), u_errorName(status));
1697 } else {
1698 Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1699 if(actual2 != locale) {
1700 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1701 locale.getName(), actual2.getName());
1702 }
1703 if(*coll2 != *coll) {
1704 errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1705 }
1706 }
1707 delete coll;
1708 }
1709
1710 /* completely non-existant locale for collator should get a default collator */
1711 {
1712 Collator *defaultColl = Collator::createInstance((const Locale)NULL, status);
1713 coll = Collator::createInstance("blahaha", status);
1714 if(U_FAILURE(status)) {
1715 errln("Failed to open collator with %s", u_errorName(status));
1716 delete coll;
1717 delete defaultColl;
1718 return;
1719 }
1720 if(coll->getLocale(ULOC_VALID_LOCALE, status) !=
1721 defaultColl->getLocale(ULOC_VALID_LOCALE, status)) {
1722 errln("Valid locale for nonexisting locale locale collator differs "
1723 "from valid locale for default collator");
1724 }
1725 if(coll->getLocale(ULOC_ACTUAL_LOCALE, status) !=
1726 defaultColl->getLocale(ULOC_ACTUAL_LOCALE, status)) {
1727 errln("Actual locale for nonexisting locale locale collator differs "
1728 "from actual locale for default collator");
1729 }
1730 delete coll;
1731 delete defaultColl;
1732 }
1733
1734
1735
1736 /* collator instantiated from rules should have all three locales NULL */
1737 coll = new RuleBasedCollator(rlz, status);
1738 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1739 if(!locale.isBogus()) {
1740 errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1741 }
1742 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1743 if(!locale.isBogus()) {
1744 errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1745 }
1746 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1747 if(!locale.isBogus()) {
1748 errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1749 }
1750 delete coll;
1751 }
1752
/**
 * One TestBounds() sample: the escaped source text and the buffer that
 * receives the sort key computed for it.
 */
struct teststruct {
    const char *original;   // source text, with \uXXXX escapes still unresolved
    uint8_t     key[256];   // sort key bytes; compare_teststruct() strcmp()s them
};
1757
1758
1759
1760 U_CDECL_BEGIN
1761 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1762 compare_teststruct(const void *string1, const void *string2) {
1763 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1764 }
1765 U_CDECL_END
1766
1767
TestBounds(void)1768 void CollationAPITest::TestBounds(void) {
1769 UErrorCode status = U_ZERO_ERROR;
1770
1771 Collator *coll = Collator::createInstance(Locale("sh"), status);
1772 if(U_FAILURE(status)) {
1773 delete coll;
1774 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1775 return;
1776 }
1777
1778 uint8_t sortkey[512], lower[512], upper[512];
1779 UChar buffer[512];
1780
1781 static const char * const test[] = {
1782 "John Smith",
1783 "JOHN SMITH",
1784 "john SMITH",
1785 "j\\u00F6hn sm\\u00EFth",
1786 "J\\u00F6hn Sm\\u00EFth",
1787 "J\\u00D6HN SM\\u00CFTH",
1788 "john smithsonian",
1789 "John Smithsonian"
1790 };
1791
1792 struct teststruct tests[] = {
1793 {"\\u010CAKI MIHALJ", {0}},
1794 {"\\u010CAKI MIHALJ", {0}},
1795 {"\\u010CAKI PIRO\\u0160KA", {0}},
1796 {"\\u010CABAI ANDRIJA", {0}},
1797 {"\\u010CABAI LAJO\\u0160", {0}},
1798 {"\\u010CABAI MARIJA", {0}},
1799 {"\\u010CABAI STEVAN", {0}},
1800 {"\\u010CABAI STEVAN", {0}},
1801 {"\\u010CABARKAPA BRANKO", {0}},
1802 {"\\u010CABARKAPA MILENKO", {0}},
1803 {"\\u010CABARKAPA MIROSLAV", {0}},
1804 {"\\u010CABARKAPA SIMO", {0}},
1805 {"\\u010CABARKAPA STANKO", {0}},
1806 {"\\u010CABARKAPA TAMARA", {0}},
1807 {"\\u010CABARKAPA TOMA\\u0160", {0}},
1808 {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1809 {"\\u010CABDARI\\u0106 ZORICA", {0}},
1810 {"\\u010CABI NANDOR", {0}},
1811 {"\\u010CABOVI\\u0106 MILAN", {0}},
1812 {"\\u010CABRADI AGNEZIJA", {0}},
1813 {"\\u010CABRADI IVAN", {0}},
1814 {"\\u010CABRADI JELENA", {0}},
1815 {"\\u010CABRADI LJUBICA", {0}},
1816 {"\\u010CABRADI STEVAN", {0}},
1817 {"\\u010CABRDA MARTIN", {0}},
1818 {"\\u010CABRILO BOGDAN", {0}},
1819 {"\\u010CABRILO BRANISLAV", {0}},
1820 {"\\u010CABRILO LAZAR", {0}},
1821 {"\\u010CABRILO LJUBICA", {0}},
1822 {"\\u010CABRILO SPASOJA", {0}},
1823 {"\\u010CADE\\u0160 ZDENKA", {0}},
1824 {"\\u010CADESKI BLAGOJE", {0}},
1825 {"\\u010CADOVSKI VLADIMIR", {0}},
1826 {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1827 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1828 {"\\u010CAJA VANKA", {0}},
1829 {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1830 {"\\u010CAJI\\u0106 BORISLAV", {0}},
1831 {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1832 {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1833 {"\\u010CAKAN EUGEN", {0}},
1834 {"\\u010CAKAN EVGENIJE", {0}},
1835 {"\\u010CAKAN IVAN", {0}},
1836 {"\\u010CAKAN JULIJAN", {0}},
1837 {"\\u010CAKAN MIHAJLO", {0}},
1838 {"\\u010CAKAN STEVAN", {0}},
1839 {"\\u010CAKAN VLADIMIR", {0}},
1840 {"\\u010CAKAN VLADIMIR", {0}},
1841 {"\\u010CAKAN VLADIMIR", {0}},
1842 {"\\u010CAKARA ANA", {0}},
1843 {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1844 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1845 {"\\u010CAKI \\u0160ANDOR", {0}},
1846 {"\\u010CAKI AMALIJA", {0}},
1847 {"\\u010CAKI ANDRA\\u0160", {0}},
1848 {"\\u010CAKI LADISLAV", {0}},
1849 {"\\u010CAKI LAJO\\u0160", {0}},
1850 {"\\u010CAKI LASLO", {0}}
1851 };
1852
1853
1854
1855 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1856 int32_t arraySize = sizeof(tests)/sizeof(tests[0]);
1857
1858 (void)lowerSize; // Suppress unused variable warnings.
1859 (void)upperSize;
1860
1861 for(i = 0; i<arraySize; i++) {
1862 buffSize = u_unescape(tests[i].original, buffer, 512);
1863 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1864 }
1865
1866 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1867
1868 for(i = 0; i < arraySize-1; i++) {
1869 for(j = i+1; j < arraySize; j++) {
1870 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1871 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1872 for(k = i; k <= j; k++) {
1873 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1874 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1875 }
1876 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1877 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1878 }
1879 }
1880 }
1881 }
1882
1883
1884 for(i = 0; i<(int32_t)(sizeof(test)/sizeof(test[0])); i++) {
1885 buffSize = u_unescape(test[i], buffer, 512);
1886 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1887 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1888 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1889 for(j = i+1; j<(int32_t)(sizeof(test)/sizeof(test[0])); j++) {
1890 buffSize = u_unescape(test[j], buffer, 512);
1891 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1892 if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1893 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1894 }
1895 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1896 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1897 }
1898 }
1899 }
1900 delete coll;
1901 }
1902
1903
TestGetTailoredSet()1904 void CollationAPITest::TestGetTailoredSet()
1905 {
1906 struct {
1907 const char *rules;
1908 const char *tests[20];
1909 int32_t testsize;
1910 } setTest[] = {
1911 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1912 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1913 };
1914
1915 int32_t i = 0, j = 0;
1916 UErrorCode status = U_ZERO_ERROR;
1917
1918 UnicodeString buff;
1919 UnicodeSet *set = NULL;
1920
1921 for(i = 0; i < LENGTHOF(setTest); i++) {
1922 buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1923 RuleBasedCollator coll(buff, status);
1924 if(U_SUCCESS(status)) {
1925 set = coll.getTailoredSet(status);
1926 if(set->size() < setTest[i].testsize) {
1927 errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1928 }
1929 for(j = 0; j < setTest[i].testsize; j++) {
1930 buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1931 if(!set->contains(buff)) {
1932 errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1933 }
1934 }
1935 delete set;
1936 } else {
1937 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1938 }
1939 }
1940 }
1941
TestUClassID()1942 void CollationAPITest::TestUClassID()
1943 {
1944 char id = *((char *)RuleBasedCollator::getStaticClassID());
1945 if (id != 0) {
1946 errln("Static class id for RuleBasedCollator should be 0");
1947 }
1948 UErrorCode status = U_ZERO_ERROR;
1949 RuleBasedCollator *coll
1950 = (RuleBasedCollator *)Collator::createInstance(status);
1951 if(U_FAILURE(status)) {
1952 delete coll;
1953 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1954 return;
1955 }
1956 id = *((char *)coll->getDynamicClassID());
1957 if (id != 0) {
1958 errln("Dynamic class id for RuleBasedCollator should be 0");
1959 }
1960 id = *((char *)CollationKey::getStaticClassID());
1961 if (id != 0) {
1962 errln("Static class id for CollationKey should be 0");
1963 }
1964 CollationKey *key = new CollationKey();
1965 id = *((char *)key->getDynamicClassID());
1966 if (id != 0) {
1967 errln("Dynamic class id for CollationKey should be 0");
1968 }
1969 id = *((char *)CollationElementIterator::getStaticClassID());
1970 if (id != 0) {
1971 errln("Static class id for CollationElementIterator should be 0");
1972 }
1973 UnicodeString str("testing");
1974 CollationElementIterator *iter = coll->createCollationElementIterator(str);
1975 id = *((char *)iter->getDynamicClassID());
1976 if (id != 0) {
1977 errln("Dynamic class id for CollationElementIterator should be 0");
1978 }
1979 delete key;
1980 delete iter;
1981 delete coll;
1982 }
1983
1984 class TestCollator : public Collator
1985 {
1986 public:
1987 virtual Collator* clone(void) const;
1988
1989 using Collator::compare;
1990
1991 virtual UCollationResult compare(const UnicodeString& source,
1992 const UnicodeString& target,
1993 UErrorCode& status) const;
1994 virtual UCollationResult compare(const UnicodeString& source,
1995 const UnicodeString& target,
1996 int32_t length,
1997 UErrorCode& status) const;
1998 virtual UCollationResult compare(const UChar* source,
1999 int32_t sourceLength,
2000 const UChar* target,
2001 int32_t targetLength,
2002 UErrorCode& status) const;
2003 virtual CollationKey& getCollationKey(const UnicodeString& source,
2004 CollationKey& key,
2005 UErrorCode& status) const;
2006 virtual CollationKey& getCollationKey(const UChar*source,
2007 int32_t sourceLength,
2008 CollationKey& key,
2009 UErrorCode& status) const;
2010 virtual int32_t hashCode(void) const;
2011 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
2012 virtual ECollationStrength getStrength(void) const;
2013 virtual void setStrength(ECollationStrength newStrength);
2014 virtual UClassID getDynamicClassID(void) const;
2015 virtual void getVersion(UVersionInfo info) const;
2016 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2017 UErrorCode &status);
2018 virtual UColAttributeValue getAttribute(UColAttribute attr,
2019 UErrorCode &status) const;
2020 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2021 UErrorCode &status);
2022 virtual uint32_t setVariableTop(const UnicodeString &varTop,
2023 UErrorCode &status);
2024 virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2025 virtual uint32_t getVariableTop(UErrorCode &status) const;
2026 virtual int32_t getSortKey(const UnicodeString& source,
2027 uint8_t* result,
2028 int32_t resultLength) const;
2029 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2030 uint8_t*result, int32_t resultLength) const;
2031 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2032 virtual UBool operator==(const Collator& other) const;
2033 // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2034 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2035 TestCollator() : Collator() {};
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2036 TestCollator(UCollationStrength collationStrength,
2037 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
2038 };
2039
operator ==(const Collator & other) const2040 inline UBool TestCollator::operator==(const Collator& other) const {
2041 // TestCollator has no fields, so we test for identity.
2042 return this == &other;
2043
2044 // Normally, subclasses should do something like the following:
2045 // if (this == &other) { return TRUE; }
2046 // if (!Collator::operator==(other)) { return FALSE; } // not the same class
2047 //
2048 // const TestCollator &o = (const TestCollator&)other;
2049 // (compare this vs. o's subclass fields)
2050 }
2051
clone() const2052 Collator* TestCollator::clone() const
2053 {
2054 return new TestCollator();
2055 }
2056
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2057 UCollationResult TestCollator::compare(const UnicodeString& source,
2058 const UnicodeString& target,
2059 UErrorCode& status) const
2060 {
2061 if(U_SUCCESS(status)) {
2062 return UCollationResult(source.compare(target));
2063 } else {
2064 return UCOL_EQUAL;
2065 }
2066 }
2067
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2068 UCollationResult TestCollator::compare(const UnicodeString& source,
2069 const UnicodeString& target,
2070 int32_t length,
2071 UErrorCode& status) const
2072 {
2073 if(U_SUCCESS(status)) {
2074 return UCollationResult(source.compare(0, length, target));
2075 } else {
2076 return UCOL_EQUAL;
2077 }
2078 }
2079
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2080 UCollationResult TestCollator::compare(const UChar* source,
2081 int32_t sourceLength,
2082 const UChar* target,
2083 int32_t targetLength,
2084 UErrorCode& status) const
2085 {
2086 UnicodeString s(source, sourceLength);
2087 UnicodeString t(target, targetLength);
2088 return compare(s, t, status);
2089 }
2090
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2091 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2092 CollationKey& key,
2093 UErrorCode& status) const
2094 {
2095 char temp[100];
2096 int length = 100;
2097 length = source.extract(temp, length, NULL, status);
2098 temp[length] = 0;
2099 CollationKey tempkey((uint8_t*)temp, length);
2100 key = tempkey;
2101 return key;
2102 }
2103
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2104 CollationKey& TestCollator::getCollationKey(const UChar*source,
2105 int32_t sourceLength,
2106 CollationKey& key,
2107 UErrorCode& status) const
2108 {
2109 //s tack allocation used since collationkey does not keep the unicodestring
2110 UnicodeString str(source, sourceLength);
2111 return getCollationKey(str, key, status);
2112 }
2113
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2114 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2115 int32_t resultLength) const
2116 {
2117 UErrorCode status = U_ZERO_ERROR;
2118 int32_t length = source.extract((char *)result, resultLength, NULL,
2119 status);
2120 result[length] = 0;
2121 return length;
2122 }
2123
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2124 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2125 uint8_t*result, int32_t resultLength) const
2126 {
2127 UnicodeString str(source, sourceLength);
2128 return getSortKey(str, result, resultLength);
2129 }
2130
hashCode() const2131 int32_t TestCollator::hashCode() const
2132 {
2133 return 0;
2134 }
2135
getLocale(ULocDataLocaleType type,UErrorCode & status) const2136 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2137 {
2138 // api not used, this is to make the compiler happy
2139 if (U_FAILURE(status)) {
2140 (void)type;
2141 }
2142 return NULL;
2143 }
2144
getStrength() const2145 Collator::ECollationStrength TestCollator::getStrength() const
2146 {
2147 return TERTIARY;
2148 }
2149
setStrength(Collator::ECollationStrength newStrength)2150 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2151 {
2152 // api not used, this is to make the compiler happy
2153 (void)newStrength;
2154 }
2155
getDynamicClassID(void) const2156 UClassID TestCollator::getDynamicClassID(void) const
2157 {
2158 return 0;
2159 }
2160
getVersion(UVersionInfo info) const2161 void TestCollator::getVersion(UVersionInfo info) const
2162 {
2163 // api not used, this is to make the compiler happy
2164 memset(info, 0, U_MAX_VERSION_LENGTH);
2165 }
2166
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2167 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2168 UErrorCode & /*status*/)
2169 {
2170 }
2171
getAttribute(UColAttribute attr,UErrorCode & status) const2172 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2173 UErrorCode &status) const
2174 {
2175 // api not used, this is to make the compiler happy
2176 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2177 return UCOL_OFF;
2178 }
2179 return UCOL_DEFAULT;
2180 }
2181
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2182 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2183 UErrorCode &status)
2184 {
2185 // api not used, this is to make the compiler happy
2186 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2187 status = U_ILLEGAL_ARGUMENT_ERROR;
2188 }
2189 return 0;
2190 }
2191
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2192 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2193 UErrorCode &status)
2194 {
2195 // api not used, this is to make the compiler happy
2196 if (U_SUCCESS(status) && varTop.length() == 0) {
2197 status = U_ILLEGAL_ARGUMENT_ERROR;
2198 }
2199 return 0;
2200 }
2201
setVariableTop(uint32_t varTop,UErrorCode & status)2202 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2203 {
2204 // api not used, this is to make the compiler happy
2205 if (U_SUCCESS(status) && varTop == 0) {
2206 status = U_ILLEGAL_ARGUMENT_ERROR;
2207 }
2208 }
2209
getVariableTop(UErrorCode & status) const2210 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2211 {
2212
2213 // api not used, this is to make the compiler happy
2214 if (U_SUCCESS(status)) {
2215 return 0;
2216 }
2217 return (uint32_t)(0xFFFFFFFFu);
2218 }
2219
getTailoredSet(UErrorCode & status) const2220 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2221 {
2222 return Collator::getTailoredSet(status);
2223 }
2224
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2225 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2226 {
2227 Collator::setLocales(requestedLocale, validLocale, actualLocale);
2228 }
2229
2230
TestSubclass()2231 void CollationAPITest::TestSubclass()
2232 {
2233 TestCollator col1;
2234 TestCollator col2;
2235 doAssert(col1 != col2, "2 instances of TestCollator should be different");
2236 if (col1.hashCode() != col2.hashCode()) {
2237 errln("Every TestCollator has the same hashcode");
2238 }
2239 UnicodeString abc("abc", 3);
2240 UnicodeString bcd("bcd", 3);
2241 if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2242 errln("TestCollator compare should be the same as the default "
2243 "string comparison");
2244 }
2245 CollationKey key;
2246 UErrorCode status = U_ZERO_ERROR;
2247 col1.getCollationKey(abc, key, status);
2248 int32_t length = 0;
2249 const char* bytes = (const char *)key.getByteArray(length);
2250 UnicodeString keyarray(bytes, length, NULL, status);
2251 if (abc != keyarray) {
2252 errln("TestCollator collationkey API is returning wrong values");
2253 }
2254
2255 UnicodeSet expectedset(0, 0x10FFFF);
2256 UnicodeSet *defaultset = col1.getTailoredSet(status);
2257 if (!defaultset->containsAll(expectedset)
2258 || !expectedset.containsAll(*defaultset)) {
2259 errln("Error: expected default tailoring to be 0 to 0x10ffff");
2260 }
2261 delete defaultset;
2262
2263 // use base class implementation
2264 Locale loc1 = Locale::getGermany();
2265 Locale loc2 = Locale::getFrance();
2266 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2267
2268 UnicodeString displayName;
2269 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2270
2271 TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2272 UnicodeString a("a");
2273 UnicodeString b("b");
2274 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2275 if(col1.compare(a, b) != result) {
2276 errln("Collator doesn't give default result");
2277 }
2278 if(col1.compare(a, b, 1) != result) {
2279 errln("Collator doesn't give default result");
2280 }
2281 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2282 errln("Collator doesn't give default result");
2283 }
2284 }
2285
TestNULLCharTailoring()2286 void CollationAPITest::TestNULLCharTailoring()
2287 {
2288 UErrorCode status = U_ZERO_ERROR;
2289 UChar buf[256] = {0};
2290 int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2291 UnicodeString first((UChar)0x0061);
2292 UnicodeString second((UChar)0);
2293 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2294 if(U_FAILURE(status)) {
2295 delete coll;
2296 errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2297 return;
2298 }
2299 UCollationResult res = coll->compare(first, second, status);
2300 if(res != UCOL_LESS) {
2301 errln("a should be less then NULL after tailoring");
2302 }
2303 delete coll;
2304 }
2305
TestClone()2306 void CollationAPITest::TestClone() {
2307 logln("\ninit c0");
2308 UErrorCode status = U_ZERO_ERROR;
2309 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2310
2311 if (U_FAILURE(status)) {
2312 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2313 return;
2314 }
2315
2316 c0->setStrength(Collator::TERTIARY);
2317 dump("c0", c0, status);
2318
2319 logln("\ninit c1");
2320 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2321 c1->setStrength(Collator::TERTIARY);
2322 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2323 if(val == UCOL_LOWER_FIRST){
2324 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2325 }else{
2326 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2327 }
2328 dump("c0", c0, status);
2329 dump("c1", c1, status);
2330
2331 logln("\ninit c2");
2332 RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2333 val = c2->getAttribute(UCOL_CASE_FIRST, status);
2334 if(val == UCOL_LOWER_FIRST){
2335 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2336 }else{
2337 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2338 }
2339 if(U_FAILURE(status)){
2340 errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2341 return;
2342 }
2343 dump("c0", c0, status);
2344 dump("c1", c1, status);
2345 dump("c2", c2, status);
2346 if(*c1 == *c2){
2347 errln("The cloned objects refer to same data");
2348 }
2349 delete c0;
2350 delete c1;
2351 delete c2;
2352 }
2353
TestCloneBinary()2354 void CollationAPITest::TestCloneBinary() {
2355 IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2356 LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2357 LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2358 if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2359 return;
2360 }
2361 RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2362 RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2363 if(rbRoot == NULL || rbc == NULL) {
2364 infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2365 return;
2366 }
2367 rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2368 UnicodeString uUmlaut((UChar)0xfc);
2369 UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2370 assertEquals("rbc/primary: u-umlaut==ue", UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2371 uint8_t bin[25000];
2372 int32_t binLength = rbc->cloneBinary(bin, LENGTHOF(bin), errorCode);
2373 if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) {
2374 return;
2375 }
2376 logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2377
2378 RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2379 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2380 return;
2381 }
2382 assertEquals("rbc2.strength==primary", UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2383 assertEquals("rbc2: u-umlaut==ue", UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2384 assertTrue("rbc==rbc2", *rbc == rbc2);
2385 uint8_t bin2[25000];
2386 int32_t bin2Length = rbc2.cloneBinary(bin2, LENGTHOF(bin2), errorCode);
2387 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2388 assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2389 }
2390
TestIterNumeric()2391 void CollationAPITest::TestIterNumeric() {
2392 // Regression test for ticket #9915.
2393 // The collation code sometimes masked the continuation marker away
2394 // but later tested the result for isContinuation().
2395 // This test case failed because the third bytes of the computed numeric-collation primaries
2396 // were permutated with the script reordering table.
2397 // It should have been possible to reproduce this with the root collator
2398 // and characters with appropriate 3-byte primary weights.
2399 // The effectiveness of this test depends completely on the collation elements
2400 // and on the implementation code.
2401 IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2402 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2403 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2404 return;
2405 }
2406 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2407 UCharIterator iter40, iter72;
2408 uiter_setUTF8(&iter40, "\x34\x30", 2);
2409 uiter_setUTF8(&iter72, "\x37\x32", 2);
2410 UCollationResult result = coll.compare(iter40, iter72, errorCode);
2411 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2412 }
2413
TestBadKeywords()2414 void CollationAPITest::TestBadKeywords() {
2415 // Test locale IDs with errors.
2416 // Valid locale IDs are tested via data-driven tests.
2417 UErrorCode errorCode = U_ZERO_ERROR;
2418 Locale bogusLocale(Locale::getRoot());
2419 bogusLocale.setToBogus();
2420 LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2421 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2422 errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2423 u_errorName(errorCode));
2424 }
2425
2426 // Unknown value.
2427 const char *localeID = "it-u-ks-xyz";
2428 errorCode = U_ZERO_ERROR;
2429 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2430 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2431 errln("Collator::createInstance(%s) did not fail as expected - %s",
2432 localeID, u_errorName(errorCode));
2433 }
2434
2435 // Unsupported attributes.
2436 localeID = "it@colHiraganaQuaternary=true";
2437 errorCode = U_ZERO_ERROR;
2438 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2439 if(errorCode != U_UNSUPPORTED_ERROR) {
2440 errln("Collator::createInstance(%s) did not fail as expected - %s",
2441 localeID, u_errorName(errorCode));
2442 }
2443
2444 localeID = "it-u-vt-u24";
2445 errorCode = U_ZERO_ERROR;
2446 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2447 if(errorCode != U_UNSUPPORTED_ERROR) {
2448 errln("Collator::createInstance(%s) did not fail as expected - %s",
2449 localeID, u_errorName(errorCode));
2450 }
2451 }
2452
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2453 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2454 const char* bigone = "One";
2455 const char* littleone = "one";
2456
2457 logln(msg + " " + c->compare(bigone, littleone) +
2458 " s: " + c->getStrength() +
2459 " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2460 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2461 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2462 {
2463 if (exec) logln("TestSuite CollationAPITest: ");
2464 TESTCASE_AUTO_BEGIN;
2465 TESTCASE_AUTO(TestProperty);
2466 TESTCASE_AUTO(TestOperators);
2467 TESTCASE_AUTO(TestDuplicate);
2468 TESTCASE_AUTO(TestCompare);
2469 TESTCASE_AUTO(TestHashCode);
2470 TESTCASE_AUTO(TestCollationKey);
2471 TESTCASE_AUTO(TestElemIter);
2472 TESTCASE_AUTO(TestGetAll);
2473 TESTCASE_AUTO(TestRuleBasedColl);
2474 TESTCASE_AUTO(TestDecomposition);
2475 TESTCASE_AUTO(TestSafeClone);
2476 TESTCASE_AUTO(TestSortKey);
2477 TESTCASE_AUTO(TestSortKeyOverflow);
2478 TESTCASE_AUTO(TestMaxExpansion);
2479 TESTCASE_AUTO(TestDisplayName);
2480 TESTCASE_AUTO(TestAttribute);
2481 TESTCASE_AUTO(TestVariableTopSetting);
2482 TESTCASE_AUTO(TestMaxVariable);
2483 TESTCASE_AUTO(TestRules);
2484 TESTCASE_AUTO(TestGetLocale);
2485 TESTCASE_AUTO(TestBounds);
2486 TESTCASE_AUTO(TestGetTailoredSet);
2487 TESTCASE_AUTO(TestUClassID);
2488 TESTCASE_AUTO(TestSubclass);
2489 TESTCASE_AUTO(TestNULLCharTailoring);
2490 TESTCASE_AUTO(TestClone);
2491 TESTCASE_AUTO(TestCloneBinary);
2492 TESTCASE_AUTO(TestIterNumeric);
2493 TESTCASE_AUTO(TestBadKeywords);
2494 TESTCASE_AUTO_END;
2495 }
2496
2497 #endif /* #if !UCONFIG_NO_COLLATION */
2498