1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 //===============================================================================
9 //
10 // File apicoll.cpp
11 //
12 //
13 //
14 // Created by: Helena Shih
15 //
16 // Modification History:
17 //
18 // Date Name Description
19 // 2/5/97 aliu Added streamIn and streamOut methods. Added
20 // constructor which reads RuleBasedCollator object from
21 // a binary file. Added writeToFile method which streams
22 // RuleBasedCollator out to a binary file. The streamIn
23 // and streamOut methods use istream and ostream objects
24 // in binary mode.
25 // 6/30/97 helena Added tests for CollationElementIterator::setText, getOffset
26 // setOffset and DecompositionIterator::getOffset, setOffset.
27 // DecompositionIterator is made public so add class scope
28 // testing.
29 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t)
30 //===============================================================================
31
32 #include "unicode/utypes.h"
33
34 #if !UCONFIG_NO_COLLATION
35
36 #include "unicode/localpointer.h"
37 #include "unicode/coll.h"
38 #include "unicode/tblcoll.h"
39 #include "unicode/coleitr.h"
40 #include "unicode/sortkey.h"
41 #include "apicoll.h"
42 #include "unicode/chariter.h"
43 #include "unicode/schriter.h"
44 #include "unicode/ustring.h"
45 #include "unicode/ucol.h"
46
47 #include "sfwdchit.h"
48 #include "cmemory.h"
49 #include <stdlib.h>
50
51 void
doAssert(UBool condition,const char * message)52 CollationAPITest::doAssert(UBool condition, const char *message)
53 {
54 if (!condition) {
55 errln(UnicodeString("ERROR : ") + message);
56 }
57 }
58
59 // Collator Class Properties
60 // ctor, dtor, createInstance, compare, getStrength/setStrength
61 // getDecomposition/setDecomposition, getDisplayName
62 void
TestProperty()63 CollationAPITest::TestProperty(/* char* par */)
64 {
65 UErrorCode success = U_ZERO_ERROR;
66 Collator *col = 0;
67 /*
68 * Expected version of the English collator.
69 * Currently, the major/minor version numbers change when the builder code
70 * changes,
71 * number 2 is from the tailoring data version and
72 * number 3 is the UCA version.
73 * This changes with every UCA version change, and the expected value
74 * needs to be adjusted.
75 * Same in cintltst/capitst.c.
76 */
77 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
78 UVersionInfo versionArray;
79
80 logln("The property tests begin : ");
81 logln("Test ctors : ");
82 col = Collator::createInstance(Locale::getEnglish(), success);
83 if (U_FAILURE(success)){
84 errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
85 return;
86 }
87
88 StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
89 if (U_FAILURE(success)){
90 errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
91 return;
92 }
93 delete kwEnum;
94
95 col->getVersion(versionArray);
96 // Check for a version greater than some value rather than equality
97 // so that we need not update the expected version each time.
98 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
99 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
100 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
101 } else {
102 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
103 versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
104 }
105
106 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
107 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
108 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
109 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
110 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
111 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
112
113 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
114 success = U_ZERO_ERROR;
115 {
116 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
117 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
118 UCharIterator abauIter, abssIter;
119 uiter_setReplaceable(&abauIter, &abau);
120 uiter_setReplaceable(&abssIter, &abss);
121 doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
122 success = U_ZERO_ERROR;
123 }
124
125 /*start of update [Bertrand A. D. 02/10/98]*/
126 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
127 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed");
128 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed");
129 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed");
130 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
131 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
132 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
133 /*end of update [Bertrand A. D. 02/10/98]*/
134
135
136 logln("Test ctors ends.");
137 logln("testing Collator::getStrength() method ...");
138 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
139 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140
141
142 logln("testing Collator::setStrength() method ...");
143 col->setStrength(Collator::SECONDARY);
144 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
145 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
146 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
147
148 UnicodeString name;
149
150 logln("Get display name for the US English collation in German : ");
151 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
152 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
153
154 logln("Get display name for the US English collation in English : ");
155 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
156 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
157 #if 0
158 // weiv : this test is bogus if we're running on any machine that has different default locale than English.
159 // Therefore, it is banned!
160 logln("Get display name for the US English in default locale language : ");
161 logln(Collator::getDisplayName(Locale::US, name));
162 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
163 #endif
164 delete col; col = 0;
165 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
166 success);
167 if (U_FAILURE(success)) {
168 errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
169 return;
170 }
171 const UnicodeString &daRules = rcol->getRules();
172 if(daRules.isEmpty()) {
173 dataerrln("missing da_DK tailoring rule string");
174 } else {
175 doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
176 }
177 delete rcol;
178
179 col = Collator::createInstance(Locale::getFrench(), success);
180 if (U_FAILURE(success))
181 {
182 errln("Creating French collation failed.");
183 return;
184 }
185
186 col->setStrength(Collator::PRIMARY);
187 logln("testing Collator::getStrength() method again ...");
188 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
189 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
190
191 logln("testing French Collator::setStrength() method ...");
192 col->setStrength(Collator::TERTIARY);
193 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
194 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
195 doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
196 delete col;
197
198 logln("Create junk collation: ");
199 Locale abcd("ab", "CD", "");
200 success = U_ZERO_ERROR;
201 Collator *junk = 0;
202 junk = Collator::createInstance(abcd, success);
203
204 if (U_FAILURE(success))
205 {
206 errln("Junk collation creation failed, should at least return default.");
207 return;
208 }
209
210 doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(),
211 "The root collation should be returned for an unsupported language.");
212 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
213 if (U_FAILURE(success))
214 {
215 errln("Creating fr_CA collator failed.");
216 delete junk;
217 return;
218 }
219
220 // If the default locale isn't French, the French and non-French collators
221 // should be different
222 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
223 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
224 }
225 Collator *aFrCol = frCol->clone();
226 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
227 logln("Collator property test ended.");
228
229 delete frCol;
230 delete aFrCol;
231 delete junk;
232 }
233
234 void
TestRuleBasedColl()235 CollationAPITest::TestRuleBasedColl()
236 {
237 RuleBasedCollator *col1, *col2, *col3, *col4;
238 UErrorCode status = U_ZERO_ERROR;
239
240 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
241 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
242
243 col1 = new RuleBasedCollator(ruleset1, status);
244 if (U_FAILURE(status)) {
245 errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
246 return;
247 }
248 else {
249 logln("PASS: RuleBased Collator creation passed\n");
250 }
251
252 status = U_ZERO_ERROR;
253 col2 = new RuleBasedCollator(ruleset2, status);
254 if (U_FAILURE(status)) {
255 errln("RuleBased Collator creation failed.\n");
256 return;
257 }
258 else {
259 logln("PASS: RuleBased Collator creation passed\n");
260 }
261
262 status = U_ZERO_ERROR;
263 Locale locale("aa", "AA");
264 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
265 if (U_FAILURE(status)) {
266 errln("Fallback Collator creation failed.: %s\n");
267 return;
268 }
269 else {
270 logln("PASS: Fallback Collator creation passed\n");
271 }
272 delete col3;
273
274 status = U_ZERO_ERROR;
275 col3 = (RuleBasedCollator *)Collator::createInstance(status);
276 if (U_FAILURE(status)) {
277 errln("Default Collator creation failed.: %s\n");
278 return;
279 }
280 else {
281 logln("PASS: Default Collator creation passed\n");
282 }
283
284 UnicodeString rule1 = col1->getRules();
285 UnicodeString rule2 = col2->getRules();
286 UnicodeString rule3 = col3->getRules();
287
288 doAssert(rule1 != rule2, "Default collator getRules failed");
289 doAssert(rule2 != rule3, "Default collator getRules failed");
290 doAssert(rule1 != rule3, "Default collator getRules failed");
291
292 col4 = new RuleBasedCollator(rule2, status);
293 if (U_FAILURE(status)) {
294 errln("RuleBased Collator creation failed.\n");
295 return;
296 }
297
298 UnicodeString rule4 = col4->getRules();
299 doAssert(rule2 == rule4, "Default collator getRules failed");
300 int32_t length4 = 0;
301 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
302 if (U_FAILURE(status)) {
303 errln("Cloned rule data failed.\n");
304 return;
305 }
306
307 // free(clonedrule4); BAD API!!!!
308 uprv_free(clonedrule4);
309
310
311 delete col1;
312 delete col2;
313 delete col3;
314 delete col4;
315 }
316
317 void
TestRules()318 CollationAPITest::TestRules()
319 {
320 RuleBasedCollator *coll;
321 UErrorCode status = U_ZERO_ERROR;
322 UnicodeString rules;
323
324 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
325 if (U_FAILURE(status)) {
326 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
327 return;
328 }
329 else {
330 logln("PASS: RuleBased Collator creation passed\n");
331 }
332
333 coll->getRules(UCOL_TAILORING_ONLY, rules);
334 if (rules.length() != 0x00) {
335 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
336 }
337
338 coll->getRules(UCOL_FULL_RULES, rules);
339 if (rules.length() < 0) {
340 errln("English full rules failed");
341 }
342 delete coll;
343 }
344
345 void
TestDecomposition()346 CollationAPITest::TestDecomposition() {
347 UErrorCode status = U_ZERO_ERROR;
348 Collator *en_US = Collator::createInstance("en_US", status),
349 *el_GR = Collator::createInstance("el_GR", status),
350 *vi_VN = Collator::createInstance("vi_VN", status);
351
352 if (U_FAILURE(status)) {
353 errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
354 return;
355 }
356
357 /* there is no reason to have canonical decomposition in en_US OR default locale */
358 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
359 {
360 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
361 }
362
363 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
364 {
365 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
366 }
367
368 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
369 {
370 errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
371 }
372
373 delete en_US;
374 delete el_GR;
375 delete vi_VN;
376 }
377
378 void
TestSafeClone()379 CollationAPITest::TestSafeClone() {
380 static const int CLONETEST_COLLATOR_COUNT = 3;
381 Collator *someCollators [CLONETEST_COLLATOR_COUNT];
382 Collator *col;
383 UErrorCode err = U_ZERO_ERROR;
384 int index;
385
386 UnicodeString test1("abCda");
387 UnicodeString test2("abcda");
388
389 /* one default collator & two complex ones */
390 someCollators[0] = Collator::createInstance("en_US", err);
391 someCollators[1] = Collator::createInstance("ko", err);
392 someCollators[2] = Collator::createInstance("ja_JP", err);
393 if(U_FAILURE(err)) {
394 errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
395 delete someCollators[0];
396 delete someCollators[1];
397 delete someCollators[2];
398 return;
399 }
400
401 /* change orig & clone & make sure they are independent */
402
403 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
404 {
405 col = someCollators[index]->safeClone();
406 if (col == 0) {
407 errln("SafeClone of collator should not return null\n");
408 break;
409 }
410 col->setStrength(Collator::TERTIARY);
411 someCollators[index]->setStrength(Collator::PRIMARY);
412 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
413 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
414
415 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
416 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
417 delete col;
418 delete someCollators[index];
419 }
420 }
421
422 void
TestHashCode()423 CollationAPITest::TestHashCode(/* char* par */)
424 {
425 logln("hashCode tests begin.");
426 UErrorCode success = U_ZERO_ERROR;
427 Collator *col1 = 0;
428 col1 = Collator::createInstance(Locale::getEnglish(), success);
429 if (U_FAILURE(success))
430 {
431 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
432 return;
433 }
434
435 Collator *col2 = 0;
436 Locale dk("da", "DK", "");
437 col2 = Collator::createInstance(dk, success);
438 if (U_FAILURE(success))
439 {
440 errln("Danish collation creation failed.");
441 return;
442 }
443
444 Collator *col3 = 0;
445 col3 = Collator::createInstance(Locale::getEnglish(), success);
446 if (U_FAILURE(success))
447 {
448 errln("2nd default collation creation failed.");
449 return;
450 }
451
452 logln("Collator::hashCode() testing ...");
453
454 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
455 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
456 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
457
458 logln("hashCode tests end.");
459 delete col1;
460 delete col2;
461
462 UnicodeString test1("Abcda");
463 UnicodeString test2("abcda");
464
465 CollationKey sortk1, sortk2, sortk3;
466 UErrorCode status = U_ZERO_ERROR;
467
468 col3->getCollationKey(test1, sortk1, status);
469 col3->getCollationKey(test2, sortk2, status);
470 col3->getCollationKey(test2, sortk3, status);
471
472 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
473 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
474
475 delete col3;
476 }
477
478 //----------------------------------------------------------------------------
479 // CollationKey -- Tests the CollationKey methods
480 //
481 void
TestCollationKey()482 CollationAPITest::TestCollationKey(/* char* par */)
483 {
484 logln("testing CollationKey begins...");
485 Collator *col = 0;
486 UErrorCode success=U_ZERO_ERROR;
487 col = Collator::createInstance(Locale::getEnglish(), success);
488 if (U_FAILURE(success))
489 {
490 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
491 return;
492 }
493 col->setStrength(Collator::TERTIARY);
494
495 CollationKey sortk1, sortk2;
496 UnicodeString test1("Abcda"), test2("abcda");
497 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
498
499 logln("Testing weird arguments");
500 // No string vs. empty string vs. completely-ignorable string:
501 // See ICU ticket #10495.
502 CollationKey sortkNone;
503 int32_t length;
504 sortkNone.getByteArray(length);
505 doAssert(!sortkNone.isBogus() && length == 0,
506 "Default-constructed collation key should be empty");
507 CollationKey sortkEmpty;
508 col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
509 // key gets reset here
510 const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
511 doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
512 byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
513 "Empty string should return a collation key with empty levels");
514 doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
515 "Expected no collation key < collation key for empty string");
516 doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
517 "Expected collation key for empty string > no collation key");
518
519 CollationKey sortkIgnorable;
520 // Most control codes and CGJ are completely ignorable.
521 // A string with only completely ignorables must compare equal to an empty string.
522 col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
523 sortkIgnorable.getByteArray(length);
524 doAssert(!sortkIgnorable.isBogus() && length == 3,
525 "Completely ignorable string should return a collation key with empty levels");
526 doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
527 "Completely ignorable string should compare equal to empty string");
528
529 // bogus key returned here
530 key1Status = U_ILLEGAL_ARGUMENT_ERROR;
531 col->getCollationKey(NULL, 0, sortk1, key1Status);
532 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
533 "Error code should return bogus collation key");
534
535 key1Status = U_ZERO_ERROR;
536 logln("Use tertiary comparison level testing ....");
537
538 col->getCollationKey(test1, sortk1, key1Status);
539 if (U_FAILURE(key1Status)) {
540 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
541 return;
542 }
543 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
544 == Collator::GREATER,
545 "Result should be \"Abcda\" >>> \"abcda\"");
546
547 CollationKey sortk3(sortk2), sortkNew;
548
549 sortkNew = sortk1;
550 doAssert((sortk1 != sortk2), "The sort keys should be different");
551 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
552 doAssert((sortk2 == sortk3), "The sort keys should be the same");
553 doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
554 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
555 doAssert((sortkNew != sortk3), "The sort keys should be different");
556 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
557 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
558 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
559 doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
560 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
561 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
562 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
563 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
564 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
565 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
566
567 int32_t cnt1, cnt2, cnt3, cnt4;
568
569 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
570 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
571
572 const uint8_t* byteArray3 = 0;
573 byteArray3 = sortk1.getByteArray(cnt3);
574
575 const uint8_t* byteArray4 = 0;
576 byteArray4 = sortk2.getByteArray(cnt4);
577
578 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
579 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
580
581 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
582 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
583 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
584 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
585 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
586 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
587
588 logln("Equality tests : ");
589 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
590 doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
591 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
592 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
593 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
594 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
595
596 byteArray1 = 0;
597 byteArray2 = 0;
598
599 sortk3 = sortk1;
600 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
601 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
602 logln("testing sortkey ends...");
603
604 col->setStrength(Collator::SECONDARY);
605 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
606 col->getCollationKey(test2, sortk2, key2Status))
607 == Collator::EQUAL,
608 "Result should be \"Abcda\" == \"abcda\"");
609 delete col;
610 }
611
612 //----------------------------------------------------------------------------
// Tests the CollationElementIterator class.
614 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
615 //
616 void
TestElemIter()617 CollationAPITest::TestElemIter(/* char* par */)
618 {
619 logln("testing sortkey begins...");
620 Collator *col = 0;
621 UErrorCode success = U_ZERO_ERROR;
622 col = Collator::createInstance(Locale::getEnglish(), success);
623 if (U_FAILURE(success))
624 {
625 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
626 return;
627 }
628
629 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
630 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
631 logln("Constructors and comparison testing....");
632 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
633
634 CharacterIterator *chariter=new StringCharacterIterator(testString1);
635 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
636
637 // copy ctor
638 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
639 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
640
641 int32_t offset = iterator1->getOffset();
642 if (offset != 0) {
643 errln("Error in getOffset for collation element iterator\n");
644 return;
645 }
646 iterator1->setOffset(6, success);
647 if (U_FAILURE(success)) {
648 errln("Error in setOffset for collation element iterator\n");
649 return;
650 }
651 iterator1->setOffset(0, success);
652 int32_t order1, order2, order3;
653 doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
654 doAssert((*iterator1 != *iterator3), "The two iterators should be different");
655
656 doAssert((*coliter == *iterator1), "The two iterators should be the same");
657 doAssert((*coliter == *iterator2), "The two iterators should be the same");
658 doAssert((*coliter != *iterator3), "The two iterators should be different");
659
660 order1 = iterator1->next(success);
661 if (U_FAILURE(success))
662 {
663 errln("Somehow ran out of memory stepping through the iterator.");
664 return;
665 }
666
667 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
668 order2 = iterator2->getOffset();
669 doAssert((order1 != order2), "The order result should not be the same");
670 order2 = iterator2->next(success);
671 if (U_FAILURE(success))
672 {
673 errln("Somehow ran out of memory stepping through the iterator.");
674 return;
675 }
676
677 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
678 doAssert((order1 == order2), "The order result should be the same");
679 order3 = iterator3->next(success);
680 if (U_FAILURE(success))
681 {
682 errln("Somehow ran out of memory stepping through the iterator.");
683 return;
684 }
685
686 doAssert((CollationElementIterator::primaryOrder(order1) ==
687 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
688 doAssert((CollationElementIterator::secondaryOrder(order1) ==
689 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
690 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
691 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
692
693 order1 = iterator1->next(success); order3 = iterator3->next(success);
694 if (U_FAILURE(success))
695 {
696 errln("Somehow ran out of memory stepping through the iterator.");
697 return;
698 }
699
700 doAssert((CollationElementIterator::primaryOrder(order1) ==
701 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
702 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
703 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
704
705 order1 = iterator1->next(success);
706 order3 = iterator3->next(success);
707 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
708 /*
709 doAssert((CollationElementIterator::secondaryOrder(order1) !=
710 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
711 */
712 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
713
714 iterator1->reset(); iterator2->reset(); iterator3->reset();
715 order1 = iterator1->next(success);
716 if (U_FAILURE(success))
717 {
718 errln("Somehow ran out of memory stepping through the iterator.");
719 return;
720 }
721
722 doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
723
724 order2 = iterator2->next(success);
725 if (U_FAILURE(success))
726 {
727 errln("Somehow ran out of memory stepping through the iterator.");
728 return;
729 }
730
731 doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
732 doAssert((order1 == order2), "The order result should be the same");
733
734 order3 = iterator3->next(success);
735 if (U_FAILURE(success))
736 {
737 errln("Somehow ran out of memory stepping through the iterator.");
738 return;
739 }
740
741 doAssert((CollationElementIterator::primaryOrder(order1) ==
742 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
743 doAssert((CollationElementIterator::secondaryOrder(order1) ==
744 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
745 doAssert((CollationElementIterator::tertiaryOrder(order1) ==
746 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
747
748 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
749 if (U_FAILURE(success))
750 {
751 errln("Somehow ran out of memory stepping through the iterator.");
752 return;
753 }
754
755 doAssert((CollationElementIterator::primaryOrder(order1) ==
756 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
757 doAssert((CollationElementIterator::tertiaryOrder(order1) !=
758 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
759
760 order1 = iterator1->next(success); order3 = iterator3->next(success);
761 if (U_FAILURE(success))
762 {
763 errln("Somehow ran out of memory stepping through the iterator.");
764 return;
765 }
766
767 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
768 /*
769 doAssert((CollationElementIterator::secondaryOrder(order1) !=
770 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
771 */
772 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
773 doAssert((*iterator2 != *iterator3), "The iterators should be different");
774
775
776 //test error values
777 success=U_UNSUPPORTED_ERROR;
778 Collator *colerror=NULL;
779 colerror=Collator::createInstance(Locale::getEnglish(), success);
780 if (colerror != 0 || success == U_ZERO_ERROR){
781 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
782 }
783 int32_t position=coliter->previous(success);
784 if(position != CollationElementIterator::NULLORDER){
785 errln((UnicodeString)"Expected NULLORDER got" + position);
786 }
787 coliter->reset();
788 coliter->setText(*chariter, success);
789 if(!U_FAILURE(success)){
790 errln("Expeceted error");
791 }
792 iterator1->setText((UnicodeString)"hello there", success);
793 if(!U_FAILURE(success)){
794 errln("Expeceted error");
795 }
796
797 delete chariter;
798 delete coliter;
799 delete iterator1;
800 delete iterator2;
801 delete iterator3;
802 delete col;
803
804
805
806 logln("testing CollationElementIterator ends...");
807 }
808
809 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
810 void
TestOperators()811 CollationAPITest::TestOperators(/* char* par */)
812 {
813 UErrorCode success = U_ZERO_ERROR;
814 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
815 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
816 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
817 if (U_FAILURE(success)) {
818 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
819 return;
820 }
821 success = U_ZERO_ERROR;
822 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
823 if (U_FAILURE(success)) {
824 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
825 return;
826 }
827 logln("The operator tests begin : ");
828 logln("testing operator==, operator!=, clone methods ...");
829 doAssert((*col1 != *col2), "The two different table collations compared equal");
830 *col1 = *col2;
831 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
832
833 success = U_ZERO_ERROR;
834 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
835 if (U_FAILURE(success)) {
836 errln("Default collation creation failed.");
837 return;
838 }
839 doAssert((*col1 != *col3), "The two different table collations compared equal");
840 Collator* col4 = col1->clone();
841 Collator* col5 = col3->clone();
842 doAssert((*col1 == *col4), "Cloned collation objects not equal");
843 doAssert((*col3 != *col4), "Two different table collations compared equal");
844 doAssert((*col3 == *col5), "Cloned collation objects not equal");
845 doAssert((*col4 != *col5), "Two cloned collations compared equal");
846
847 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
848 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
849 if (U_FAILURE(success)) {
850 errln("Creating default collation with rules failed.");
851 return;
852 }
853 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
854
855 success = U_ZERO_ERROR;
856 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
857 if (U_FAILURE(success)) {
858 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
859 return;
860 }
861 success = U_ZERO_ERROR;
862 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
863 if (U_FAILURE(success)) {
864 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
865 return;
866 }
867 success = U_ZERO_ERROR;
868 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
869 if (U_FAILURE(success)) {
870 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
871 return;
872 }
873 // doAssert((*col7 == *col8), "The two equal table collations compared different");
874 doAssert((*col7 != *col9), "The two different table collations compared equal");
875 doAssert((*col8 != *col9), "The two different table collations compared equal");
876
877 logln("operator tests ended.");
878 delete col1;
879 delete col2;
880 delete col3;
881 delete col4;
882 delete col5;
883 delete col6;
884 delete col7;
885 delete col8;
886 delete col9;
887 }
888
889 // test clone and copy
890 void
TestDuplicate()891 CollationAPITest::TestDuplicate(/* char* par */)
892 {
893 UErrorCode status = U_ZERO_ERROR;
894 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
895 if (U_FAILURE(status)) {
896 logln("Default collator creation failed.");
897 return;
898 }
899 Collator *col2 = col1->clone();
900 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
901 UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
902 RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
903 if (U_FAILURE(status)) {
904 logln("Collation tailoring failed.");
905 return;
906 }
907 doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
908 *col3 = *((RuleBasedCollator*)col1);
909 doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
910
911 UCollationResult res;
912 UnicodeString first((UChar)0x0061);
913 UnicodeString second((UChar)0x0062);
914 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
915
916 delete col1;
917
918 // Try using the cloned collators after deleting the original data
919 res = col2->compare(first, second, status);
920 if(res != UCOL_LESS) {
921 errln("a should be less then b after tailoring");
922 }
923 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
924 errln(UnicodeString("English rule difference. ")
925 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
926 }
927 res = col3->compare(first, second, status);
928 if(res != UCOL_LESS) {
929 errln("a should be less then b after tailoring");
930 }
931 if (col3->getRules() != copiedEnglishRules) {
932 errln(UnicodeString("English rule difference. ")
933 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
934 }
935
936 delete col2;
937 delete col3;
938 }
939
940 void
TestCompare()941 CollationAPITest::TestCompare(/* char* par */)
942 {
943 logln("The compare tests begin : ");
944 Collator *col = 0;
945 UErrorCode success = U_ZERO_ERROR;
946 col = Collator::createInstance(Locale::getEnglish(), success);
947 if (U_FAILURE(success)) {
948 errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
949 return;
950 }
951 UnicodeString test1("Abcda"), test2("abcda");
952 logln("Use tertiary comparison level testing ....");
953
954 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
955 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
956 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
957
958 col->setStrength(Collator::SECONDARY);
959 logln("Use secondary comparison level testing ....");
960
961 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
962 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
963 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
964
965 col->setStrength(Collator::PRIMARY);
966 logln("Use primary comparison level testing ....");
967
968 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
969 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
970 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
971
972 // Test different APIs
973 const UChar* t1 = test1.getBuffer();
974 int32_t t1Len = test1.length();
975 const UChar* t2 = test2.getBuffer();
976 int32_t t2Len = test2.length();
977
978 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
979 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
980 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
981 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
982 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
983 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
984
985 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
986 doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
987 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
988 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
989 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
990 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
991 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
992
993
994
995 logln("The compare tests end.");
996 delete col;
997 }
998
999 void
TestGetAll()1000 CollationAPITest::TestGetAll(/* char* par */)
1001 {
1002 int32_t count1, count2;
1003 UErrorCode status = U_ZERO_ERROR;
1004
1005 logln("Trying Collator::getAvailableLocales(int&)");
1006
1007 const Locale* list = Collator::getAvailableLocales(count1);
1008 for (int32_t i = 0; i < count1; ++i) {
1009 UnicodeString dispName;
1010 logln(UnicodeString("Locale name: ")
1011 + UnicodeString(list[i].getName())
1012 + UnicodeString(" , the display name is : ")
1013 + UnicodeString(list[i].getDisplayName(dispName)));
1014 }
1015
1016 if (count1 == 0 || list == NULL) {
1017 dataerrln("getAvailableLocales(int&) returned an empty list");
1018 }
1019
1020 logln("Trying Collator::getAvailableLocales()");
1021 StringEnumeration* localeEnum = Collator::getAvailableLocales();
1022 const UnicodeString* locStr;
1023 const char *locCStr;
1024 count2 = 0;
1025
1026 if (localeEnum == NULL) {
1027 dataerrln("getAvailableLocales() returned NULL");
1028 return;
1029 }
1030
1031 while ((locStr = localeEnum->snext(status)) != NULL)
1032 {
1033 logln(UnicodeString("Locale name is: ") + *locStr);
1034 count2++;
1035 }
1036 if (count1 != count2) {
1037 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1038 }
1039
1040 logln("Trying Collator::getAvailableLocales() clone");
1041 count1 = 0;
1042 StringEnumeration* localeEnum2 = localeEnum->clone();
1043 localeEnum2->reset(status);
1044 while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1045 {
1046 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1047 count1++;
1048 }
1049 if (count1 != count2) {
1050 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1051 }
1052 if (localeEnum->count(status) != count1) {
1053 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1054 }
1055 delete localeEnum;
1056 delete localeEnum2;
1057 }
1058
TestSortKey()1059 void CollationAPITest::TestSortKey()
1060 {
1061 UErrorCode status = U_ZERO_ERROR;
1062 /*
1063 this is supposed to open default date format, but later on it treats
1064 it like it is "en_US"
1065 - very bad if you try to run the tests on machine where default
1066 locale is NOT "en_US"
1067 */
1068 Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1069 if (U_FAILURE(status)) {
1070 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1071 return;
1072 }
1073
1074 if (col->getStrength() != Collator::TERTIARY)
1075 {
1076 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1077 }
1078
1079 /* Need to use identical strength */
1080 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1081
1082 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1083 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1084 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1085
1086 uint8_t sortkey1[64];
1087 uint8_t sortkey2[64];
1088 uint8_t sortkey3[64];
1089
1090 logln("Use tertiary comparison level testing ....\n");
1091
1092 CollationKey key1;
1093 col->getCollationKey(test1, u_strlen(test1), key1, status);
1094
1095 CollationKey key2;
1096 col->getCollationKey(test2, u_strlen(test2), key2, status);
1097
1098 CollationKey key3;
1099 col->getCollationKey(test3, u_strlen(test3), key3, status);
1100
1101 doAssert(key1.compareTo(key2) == Collator::GREATER,
1102 "Result should be \"Abcda\" > \"abcda\"");
1103 doAssert(key2.compareTo(key1) == Collator::LESS,
1104 "Result should be \"abcda\" < \"Abcda\"");
1105 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1106 "Result should be \"abcda\" == \"abcda\"");
1107
1108 // Clone the key2 sortkey for later.
1109 int32_t keylength = 0;
1110 const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1111 LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1112 memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1113
1114 col->getSortKey(test1, sortkey1, 64);
1115 col->getSortKey(test2, sortkey2, 64);
1116 col->getSortKey(test3, sortkey3, 64);
1117
1118 const uint8_t *tempkey = key1.getByteArray(keylength);
1119 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1120 "Test1 string should have the same collation key and sort key");
1121 tempkey = key2.getByteArray(keylength);
1122 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1123 "Test2 string should have the same collation key and sort key");
1124 tempkey = key3.getByteArray(keylength);
1125 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1126 "Test3 string should have the same collation key and sort key");
1127
1128 col->getSortKey(test1, 5, sortkey1, 64);
1129 col->getSortKey(test2, 5, sortkey2, 64);
1130 col->getSortKey(test3, 5, sortkey3, 64);
1131
1132 tempkey = key1.getByteArray(keylength);
1133 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1134 "Test1 string should have the same collation key and sort key");
1135 tempkey = key2.getByteArray(keylength);
1136 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1137 "Test2 string should have the same collation key and sort key");
1138 tempkey = key3.getByteArray(keylength);
1139 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1140 "Test3 string should have the same collation key and sort key");
1141
1142 UnicodeString strtest1(test1);
1143 col->getSortKey(strtest1, sortkey1, 64);
1144 UnicodeString strtest2(test2);
1145 col->getSortKey(strtest2, sortkey2, 64);
1146 UnicodeString strtest3(test3);
1147 col->getSortKey(strtest3, sortkey3, 64);
1148
1149 tempkey = key1.getByteArray(keylength);
1150 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1151 "Test1 string should have the same collation key and sort key");
1152 tempkey = key2.getByteArray(keylength);
1153 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1154 "Test2 string should have the same collation key and sort key");
1155 tempkey = key3.getByteArray(keylength);
1156 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1157 "Test3 string should have the same collation key and sort key");
1158
1159 logln("Use secondary comparision level testing ...\n");
1160 col->setStrength(Collator::SECONDARY);
1161
1162 col->getCollationKey(test1, u_strlen(test1), key1, status);
1163 col->getCollationKey(test2, u_strlen(test2), key2, status);
1164 col->getCollationKey(test3, u_strlen(test3), key3, status);
1165
1166 doAssert(key1.compareTo(key2) == Collator::EQUAL,
1167 "Result should be \"Abcda\" == \"abcda\"");
1168 doAssert(key2.compareTo(key3) == Collator::EQUAL,
1169 "Result should be \"abcda\" == \"abcda\"");
1170
1171 tempkey = key2.getByteArray(keylength);
1172 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1173 "Binary format for 'abcda' sortkey different for secondary strength!");
1174
1175 col->getSortKey(test1, sortkey1, 64);
1176 col->getSortKey(test2, sortkey2, 64);
1177 col->getSortKey(test3, sortkey3, 64);
1178
1179 tempkey = key1.getByteArray(keylength);
1180 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1181 "Test1 string should have the same collation key and sort key");
1182 tempkey = key2.getByteArray(keylength);
1183 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1184 "Test2 string should have the same collation key and sort key");
1185 tempkey = key3.getByteArray(keylength);
1186 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1187 "Test3 string should have the same collation key and sort key");
1188
1189 col->getSortKey(test1, 5, sortkey1, 64);
1190 col->getSortKey(test2, 5, sortkey2, 64);
1191 col->getSortKey(test3, 5, sortkey3, 64);
1192
1193 tempkey = key1.getByteArray(keylength);
1194 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1195 "Test1 string should have the same collation key and sort key");
1196 tempkey = key2.getByteArray(keylength);
1197 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1198 "Test2 string should have the same collation key and sort key");
1199 tempkey = key3.getByteArray(keylength);
1200 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1201 "Test3 string should have the same collation key and sort key");
1202
1203 col->getSortKey(strtest1, sortkey1, 64);
1204 col->getSortKey(strtest2, sortkey2, 64);
1205 col->getSortKey(strtest3, sortkey3, 64);
1206
1207 tempkey = key1.getByteArray(keylength);
1208 doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1209 "Test1 string should have the same collation key and sort key");
1210 tempkey = key2.getByteArray(keylength);
1211 doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1212 "Test2 string should have the same collation key and sort key");
1213 tempkey = key3.getByteArray(keylength);
1214 doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1215 "Test3 string should have the same collation key and sort key");
1216
1217 logln("testing sortkey ends...");
1218 delete col;
1219 }
1220
TestSortKeyOverflow()1221 void CollationAPITest::TestSortKeyOverflow() {
1222 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1223 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1224 if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1225 return;
1226 }
1227 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1228 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi.
1229 // The sort key should be 6 bytes:
1230 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1231 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1232 uint8_t sortKey[12];
1233 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey));
1234 uint8_t sortKey2[12];
1235 for (int32_t capacity = 0; capacity < length; ++capacity) {
1236 uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2));
1237 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1238 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1239 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1240 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1241 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1242 }
1243 }
1244
1245 // Now try to break getCollationKey().
1246 // Internally, it always starts with a large stack buffer.
1247 // Since we cannot control the initial capacity, we throw an increasing number
1248 // of characters at it, with the problematic part at the end.
1249 const int32_t longCapacity = 2000;
1250 // Each 'a' in the prefix should result in one primary sort key byte.
1251 // For i_and_phi we expect 6 bytes, then the NUL terminator.
1252 const int32_t maxPrefixLength = longCapacity - 6 - 1;
1253 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1254 UnicodeString s(FALSE, i_and_phi, 2);
1255 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1256 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1257 CollationKey collKey;
1258 col->getCollationKey(s, collKey, errorCode);
1259 int32_t collKeyLength;
1260 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1261 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1262 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1263 }
1264
1265 // Insert an 'a' to match ++prefixLength.
1266 s.insert(prefixLength, (UChar)0x61);
1267 }
1268 }
1269
TestMaxExpansion()1270 void CollationAPITest::TestMaxExpansion()
1271 {
1272 UErrorCode status = U_ZERO_ERROR;
1273 UChar ch = 0;
1274 UChar32 unassigned = 0xEFFFD;
1275 uint32_t sorder = 0;
1276 uint32_t temporder = 0;
1277
1278 UnicodeString rule("&a < ab < c/aba < d < z < ch");
1279 RuleBasedCollator coll(rule, status);
1280 if(U_FAILURE(status)) {
1281 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1282 return;
1283 }
1284 UnicodeString str(ch);
1285 CollationElementIterator *iter =
1286 coll.createCollationElementIterator(str);
1287
1288 while (ch < 0xFFFF && U_SUCCESS(status)) {
1289 int count = 1;
1290 uint32_t order;
1291 int32_t size = 0;
1292
1293 ch ++;
1294
1295 str.setCharAt(0, ch);
1296 iter->setText(str, status);
1297 order = iter->previous(status);
1298
1299 /* thai management */
1300 if (order == 0)
1301 order = iter->previous(status);
1302
1303 while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1304 count ++;
1305 }
1306
1307 size = coll.getMaxExpansion(order);
1308 if (U_FAILURE(status) || size < count) {
1309 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1310 ch, size, count);
1311 }
1312 }
1313
1314 /* testing for exact max expansion */
1315 int32_t size;
1316 ch = 0;
1317 while (ch < 0x61) {
1318 uint32_t order;
1319 str.setCharAt(0, ch);
1320 iter->setText(str, status);
1321 order = iter->previous(status);
1322 size = coll.getMaxExpansion(order);
1323 if (U_FAILURE(status) || size != 1) {
1324 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1325 ch, size, 1);
1326 }
1327 ch ++;
1328 }
1329
1330 ch = 0x63;
1331 str.setTo(ch);
1332 iter->setText(str, status);
1333 temporder = iter->previous(status);
1334 size = coll.getMaxExpansion(temporder);
1335 if (U_FAILURE(status) || size != 3) {
1336 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1337 ch, temporder, size, 3);
1338 }
1339
1340 ch = 0x64;
1341 str.setTo(ch);
1342 iter->setText(str, status);
1343 temporder = iter->previous(status);
1344 size = coll.getMaxExpansion(temporder);
1345 if (U_FAILURE(status) || size != 1) {
1346 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1347 ch, temporder, size, 1);
1348 }
1349
1350 str.setTo(unassigned);
1351 iter->setText(str, status);
1352 sorder = iter->previous(status);
1353 size = coll.getMaxExpansion(sorder);
1354 if (U_FAILURE(status) || size != 2) {
1355 errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1356 size, 2);
1357 }
1358
1359 /* testing jamo */
1360 ch = 0x1165;
1361 str.setTo(ch);
1362 iter->setText(str, status);
1363 temporder = iter->previous(status);
1364 size = coll.getMaxExpansion(temporder);
1365 if (U_FAILURE(status) || size > 3) {
1366 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1367 ch, size, 3);
1368 }
1369
1370 delete iter;
1371
1372 /* testing special jamo &a<\u1160 */
1373 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1374
1375 RuleBasedCollator jamocoll(rule, status);
1376 iter = jamocoll.createCollationElementIterator(str);
1377 temporder = iter->previous(status);
1378 size = iter->getMaxExpansion(temporder);
1379 if (U_FAILURE(status) || size != 6) {
1380 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1381 ch, size, 5);
1382 }
1383
1384 delete iter;
1385 }
1386
TestDisplayName()1387 void CollationAPITest::TestDisplayName()
1388 {
1389 UErrorCode error = U_ZERO_ERROR;
1390 Collator *coll = Collator::createInstance("en_US", error);
1391 if (U_FAILURE(error)) {
1392 errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1393 return;
1394 }
1395 UnicodeString name;
1396 UnicodeString result;
1397 coll->getDisplayName(Locale::getCanadaFrench(), result);
1398 Locale::getCanadaFrench().getDisplayName(name);
1399 if (result.compare(name)) {
1400 errln("Failure getting the correct name for locale en_US");
1401 }
1402
1403 coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1404 Locale::getSimplifiedChinese().getDisplayName(name);
1405 if (result.compare(name)) {
1406 errln("Failure getting the correct name for locale zh_SG");
1407 }
1408 delete coll;
1409 }
1410
TestAttribute()1411 void CollationAPITest::TestAttribute()
1412 {
1413 UErrorCode error = U_ZERO_ERROR;
1414 Collator *coll = Collator::createInstance(error);
1415
1416 if (U_FAILURE(error)) {
1417 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1418 return;
1419 }
1420
1421 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1422 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1423 U_FAILURE(error)) {
1424 errln("Setting and retrieving of the french collation failed");
1425 }
1426
1427 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1428 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1429 U_FAILURE(error)) {
1430 errln("Setting and retrieving of the french collation failed");
1431 }
1432
1433 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1434 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1435 U_FAILURE(error)) {
1436 errln("Setting and retrieving of the alternate handling failed");
1437 }
1438
1439 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1440 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1441 U_FAILURE(error)) {
1442 errln("Setting and retrieving of the alternate handling failed");
1443 }
1444
1445 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1446 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1447 U_FAILURE(error)) {
1448 errln("Setting and retrieving of the case first attribute failed");
1449 }
1450
1451 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1452 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1453 U_FAILURE(error)) {
1454 errln("Setting and retrieving of the case first attribute failed");
1455 }
1456
1457 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1458 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1459 U_FAILURE(error)) {
1460 errln("Setting and retrieving of the case level attribute failed");
1461 }
1462
1463 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1464 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1465 U_FAILURE(error)) {
1466 errln("Setting and retrieving of the case level attribute failed");
1467 }
1468
1469 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1470 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1471 U_FAILURE(error)) {
1472 errln("Setting and retrieving of the normalization on/off attribute failed");
1473 }
1474
1475 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1476 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1477 U_FAILURE(error)) {
1478 errln("Setting and retrieving of the normalization on/off attribute failed");
1479 }
1480
1481 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1482 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1483 U_FAILURE(error)) {
1484 errln("Setting and retrieving of the collation strength failed");
1485 }
1486
1487 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1488 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1489 U_FAILURE(error)) {
1490 errln("Setting and retrieving of the collation strength failed");
1491 }
1492
1493 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1494 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1495 U_FAILURE(error)) {
1496 errln("Setting and retrieving of the collation strength failed");
1497 }
1498
1499 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1500 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1501 U_FAILURE(error)) {
1502 errln("Setting and retrieving of the collation strength failed");
1503 }
1504
1505 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1506 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1507 U_FAILURE(error)) {
1508 errln("Setting and retrieving of the collation strength failed");
1509 }
1510
1511 delete coll;
1512 }
1513
TestVariableTopSetting()1514 void CollationAPITest::TestVariableTopSetting() {
1515 UErrorCode status = U_ZERO_ERROR;
1516
1517 UChar vt[256] = { 0 };
1518
1519 // Use the root collator, not the default collator.
1520 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1521 Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1522 if(U_FAILURE(status)) {
1523 delete coll;
1524 errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1525 return;
1526 }
1527
1528 uint32_t oldVarTop = coll->getVariableTop(status);
1529
1530 // ICU 53+: The character must be in a supported reordering group,
1531 // and the variable top is pinned to the end of that group.
1532 vt[0] = 0x0041;
1533
1534 (void)coll->setVariableTop(vt, 1, status);
1535 if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1536 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1537 }
1538
1539 status = U_ZERO_ERROR;
1540 vt[0] = 0x24; // dollar sign (currency symbol)
1541 uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1542 if(U_FAILURE(status)) {
1543 errln("setVariableTop(dollar sign) failed: %s", u_errorName(status));
1544 return;
1545 }
1546 if(newVarTop != coll->getVariableTop(status)) {
1547 errln("setVariableTop(dollar sign) != following getVariableTop()");
1548 }
1549
1550 UnicodeString dollar((UChar)0x24);
1551 UnicodeString euro((UChar)0x20AC);
1552 uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1553 assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1554 (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1555 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1556 (int64_t)newVarTop2, (int64_t)newVarTop);
1557
1558 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1559 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar));
1560 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro));
1561 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30)));
1562
1563 coll->setVariableTop(oldVarTop, status);
1564
1565 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1566
1567 if(newVarTop != newerVarTop) {
1568 errln("Didn't set vartop properly from UnicodeString!\n");
1569 }
1570
1571 delete coll;
1572
1573 }
1574
TestMaxVariable()1575 void CollationAPITest::TestMaxVariable() {
1576 UErrorCode errorCode = U_ZERO_ERROR;
1577 LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1578 if(U_FAILURE(errorCode)) {
1579 errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1580 return;
1581 }
1582
1583 (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1584 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1585 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1586 }
1587
1588 errorCode = U_ZERO_ERROR;
1589 (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1590
1591 if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1592 errln("setMaxVariable(currency) != following getMaxVariable()");
1593 }
1594
1595 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1596 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1597 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1598 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1599 }
1600
TestGetLocale()1601 void CollationAPITest::TestGetLocale() {
1602 UErrorCode status = U_ZERO_ERROR;
1603 const char *rules = "&a<x<y<z";
1604 UChar rlz[256] = {0};
1605
1606 Collator *coll = Collator::createInstance("root", status);
1607 if(U_FAILURE(status)) {
1608 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1609 return;
1610 }
1611 Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1612 if(locale != Locale::getRoot()) {
1613 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1614 "getLocale().getName() = \"%s\"",
1615 locale.getName());
1616 }
1617 delete coll;
1618
1619 coll = Collator::createInstance("", status);
1620 if(U_FAILURE(status)) {
1621 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1622 return;
1623 }
1624 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1625 if(locale != Locale::getRoot()) {
1626 errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1627 "getLocale().getName() = \"%s\"",
1628 locale.getName());
1629 }
1630 delete coll;
1631
1632 int32_t i = 0;
1633
1634 static const struct {
1635 const char* requestedLocale;
1636 const char* validLocale;
1637 const char* actualLocale;
1638 } testStruct[] = {
1639 // Note: Locale::getRoot().getName() == "" not "root".
1640 { "de_DE", "de", "" },
1641 { "sr_RS", "sr_Cyrl_RS", "sr" },
1642 { "en_US_CALIFORNIA", "en_US", "" },
1643 { "fr_FR_NONEXISTANT", "fr", "" },
1644 // pinyin is the default, therefore suppressed.
1645 { "zh_CN", "zh_Hans_CN", "zh" },
1646 // zh_Hant has default=stroke but the data is in zh.
1647 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1648 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1649 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
1650 };
1651
1652 u_unescape(rules, rlz, 256);
1653
1654 /* test opening collators for different locales */
1655 for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) {
1656 status = U_ZERO_ERROR;
1657 coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1658 if(U_FAILURE(status)) {
1659 errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1660 delete coll;
1661 continue;
1662 }
1663 // The requested locale may be the same as the valid locale,
1664 // or may not be supported at all. See ticket #10477.
1665 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1666 if(U_SUCCESS(status) &&
1667 locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1668 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1669 testStruct[i].requestedLocale,
1670 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1671 }
1672 status = U_ZERO_ERROR;
1673 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1674 if(locale != testStruct[i].validLocale) {
1675 errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1676 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1677 }
1678 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1679 if(locale != testStruct[i].actualLocale) {
1680 errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1681 testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1682 }
1683 // If we open a collator for the actual locale, we should get an equivalent one again.
1684 LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1685 if(U_FAILURE(status)) {
1686 errln("Failed to open collator for actual locale \"%s\" with %s",
1687 locale.getName(), u_errorName(status));
1688 } else {
1689 Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1690 if(actual2 != locale) {
1691 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1692 locale.getName(), actual2.getName());
1693 }
1694 if(*coll2 != *coll) {
1695 errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1696 }
1697 }
1698 delete coll;
1699 }
1700
1701 /* completely non-existent locale for collator should get a root collator */
1702 {
1703 LocalPointer<Collator> coll(Collator::createInstance("blahaha", status));
1704 if(U_FAILURE(status)) {
1705 errln("Failed to open collator with %s", u_errorName(status));
1706 return;
1707 }
1708 Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status);
1709 const char *name = valid.getName();
1710 if(*name != 0 && strcmp(name, "root") != 0) {
1711 errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name);
1712 }
1713 Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1714 name = actual.getName();
1715 if(*name != 0 && strcmp(name, "root") != 0) {
1716 errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name);
1717 }
1718 }
1719
1720
1721
1722 /* collator instantiated from rules should have all three locales NULL */
1723 coll = new RuleBasedCollator(rlz, status);
1724 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1725 if(U_SUCCESS(status) && !locale.isBogus()) {
1726 errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1727 }
1728 status = U_ZERO_ERROR;
1729 locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1730 if(!locale.isBogus()) {
1731 errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1732 }
1733 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1734 if(!locale.isBogus()) {
1735 errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1736 }
1737 delete coll;
1738 }
1739
/**
 * One entry of the TestBounds() name table: the escaped source text and the
 * buffer that receives its sort key.
 */
struct teststruct {
    /** Source string, written with \uXXXX escapes (fed to u_unescape()). */
    const char *original;
    /** Sort key bytes for `original`; compared with strcmp() when sorting. */
    uint8_t key[256];
};
1744
1745
1746
1747 U_CDECL_BEGIN
1748 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1749 compare_teststruct(const void *string1, const void *string2) {
1750 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1751 }
1752 U_CDECL_END
1753
1754
TestBounds(void)1755 void CollationAPITest::TestBounds(void) {
1756 UErrorCode status = U_ZERO_ERROR;
1757
1758 Collator *coll = Collator::createInstance(Locale("sh"), status);
1759 if(U_FAILURE(status)) {
1760 delete coll;
1761 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1762 return;
1763 }
1764
1765 uint8_t sortkey[512], lower[512], upper[512];
1766 UChar buffer[512];
1767
1768 static const char * const test[] = {
1769 "John Smith",
1770 "JOHN SMITH",
1771 "john SMITH",
1772 "j\\u00F6hn sm\\u00EFth",
1773 "J\\u00F6hn Sm\\u00EFth",
1774 "J\\u00D6HN SM\\u00CFTH",
1775 "john smithsonian",
1776 "John Smithsonian"
1777 };
1778
1779 struct teststruct tests[] = {
1780 {"\\u010CAKI MIHALJ", {0}},
1781 {"\\u010CAKI MIHALJ", {0}},
1782 {"\\u010CAKI PIRO\\u0160KA", {0}},
1783 {"\\u010CABAI ANDRIJA", {0}},
1784 {"\\u010CABAI LAJO\\u0160", {0}},
1785 {"\\u010CABAI MARIJA", {0}},
1786 {"\\u010CABAI STEVAN", {0}},
1787 {"\\u010CABAI STEVAN", {0}},
1788 {"\\u010CABARKAPA BRANKO", {0}},
1789 {"\\u010CABARKAPA MILENKO", {0}},
1790 {"\\u010CABARKAPA MIROSLAV", {0}},
1791 {"\\u010CABARKAPA SIMO", {0}},
1792 {"\\u010CABARKAPA STANKO", {0}},
1793 {"\\u010CABARKAPA TAMARA", {0}},
1794 {"\\u010CABARKAPA TOMA\\u0160", {0}},
1795 {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1796 {"\\u010CABDARI\\u0106 ZORICA", {0}},
1797 {"\\u010CABI NANDOR", {0}},
1798 {"\\u010CABOVI\\u0106 MILAN", {0}},
1799 {"\\u010CABRADI AGNEZIJA", {0}},
1800 {"\\u010CABRADI IVAN", {0}},
1801 {"\\u010CABRADI JELENA", {0}},
1802 {"\\u010CABRADI LJUBICA", {0}},
1803 {"\\u010CABRADI STEVAN", {0}},
1804 {"\\u010CABRDA MARTIN", {0}},
1805 {"\\u010CABRILO BOGDAN", {0}},
1806 {"\\u010CABRILO BRANISLAV", {0}},
1807 {"\\u010CABRILO LAZAR", {0}},
1808 {"\\u010CABRILO LJUBICA", {0}},
1809 {"\\u010CABRILO SPASOJA", {0}},
1810 {"\\u010CADE\\u0160 ZDENKA", {0}},
1811 {"\\u010CADESKI BLAGOJE", {0}},
1812 {"\\u010CADOVSKI VLADIMIR", {0}},
1813 {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1814 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1815 {"\\u010CAJA VANKA", {0}},
1816 {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1817 {"\\u010CAJI\\u0106 BORISLAV", {0}},
1818 {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1819 {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1820 {"\\u010CAKAN EUGEN", {0}},
1821 {"\\u010CAKAN EVGENIJE", {0}},
1822 {"\\u010CAKAN IVAN", {0}},
1823 {"\\u010CAKAN JULIJAN", {0}},
1824 {"\\u010CAKAN MIHAJLO", {0}},
1825 {"\\u010CAKAN STEVAN", {0}},
1826 {"\\u010CAKAN VLADIMIR", {0}},
1827 {"\\u010CAKAN VLADIMIR", {0}},
1828 {"\\u010CAKAN VLADIMIR", {0}},
1829 {"\\u010CAKARA ANA", {0}},
1830 {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1831 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1832 {"\\u010CAKI \\u0160ANDOR", {0}},
1833 {"\\u010CAKI AMALIJA", {0}},
1834 {"\\u010CAKI ANDRA\\u0160", {0}},
1835 {"\\u010CAKI LADISLAV", {0}},
1836 {"\\u010CAKI LAJO\\u0160", {0}},
1837 {"\\u010CAKI LASLO", {0}}
1838 };
1839
1840
1841
1842 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1843 int32_t arraySize = UPRV_LENGTHOF(tests);
1844
1845 (void)lowerSize; // Suppress unused variable warnings.
1846 (void)upperSize;
1847
1848 for(i = 0; i<arraySize; i++) {
1849 buffSize = u_unescape(tests[i].original, buffer, 512);
1850 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1851 }
1852
1853 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1854
1855 for(i = 0; i < arraySize-1; i++) {
1856 for(j = i+1; j < arraySize; j++) {
1857 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1858 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1859 for(k = i; k <= j; k++) {
1860 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1861 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1862 }
1863 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1864 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1865 }
1866 }
1867 }
1868 }
1869
1870
1871 for(i = 0; i<UPRV_LENGTHOF(test); i++) {
1872 buffSize = u_unescape(test[i], buffer, 512);
1873 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1874 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1875 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1876 for(j = i+1; j<UPRV_LENGTHOF(test); j++) {
1877 buffSize = u_unescape(test[j], buffer, 512);
1878 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1879 if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1880 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1881 }
1882 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1883 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1884 }
1885 }
1886 }
1887 delete coll;
1888 }
1889
1890
TestGetTailoredSet()1891 void CollationAPITest::TestGetTailoredSet()
1892 {
1893 struct {
1894 const char *rules;
1895 const char *tests[20];
1896 int32_t testsize;
1897 } setTest[] = {
1898 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1899 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1900 };
1901
1902 int32_t i = 0, j = 0;
1903 UErrorCode status = U_ZERO_ERROR;
1904
1905 UnicodeString buff;
1906 UnicodeSet *set = NULL;
1907
1908 for(i = 0; i < UPRV_LENGTHOF(setTest); i++) {
1909 buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1910 RuleBasedCollator coll(buff, status);
1911 if(U_SUCCESS(status)) {
1912 set = coll.getTailoredSet(status);
1913 if(set->size() < setTest[i].testsize) {
1914 errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1915 }
1916 for(j = 0; j < setTest[i].testsize; j++) {
1917 buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1918 if(!set->contains(buff)) {
1919 errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1920 }
1921 }
1922 delete set;
1923 } else {
1924 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1925 }
1926 }
1927 }
1928
TestUClassID()1929 void CollationAPITest::TestUClassID()
1930 {
1931 char id = *((char *)RuleBasedCollator::getStaticClassID());
1932 if (id != 0) {
1933 errln("Static class id for RuleBasedCollator should be 0");
1934 }
1935 UErrorCode status = U_ZERO_ERROR;
1936 RuleBasedCollator *coll
1937 = (RuleBasedCollator *)Collator::createInstance(status);
1938 if(U_FAILURE(status)) {
1939 delete coll;
1940 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1941 return;
1942 }
1943 id = *((char *)coll->getDynamicClassID());
1944 if (id != 0) {
1945 errln("Dynamic class id for RuleBasedCollator should be 0");
1946 }
1947 id = *((char *)CollationKey::getStaticClassID());
1948 if (id != 0) {
1949 errln("Static class id for CollationKey should be 0");
1950 }
1951 CollationKey *key = new CollationKey();
1952 id = *((char *)key->getDynamicClassID());
1953 if (id != 0) {
1954 errln("Dynamic class id for CollationKey should be 0");
1955 }
1956 id = *((char *)CollationElementIterator::getStaticClassID());
1957 if (id != 0) {
1958 errln("Static class id for CollationElementIterator should be 0");
1959 }
1960 UnicodeString str("testing");
1961 CollationElementIterator *iter = coll->createCollationElementIterator(str);
1962 id = *((char *)iter->getDynamicClassID());
1963 if (id != 0) {
1964 errln("Dynamic class id for CollationElementIterator should be 0");
1965 }
1966 delete key;
1967 delete iter;
1968 delete coll;
1969 }
1970
1971 class TestCollator : public Collator
1972 {
1973 public:
1974 virtual Collator* clone(void) const;
1975
1976 using Collator::compare;
1977
1978 virtual UCollationResult compare(const UnicodeString& source,
1979 const UnicodeString& target,
1980 UErrorCode& status) const;
1981 virtual UCollationResult compare(const UnicodeString& source,
1982 const UnicodeString& target,
1983 int32_t length,
1984 UErrorCode& status) const;
1985 virtual UCollationResult compare(const UChar* source,
1986 int32_t sourceLength,
1987 const UChar* target,
1988 int32_t targetLength,
1989 UErrorCode& status) const;
1990 virtual CollationKey& getCollationKey(const UnicodeString& source,
1991 CollationKey& key,
1992 UErrorCode& status) const;
1993 virtual CollationKey& getCollationKey(const UChar*source,
1994 int32_t sourceLength,
1995 CollationKey& key,
1996 UErrorCode& status) const;
1997 virtual int32_t hashCode(void) const;
1998 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
1999 virtual ECollationStrength getStrength(void) const;
2000 virtual void setStrength(ECollationStrength newStrength);
2001 virtual UClassID getDynamicClassID(void) const;
2002 virtual void getVersion(UVersionInfo info) const;
2003 virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2004 UErrorCode &status);
2005 virtual UColAttributeValue getAttribute(UColAttribute attr,
2006 UErrorCode &status) const;
2007 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2008 UErrorCode &status);
2009 virtual uint32_t setVariableTop(const UnicodeString &varTop,
2010 UErrorCode &status);
2011 virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2012 virtual uint32_t getVariableTop(UErrorCode &status) const;
2013 virtual int32_t getSortKey(const UnicodeString& source,
2014 uint8_t* result,
2015 int32_t resultLength) const;
2016 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2017 uint8_t*result, int32_t resultLength) const;
2018 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2019 virtual UBool operator==(const Collator& other) const;
2020 // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2021 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2022 TestCollator() : Collator() {};
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2023 TestCollator(UCollationStrength collationStrength,
2024 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
2025 };
2026
operator ==(const Collator & other) const2027 inline UBool TestCollator::operator==(const Collator& other) const {
2028 // TestCollator has no fields, so we test for identity.
2029 return this == &other;
2030
2031 // Normally, subclasses should do something like the following:
2032 // if (this == &other) { return TRUE; }
2033 // if (!Collator::operator==(other)) { return FALSE; } // not the same class
2034 //
2035 // const TestCollator &o = (const TestCollator&)other;
2036 // (compare this vs. o's subclass fields)
2037 }
2038
clone() const2039 Collator* TestCollator::clone() const
2040 {
2041 return new TestCollator();
2042 }
2043
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2044 UCollationResult TestCollator::compare(const UnicodeString& source,
2045 const UnicodeString& target,
2046 UErrorCode& status) const
2047 {
2048 if(U_SUCCESS(status)) {
2049 return UCollationResult(source.compare(target));
2050 } else {
2051 return UCOL_EQUAL;
2052 }
2053 }
2054
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2055 UCollationResult TestCollator::compare(const UnicodeString& source,
2056 const UnicodeString& target,
2057 int32_t length,
2058 UErrorCode& status) const
2059 {
2060 if(U_SUCCESS(status)) {
2061 return UCollationResult(source.compare(0, length, target));
2062 } else {
2063 return UCOL_EQUAL;
2064 }
2065 }
2066
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2067 UCollationResult TestCollator::compare(const UChar* source,
2068 int32_t sourceLength,
2069 const UChar* target,
2070 int32_t targetLength,
2071 UErrorCode& status) const
2072 {
2073 UnicodeString s(source, sourceLength);
2074 UnicodeString t(target, targetLength);
2075 return compare(s, t, status);
2076 }
2077
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2078 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2079 CollationKey& key,
2080 UErrorCode& status) const
2081 {
2082 char temp[100];
2083 int length = 100;
2084 length = source.extract(temp, length, NULL, status);
2085 temp[length] = 0;
2086 CollationKey tempkey((uint8_t*)temp, length);
2087 key = tempkey;
2088 return key;
2089 }
2090
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2091 CollationKey& TestCollator::getCollationKey(const UChar*source,
2092 int32_t sourceLength,
2093 CollationKey& key,
2094 UErrorCode& status) const
2095 {
2096 //s tack allocation used since collationkey does not keep the unicodestring
2097 UnicodeString str(source, sourceLength);
2098 return getCollationKey(str, key, status);
2099 }
2100
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2101 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2102 int32_t resultLength) const
2103 {
2104 UErrorCode status = U_ZERO_ERROR;
2105 int32_t length = source.extract((char *)result, resultLength, NULL,
2106 status);
2107 result[length] = 0;
2108 return length;
2109 }
2110
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2111 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2112 uint8_t*result, int32_t resultLength) const
2113 {
2114 UnicodeString str(source, sourceLength);
2115 return getSortKey(str, result, resultLength);
2116 }
2117
hashCode() const2118 int32_t TestCollator::hashCode() const
2119 {
2120 return 0;
2121 }
2122
getLocale(ULocDataLocaleType type,UErrorCode & status) const2123 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2124 {
2125 // api not used, this is to make the compiler happy
2126 if (U_FAILURE(status)) {
2127 (void)type;
2128 }
2129 return NULL;
2130 }
2131
getStrength() const2132 Collator::ECollationStrength TestCollator::getStrength() const
2133 {
2134 return TERTIARY;
2135 }
2136
setStrength(Collator::ECollationStrength newStrength)2137 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2138 {
2139 // api not used, this is to make the compiler happy
2140 (void)newStrength;
2141 }
2142
getDynamicClassID(void) const2143 UClassID TestCollator::getDynamicClassID(void) const
2144 {
2145 return 0;
2146 }
2147
getVersion(UVersionInfo info) const2148 void TestCollator::getVersion(UVersionInfo info) const
2149 {
2150 // api not used, this is to make the compiler happy
2151 memset(info, 0, U_MAX_VERSION_LENGTH);
2152 }
2153
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2154 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2155 UErrorCode & /*status*/)
2156 {
2157 }
2158
getAttribute(UColAttribute attr,UErrorCode & status) const2159 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2160 UErrorCode &status) const
2161 {
2162 // api not used, this is to make the compiler happy
2163 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2164 return UCOL_OFF;
2165 }
2166 return UCOL_DEFAULT;
2167 }
2168
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2169 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2170 UErrorCode &status)
2171 {
2172 // api not used, this is to make the compiler happy
2173 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2174 status = U_ILLEGAL_ARGUMENT_ERROR;
2175 }
2176 return 0;
2177 }
2178
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2179 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2180 UErrorCode &status)
2181 {
2182 // api not used, this is to make the compiler happy
2183 if (U_SUCCESS(status) && varTop.length() == 0) {
2184 status = U_ILLEGAL_ARGUMENT_ERROR;
2185 }
2186 return 0;
2187 }
2188
setVariableTop(uint32_t varTop,UErrorCode & status)2189 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2190 {
2191 // api not used, this is to make the compiler happy
2192 if (U_SUCCESS(status) && varTop == 0) {
2193 status = U_ILLEGAL_ARGUMENT_ERROR;
2194 }
2195 }
2196
getVariableTop(UErrorCode & status) const2197 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2198 {
2199
2200 // api not used, this is to make the compiler happy
2201 if (U_SUCCESS(status)) {
2202 return 0;
2203 }
2204 return (uint32_t)(0xFFFFFFFFu);
2205 }
2206
getTailoredSet(UErrorCode & status) const2207 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2208 {
2209 return Collator::getTailoredSet(status);
2210 }
2211
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2212 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2213 {
2214 Collator::setLocales(requestedLocale, validLocale, actualLocale);
2215 }
2216
2217
TestSubclass()2218 void CollationAPITest::TestSubclass()
2219 {
2220 TestCollator col1;
2221 TestCollator col2;
2222 doAssert(col1 != col2, "2 instances of TestCollator should be different");
2223 if (col1.hashCode() != col2.hashCode()) {
2224 errln("Every TestCollator has the same hashcode");
2225 }
2226 UnicodeString abc("abc", 3);
2227 UnicodeString bcd("bcd", 3);
2228 if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2229 errln("TestCollator compare should be the same as the default "
2230 "string comparison");
2231 }
2232 CollationKey key;
2233 UErrorCode status = U_ZERO_ERROR;
2234 col1.getCollationKey(abc, key, status);
2235 int32_t length = 0;
2236 const char* bytes = (const char *)key.getByteArray(length);
2237 UnicodeString keyarray(bytes, length, NULL, status);
2238 if (abc != keyarray) {
2239 errln("TestCollator collationkey API is returning wrong values");
2240 }
2241
2242 UnicodeSet expectedset(0, 0x10FFFF);
2243 UnicodeSet *defaultset = col1.getTailoredSet(status);
2244 if (!defaultset->containsAll(expectedset)
2245 || !expectedset.containsAll(*defaultset)) {
2246 errln("Error: expected default tailoring to be 0 to 0x10ffff");
2247 }
2248 delete defaultset;
2249
2250 // use base class implementation
2251 Locale loc1 = Locale::getGermany();
2252 Locale loc2 = Locale::getFrance();
2253 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2254
2255 UnicodeString displayName;
2256 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2257
2258 TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2259 UnicodeString a("a");
2260 UnicodeString b("b");
2261 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2262 if(col1.compare(a, b) != result) {
2263 errln("Collator doesn't give default result");
2264 }
2265 if(col1.compare(a, b, 1) != result) {
2266 errln("Collator doesn't give default result");
2267 }
2268 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2269 errln("Collator doesn't give default result");
2270 }
2271 }
2272
TestNULLCharTailoring()2273 void CollationAPITest::TestNULLCharTailoring()
2274 {
2275 UErrorCode status = U_ZERO_ERROR;
2276 UChar buf[256] = {0};
2277 int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2278 UnicodeString first((UChar)0x0061);
2279 UnicodeString second((UChar)0);
2280 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2281 if(U_FAILURE(status)) {
2282 delete coll;
2283 errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2284 return;
2285 }
2286 UCollationResult res = coll->compare(first, second, status);
2287 if(res != UCOL_LESS) {
2288 errln("a should be less then NULL after tailoring");
2289 }
2290 delete coll;
2291 }
2292
TestClone()2293 void CollationAPITest::TestClone() {
2294 logln("\ninit c0");
2295 UErrorCode status = U_ZERO_ERROR;
2296 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2297
2298 if (U_FAILURE(status)) {
2299 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2300 return;
2301 }
2302
2303 c0->setStrength(Collator::TERTIARY);
2304 dump("c0", c0, status);
2305
2306 logln("\ninit c1");
2307 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2308 c1->setStrength(Collator::TERTIARY);
2309 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2310 if(val == UCOL_LOWER_FIRST){
2311 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2312 }else{
2313 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2314 }
2315 dump("c0", c0, status);
2316 dump("c1", c1, status);
2317
2318 logln("\ninit c2");
2319 RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2320 val = c2->getAttribute(UCOL_CASE_FIRST, status);
2321 if(val == UCOL_LOWER_FIRST){
2322 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2323 }else{
2324 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2325 }
2326 if(U_FAILURE(status)){
2327 errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2328 return;
2329 }
2330 dump("c0", c0, status);
2331 dump("c1", c1, status);
2332 dump("c2", c2, status);
2333 if(*c1 == *c2){
2334 errln("The cloned objects refer to same data");
2335 }
2336 delete c0;
2337 delete c1;
2338 delete c2;
2339 }
2340
TestCloneBinary()2341 void CollationAPITest::TestCloneBinary() {
2342 IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2343 LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2344 LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2345 if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2346 return;
2347 }
2348 RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2349 RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2350 if(rbRoot == NULL || rbc == NULL) {
2351 infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2352 return;
2353 }
2354 rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2355 UnicodeString uUmlaut((UChar)0xfc);
2356 UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2357 assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2358 uint8_t bin[25000];
2359 int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode);
2360 if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) {
2361 return;
2362 }
2363 logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2364
2365 RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2366 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2367 return;
2368 }
2369 assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2370 assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2371 assertTrue("rbc==rbc2", *rbc == rbc2);
2372 uint8_t bin2[25000];
2373 int32_t bin2Length = rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode);
2374 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2375 assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2376
2377 RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode);
2378 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2379 return;
2380 }
2381 assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode));
2382 assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode));
2383 assertTrue("rbc==rbc3", *rbc == rbc3);
2384 }
2385
TestIterNumeric()2386 void CollationAPITest::TestIterNumeric() {
2387 // Regression test for ticket #9915.
2388 // The collation code sometimes masked the continuation marker away
2389 // but later tested the result for isContinuation().
2390 // This test case failed because the third bytes of the computed numeric-collation primaries
2391 // were permutated with the script reordering table.
2392 // It should have been possible to reproduce this with the root collator
2393 // and characters with appropriate 3-byte primary weights.
2394 // The effectiveness of this test depends completely on the collation elements
2395 // and on the implementation code.
2396 IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2397 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2398 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2399 return;
2400 }
2401 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2402 UCharIterator iter40, iter72;
2403 uiter_setUTF8(&iter40, "\x34\x30", 2);
2404 uiter_setUTF8(&iter72, "\x37\x32", 2);
2405 UCollationResult result = coll.compare(iter40, iter72, errorCode);
2406 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2407 }
2408
TestBadKeywords()2409 void CollationAPITest::TestBadKeywords() {
2410 // Test locale IDs with errors.
2411 // Valid locale IDs are tested via data-driven tests.
2412 UErrorCode errorCode = U_ZERO_ERROR;
2413 Locale bogusLocale(Locale::getRoot());
2414 bogusLocale.setToBogus();
2415 LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2416 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2417 errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2418 u_errorName(errorCode));
2419 }
2420
2421 // Unknown value.
2422 const char *localeID = "it-u-ks-xyz";
2423 errorCode = U_ZERO_ERROR;
2424 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2425 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2426 dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2427 localeID, u_errorName(errorCode));
2428 }
2429
2430 // Unsupported attributes.
2431 localeID = "it@colHiraganaQuaternary=true";
2432 errorCode = U_ZERO_ERROR;
2433 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2434 if(errorCode != U_UNSUPPORTED_ERROR) {
2435 if (errorCode == U_FILE_ACCESS_ERROR) {
2436 dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode));
2437 } else {
2438 errln("Collator::createInstance(%s) did not fail as expected - %s",
2439 localeID, u_errorName(errorCode));
2440 }
2441 }
2442
2443 localeID = "it-u-vt-u24";
2444 errorCode = U_ZERO_ERROR;
2445 coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2446 if(errorCode != U_UNSUPPORTED_ERROR) {
2447 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) {
2448 dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode));
2449 } else {
2450 errln("Collator::createInstance(%s) did not fail as expected - %s",
2451 localeID, u_errorName(errorCode));
2452 }
2453 }
2454 }
2455
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2456 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2457 const char* bigone = "One";
2458 const char* littleone = "one";
2459
2460 logln(msg + " " + c->compare(bigone, littleone) +
2461 " s: " + c->getStrength() +
2462 " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2463 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2464 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2465 {
2466 if (exec) logln("TestSuite CollationAPITest: ");
2467 TESTCASE_AUTO_BEGIN;
2468 TESTCASE_AUTO(TestProperty);
2469 TESTCASE_AUTO(TestOperators);
2470 TESTCASE_AUTO(TestDuplicate);
2471 TESTCASE_AUTO(TestCompare);
2472 TESTCASE_AUTO(TestHashCode);
2473 TESTCASE_AUTO(TestCollationKey);
2474 TESTCASE_AUTO(TestElemIter);
2475 TESTCASE_AUTO(TestGetAll);
2476 TESTCASE_AUTO(TestRuleBasedColl);
2477 TESTCASE_AUTO(TestDecomposition);
2478 TESTCASE_AUTO(TestSafeClone);
2479 TESTCASE_AUTO(TestSortKey);
2480 TESTCASE_AUTO(TestSortKeyOverflow);
2481 TESTCASE_AUTO(TestMaxExpansion);
2482 TESTCASE_AUTO(TestDisplayName);
2483 TESTCASE_AUTO(TestAttribute);
2484 TESTCASE_AUTO(TestVariableTopSetting);
2485 TESTCASE_AUTO(TestMaxVariable);
2486 TESTCASE_AUTO(TestRules);
2487 TESTCASE_AUTO(TestGetLocale);
2488 TESTCASE_AUTO(TestBounds);
2489 TESTCASE_AUTO(TestGetTailoredSet);
2490 TESTCASE_AUTO(TestUClassID);
2491 TESTCASE_AUTO(TestSubclass);
2492 TESTCASE_AUTO(TestNULLCharTailoring);
2493 TESTCASE_AUTO(TestClone);
2494 TESTCASE_AUTO(TestCloneBinary);
2495 TESTCASE_AUTO(TestIterNumeric);
2496 TESTCASE_AUTO(TestBadKeywords);
2497 TESTCASE_AUTO_END;
2498 }
2499
2500 #endif /* #if !UCONFIG_NO_COLLATION */
2501