• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 //===============================================================================
7 //
8 // File apicoll.cpp
9 //
10 //
11 //
12 // Created by: Helena Shih
13 //
14 // Modification History:
15 //
16 //  Date         Name          Description
17 //  2/5/97      aliu        Added streamIn and streamOut methods.  Added
18 //                          constructor which reads RuleBasedCollator object from
19 //                          a binary file.  Added writeToFile method which streams
20 //                          RuleBasedCollator out to a binary file.  The streamIn
21 //                          and streamOut methods use istream and ostream objects
22 //                          in binary mode.
23 //  6/30/97     helena      Added tests for CollationElementIterator::setText, getOffset
24 //                          setOffset and DecompositionIterator::getOffset, setOffset.
25 //                          DecompositionIterator is made public so add class scope
26 //                          testing.
27 //  02/10/98    damiba      Added test for compare(UnicodeString&, UnicodeString&, int32_t)
28 //===============================================================================
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_COLLATION
33 
34 #include "unicode/localpointer.h"
35 #include "unicode/coll.h"
36 #include "unicode/tblcoll.h"
37 #include "unicode/coleitr.h"
38 #include "unicode/sortkey.h"
39 #include "apicoll.h"
40 #include "unicode/chariter.h"
41 #include "unicode/schriter.h"
42 #include "unicode/ustring.h"
43 #include "unicode/ucol.h"
44 
45 #include "sfwdchit.h"
46 #include "cmemory.h"
47 #include <stdlib.h>
48 
// Number of elements in a true array (not a pointer), converted to int32_t.
// The whole expansion is wrapped in parentheses so that it always acts as a
// single operand, whatever operators surround the macro at the point of use.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
50 
51 void
doAssert(UBool condition,const char * message)52 CollationAPITest::doAssert(UBool condition, const char *message)
53 {
54     if (!condition) {
55         errln(UnicodeString("ERROR : ") + message);
56     }
57 }
58 
59 // Collator Class Properties
60 // ctor, dtor, createInstance, compare, getStrength/setStrength
61 // getDecomposition/setDecomposition, getDisplayName
62 void
TestProperty()63 CollationAPITest::TestProperty(/* char* par */)
64 {
65     UErrorCode success = U_ZERO_ERROR;
66     Collator *col = 0;
67     /*
68      * Expected version of the English collator.
69      * Currently, the major/minor version numbers change when the builder code
70      * changes,
71      * number 2 is from the tailoring data version and
72      * number 3 is the UCA version.
73      * This changes with every UCA version change, and the expected value
74      * needs to be adjusted.
75      * Same in cintltst/capitst.c.
76      */
77     UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};  // from ICU 4.4/UCA 5.2
78     UVersionInfo versionArray;
79 
80     logln("The property tests begin : ");
81     logln("Test ctors : ");
82     col = Collator::createInstance(Locale::getEnglish(), success);
83     if (U_FAILURE(success)){
84         errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
85         return;
86     }
87 
88     StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
89     if (U_FAILURE(success)){
90         errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
91         return;
92     }
93     delete kwEnum;
94 
95     col->getVersion(versionArray);
96     // Check for a version greater than some value rather than equality
97     // so that we need not update the expected version each time.
98     if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
99       errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
100             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
101     } else {
102       logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
103             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
104     }
105 
106     doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
107     doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
108     doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
109     doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
110     doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
111     doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
112 
113     doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
114     success = U_ZERO_ERROR;
115     {
116         UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
117         UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
118         UCharIterator abauIter, abssIter;
119         uiter_setReplaceable(&abauIter, &abau);
120         uiter_setReplaceable(&abssIter, &abss);
121         doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
122         success = U_ZERO_ERROR;
123     }
124 
125     /*start of update [Bertrand A. D. 02/10/98]*/
126     doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
127     doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB  with length 2 comparison failed");
128     doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa  with length 1 comparison failed");
129     doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa  with length 2 comparison failed");
130     doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
131     doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
132     doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
133     /*end of update [Bertrand A. D. 02/10/98]*/
134 
135 
136     logln("Test ctors ends.");
137     logln("testing Collator::getStrength() method ...");
138     doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
139     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140 
141 
142     logln("testing Collator::setStrength() method ...");
143     col->setStrength(Collator::SECONDARY);
144     doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
145     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
146     doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
147 
148     UnicodeString name;
149 
150     logln("Get display name for the US English collation in German : ");
151     logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
152     doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
153 
154     logln("Get display name for the US English collation in English : ");
155     logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
156     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
157 #if 0
158     // weiv : this test is bogus if we're running on any machine that has different default locale than English.
159     // Therefore, it is banned!
160     logln("Get display name for the US English in default locale language : ");
161     logln(Collator::getDisplayName(Locale::US, name));
162     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
163 #endif
164     delete col; col = 0;
165     RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
166                                                                             success);
167     if (U_FAILURE(success)) {
168         errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
169         return;
170     }
171     const UnicodeString &daRules = rcol->getRules();
172     if(daRules.isEmpty()) {
173         dataerrln("missing da_DK tailoring rule string");
174     } else {
175         doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
176     }
177     delete rcol;
178 
179     col = Collator::createInstance(Locale::getFrench(), success);
180     if (U_FAILURE(success))
181     {
182         errln("Creating French collation failed.");
183         return;
184     }
185 
186     col->setStrength(Collator::PRIMARY);
187     logln("testing Collator::getStrength() method again ...");
188     doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
189     doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
190 
191     logln("testing French Collator::setStrength() method ...");
192     col->setStrength(Collator::TERTIARY);
193     doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
194     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
195     doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
196 
197     logln("Create junk collation: ");
198     Locale abcd("ab", "CD", "");
199     success = U_ZERO_ERROR;
200     Collator *junk = 0;
201     junk = Collator::createInstance(abcd, success);
202 
203     if (U_FAILURE(success))
204     {
205         errln("Junk collation creation failed, should at least return default.");
206         delete col;
207         return;
208     }
209 
210     delete col;
211     col = Collator::createInstance(success);
212     if (U_FAILURE(success))
213     {
214         errln("Creating default collator failed.");
215         delete junk;
216         return;
217     }
218 
219     doAssert(((RuleBasedCollator *)col)->getRules() == ((RuleBasedCollator *)junk)->getRules(),
220                "The default collation should be returned.");
221     Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
222     if (U_FAILURE(success))
223     {
224         errln("Creating fr_CA collator failed.");
225         delete col;
226         delete junk;
227         return;
228     }
229 
230     // If the default locale isn't French, the French and non-French collators
231     // should be different
232     if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
233         doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
234     }
235     Collator *aFrCol = frCol->clone();
236     doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
237     logln("Collator property test ended.");
238 
239     delete col;
240     delete frCol;
241     delete aFrCol;
242     delete junk;
243 }
244 
245 void
TestRuleBasedColl()246 CollationAPITest::TestRuleBasedColl()
247 {
248     RuleBasedCollator *col1, *col2, *col3, *col4;
249     UErrorCode status = U_ZERO_ERROR;
250 
251     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
252     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
253 
254     col1 = new RuleBasedCollator(ruleset1, status);
255     if (U_FAILURE(status)) {
256         errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
257         return;
258     }
259     else {
260         logln("PASS: RuleBased Collator creation passed\n");
261     }
262 
263     status = U_ZERO_ERROR;
264     col2 = new RuleBasedCollator(ruleset2, status);
265     if (U_FAILURE(status)) {
266         errln("RuleBased Collator creation failed.\n");
267         return;
268     }
269     else {
270         logln("PASS: RuleBased Collator creation passed\n");
271     }
272 
273     status = U_ZERO_ERROR;
274     Locale locale("aa", "AA");
275     col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
276     if (U_FAILURE(status)) {
277         errln("Fallback Collator creation failed.: %s\n");
278         return;
279     }
280     else {
281         logln("PASS: Fallback Collator creation passed\n");
282     }
283     delete col3;
284 
285     status = U_ZERO_ERROR;
286     col3 = (RuleBasedCollator *)Collator::createInstance(status);
287     if (U_FAILURE(status)) {
288         errln("Default Collator creation failed.: %s\n");
289         return;
290     }
291     else {
292         logln("PASS: Default Collator creation passed\n");
293     }
294 
295     UnicodeString rule1 = col1->getRules();
296     UnicodeString rule2 = col2->getRules();
297     UnicodeString rule3 = col3->getRules();
298 
299     doAssert(rule1 != rule2, "Default collator getRules failed");
300     doAssert(rule2 != rule3, "Default collator getRules failed");
301     doAssert(rule1 != rule3, "Default collator getRules failed");
302 
303     col4 = new RuleBasedCollator(rule2, status);
304     if (U_FAILURE(status)) {
305         errln("RuleBased Collator creation failed.\n");
306         return;
307     }
308 
309     UnicodeString rule4 = col4->getRules();
310     doAssert(rule2 == rule4, "Default collator getRules failed");
311     int32_t length4 = 0;
312     uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
313     if (U_FAILURE(status)) {
314         errln("Cloned rule data failed.\n");
315         return;
316     }
317 
318  //   free(clonedrule4);     BAD API!!!!
319     uprv_free(clonedrule4);
320 
321 
322     delete col1;
323     delete col2;
324     delete col3;
325     delete col4;
326 }
327 
328 void
TestRules()329 CollationAPITest::TestRules()
330 {
331     RuleBasedCollator *coll;
332     UErrorCode status = U_ZERO_ERROR;
333     UnicodeString rules;
334 
335     coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
336     if (U_FAILURE(status)) {
337         errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
338         return;
339     }
340     else {
341         logln("PASS: RuleBased Collator creation passed\n");
342     }
343 
344     coll->getRules(UCOL_TAILORING_ONLY, rules);
345     if (rules.length() != 0x00) {
346       errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
347     }
348 
349     coll->getRules(UCOL_FULL_RULES, rules);
350     if (rules.length() < 0) {
351         errln("English full rules failed");
352     }
353     delete coll;
354 }
355 
356 void
TestDecomposition()357 CollationAPITest::TestDecomposition() {
358   UErrorCode status = U_ZERO_ERROR;
359   Collator *en_US = Collator::createInstance("en_US", status),
360     *el_GR = Collator::createInstance("el_GR", status),
361     *vi_VN = Collator::createInstance("vi_VN", status);
362 
363   if (U_FAILURE(status)) {
364     errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
365     return;
366   }
367 
368   /* there is no reason to have canonical decomposition in en_US OR default locale */
369   if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
370   {
371     errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
372   }
373 
374   if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
375   {
376     errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
377   }
378 
379   if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
380   {
381     errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
382   }
383 
384   delete en_US;
385   delete el_GR;
386   delete vi_VN;
387 }
388 
389 void
TestSafeClone()390 CollationAPITest::TestSafeClone() {
391     static const int CLONETEST_COLLATOR_COUNT = 3;
392     Collator *someCollators [CLONETEST_COLLATOR_COUNT];
393     Collator *col;
394     UErrorCode err = U_ZERO_ERROR;
395     int index;
396 
397     UnicodeString test1("abCda");
398     UnicodeString test2("abcda");
399 
400     /* one default collator & two complex ones */
401     someCollators[0] = Collator::createInstance("en_US", err);
402     someCollators[1] = Collator::createInstance("ko", err);
403     someCollators[2] = Collator::createInstance("ja_JP", err);
404     if(U_FAILURE(err)) {
405       errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
406       delete someCollators[0];
407       delete someCollators[1];
408       delete someCollators[2];
409       return;
410     }
411 
412     /* change orig & clone & make sure they are independent */
413 
414     for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
415     {
416         col = someCollators[index]->safeClone();
417         if (col == 0) {
418             errln("SafeClone of collator should not return null\n");
419             break;
420         }
421         col->setStrength(Collator::TERTIARY);
422         someCollators[index]->setStrength(Collator::PRIMARY);
423         col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
424         someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
425 
426         doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
427         doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
428         delete col;
429         delete someCollators[index];
430     }
431 }
432 
433 void
TestHashCode()434 CollationAPITest::TestHashCode(/* char* par */)
435 {
436     logln("hashCode tests begin.");
437     UErrorCode success = U_ZERO_ERROR;
438     Collator *col1 = 0;
439     col1 = Collator::createInstance(Locale::getEnglish(), success);
440     if (U_FAILURE(success))
441     {
442         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
443         return;
444     }
445 
446     Collator *col2 = 0;
447     Locale dk("da", "DK", "");
448     col2 = Collator::createInstance(dk, success);
449     if (U_FAILURE(success))
450     {
451         errln("Danish collation creation failed.");
452         return;
453     }
454 
455     Collator *col3 = 0;
456     col3 = Collator::createInstance(Locale::getEnglish(), success);
457     if (U_FAILURE(success))
458     {
459         errln("2nd default collation creation failed.");
460         return;
461     }
462 
463     logln("Collator::hashCode() testing ...");
464 
465     doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
466     doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
467     doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
468 
469     logln("hashCode tests end.");
470     delete col1;
471     delete col2;
472 
473     UnicodeString test1("Abcda");
474     UnicodeString test2("abcda");
475 
476     CollationKey sortk1, sortk2, sortk3;
477     UErrorCode status = U_ZERO_ERROR;
478 
479     col3->getCollationKey(test1, sortk1, status);
480     col3->getCollationKey(test2, sortk2, status);
481     col3->getCollationKey(test2, sortk3, status);
482 
483     doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
484     doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
485 
486     delete col3;
487 }
488 
489 //----------------------------------------------------------------------------
490 // CollationKey -- Tests the CollationKey methods
491 //
492 void
TestCollationKey()493 CollationAPITest::TestCollationKey(/* char* par */)
494 {
495     logln("testing CollationKey begins...");
496     Collator *col = 0;
497     UErrorCode success=U_ZERO_ERROR;
498     col = Collator::createInstance(Locale::getEnglish(), success);
499     if (U_FAILURE(success))
500     {
501         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
502         return;
503     }
504     col->setStrength(Collator::TERTIARY);
505 
506     CollationKey sortk1, sortk2;
507     UnicodeString test1("Abcda"), test2("abcda");
508     UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
509 
510     logln("Testing weird arguments");
511     // No string vs. empty string vs. completely-ignorable string:
512     // See ICU ticket #10495.
513     CollationKey sortkNone;
514     int32_t length;
515     sortkNone.getByteArray(length);
516     doAssert(!sortkNone.isBogus() && length == 0,
517              "Default-constructed collation key should be empty");
518     CollationKey sortkEmpty;
519     col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
520     // key gets reset here
521     const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
522     doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
523              byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
524              "Empty string should return a collation key with empty levels");
525     doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
526              "Expected no collation key < collation key for empty string");
527     doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
528              "Expected collation key for empty string > no collation key");
529 
530     CollationKey sortkIgnorable;
531     // Most control codes and CGJ are completely ignorable.
532     // A string with only completely ignorables must compare equal to an empty string.
533     col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
534     sortkIgnorable.getByteArray(length);
535     doAssert(!sortkIgnorable.isBogus() && length == 3,
536              "Completely ignorable string should return a collation key with empty levels");
537     doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
538              "Completely ignorable string should compare equal to empty string");
539 
540     // bogus key returned here
541     key1Status = U_ILLEGAL_ARGUMENT_ERROR;
542     col->getCollationKey(NULL, 0, sortk1, key1Status);
543     doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
544         "Error code should return bogus collation key");
545 
546     key1Status = U_ZERO_ERROR;
547     logln("Use tertiary comparison level testing ....");
548 
549     col->getCollationKey(test1, sortk1, key1Status);
550     if (U_FAILURE(key1Status)) {
551         errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
552         return;
553     }
554     doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
555                  == Collator::GREATER,
556                 "Result should be \"Abcda\" >>> \"abcda\"");
557 
558     CollationKey sortk3(sortk2), sortkNew;
559 
560     sortkNew = sortk1;
561     doAssert((sortk1 != sortk2), "The sort keys should be different");
562     doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
563     doAssert((sortk2 == sortk3), "The sort keys should be the same");
564     doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
565     doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
566     doAssert((sortkNew != sortk3), "The sort keys should be different");
567     doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
568     doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
569     doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
570     doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
571     doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
572     doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
573     doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
574     doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
575     doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
576     doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
577 
578     int32_t    cnt1, cnt2, cnt3, cnt4;
579 
580     const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
581     const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
582 
583     const uint8_t* byteArray3 = 0;
584     byteArray3 = sortk1.getByteArray(cnt3);
585 
586     const uint8_t* byteArray4 = 0;
587     byteArray4 = sortk2.getByteArray(cnt4);
588 
589     CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
590     CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
591 
592     doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
593     doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
594     doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
595     doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
596     doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
597     doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
598 
599     logln("Equality tests : ");
600     doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
601     doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
602     doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
603     doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
604     doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
605     doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
606 
607     byteArray1 = 0;
608     byteArray2 = 0;
609 
610     sortk3 = sortk1;
611     doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
612     doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
613     logln("testing sortkey ends...");
614 
615     col->setStrength(Collator::SECONDARY);
616     doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
617                                   col->getCollationKey(test2, sortk2, key2Status))
618                                   == Collator::EQUAL,
619                                   "Result should be \"Abcda\" == \"abcda\"");
620     delete col;
621 }
622 
623 //----------------------------------------------------------------------------
624 // Tests the CollationElementIterator class.
625 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
626 //
627 void
TestElemIter()628 CollationAPITest::TestElemIter(/* char* par */)
629 {
630     logln("testing sortkey begins...");
631     Collator *col = 0;
632     UErrorCode success = U_ZERO_ERROR;
633     col = Collator::createInstance(Locale::getEnglish(), success);
634     if (U_FAILURE(success))
635     {
636         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
637         return;
638     }
639 
640     UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
641     UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
642     logln("Constructors and comparison testing....");
643     CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
644 
645     CharacterIterator *chariter=new StringCharacterIterator(testString1);
646     CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
647 
648     // copy ctor
649     CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
650     CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
651 
652     int32_t offset = iterator1->getOffset();
653     if (offset != 0) {
654         errln("Error in getOffset for collation element iterator\n");
655         return;
656     }
657     iterator1->setOffset(6, success);
658     if (U_FAILURE(success)) {
659         errln("Error in setOffset for collation element iterator\n");
660         return;
661     }
662     iterator1->setOffset(0, success);
663     int32_t order1, order2, order3;
664     doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
665     doAssert((*iterator1 != *iterator3), "The two iterators should be different");
666 
667     doAssert((*coliter == *iterator1), "The two iterators should be the same");
668     doAssert((*coliter == *iterator2), "The two iterators should be the same");
669     doAssert((*coliter != *iterator3), "The two iterators should be different");
670 
671     order1 = iterator1->next(success);
672     if (U_FAILURE(success))
673     {
674         errln("Somehow ran out of memory stepping through the iterator.");
675         return;
676     }
677 
678     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
679     order2 = iterator2->getOffset();
680     doAssert((order1 != order2), "The order result should not be the same");
681     order2 = iterator2->next(success);
682     if (U_FAILURE(success))
683     {
684         errln("Somehow ran out of memory stepping through the iterator.");
685         return;
686     }
687 
688     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
689     doAssert((order1 == order2), "The order result should be the same");
690     order3 = iterator3->next(success);
691     if (U_FAILURE(success))
692     {
693         errln("Somehow ran out of memory stepping through the iterator.");
694         return;
695     }
696 
697     doAssert((CollationElementIterator::primaryOrder(order1) ==
698         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
699     doAssert((CollationElementIterator::secondaryOrder(order1) ==
700         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
701     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
702         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
703 
704     order1 = iterator1->next(success); order3 = iterator3->next(success);
705     if (U_FAILURE(success))
706     {
707         errln("Somehow ran out of memory stepping through the iterator.");
708         return;
709     }
710 
711     doAssert((CollationElementIterator::primaryOrder(order1) ==
712         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
713     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
714         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
715 
716     order1 = iterator1->next(success);
717     order3 = iterator3->next(success);
718     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
719     /*
720     doAssert((CollationElementIterator::secondaryOrder(order1) !=
721         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
722     */
723     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
724 
725     iterator1->reset(); iterator2->reset(); iterator3->reset();
726     order1 = iterator1->next(success);
727     if (U_FAILURE(success))
728     {
729         errln("Somehow ran out of memory stepping through the iterator.");
730         return;
731     }
732 
733     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
734 
735     order2 = iterator2->next(success);
736     if (U_FAILURE(success))
737     {
738         errln("Somehow ran out of memory stepping through the iterator.");
739         return;
740     }
741 
742     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
743     doAssert((order1 == order2), "The order result should be the same");
744 
745     order3 = iterator3->next(success);
746     if (U_FAILURE(success))
747     {
748         errln("Somehow ran out of memory stepping through the iterator.");
749         return;
750     }
751 
752     doAssert((CollationElementIterator::primaryOrder(order1) ==
753         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
754     doAssert((CollationElementIterator::secondaryOrder(order1) ==
755         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
756     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
757         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
758 
759     order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
760     if (U_FAILURE(success))
761     {
762         errln("Somehow ran out of memory stepping through the iterator.");
763         return;
764     }
765 
766     doAssert((CollationElementIterator::primaryOrder(order1) ==
767         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
768     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
769         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
770 
771     order1 = iterator1->next(success); order3 = iterator3->next(success);
772     if (U_FAILURE(success))
773     {
774         errln("Somehow ran out of memory stepping through the iterator.");
775         return;
776     }
777 
778     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
779     /*
780     doAssert((CollationElementIterator::secondaryOrder(order1) !=
781         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
782     */
783     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
784     doAssert((*iterator2 != *iterator3), "The iterators should be different");
785 
786 
787     //test error values
788     success=U_UNSUPPORTED_ERROR;
789     Collator *colerror=NULL;
790     colerror=Collator::createInstance(Locale::getEnglish(), success);
791     if (colerror != 0 || success == U_ZERO_ERROR){
792         errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
793     }
794     int32_t position=coliter->previous(success);
795     if(position != CollationElementIterator::NULLORDER){
796         errln((UnicodeString)"Expected NULLORDER got" + position);
797     }
798     coliter->reset();
799     coliter->setText(*chariter, success);
800     if(!U_FAILURE(success)){
801         errln("Expeceted error");
802     }
803     iterator1->setText((UnicodeString)"hello there", success);
804     if(!U_FAILURE(success)){
805         errln("Expeceted error");
806     }
807 
808     delete chariter;
809     delete coliter;
810     delete iterator1;
811     delete iterator2;
812     delete iterator3;
813     delete col;
814 
815 
816 
817     logln("testing CollationElementIterator ends...");
818 }
819 
820 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
821 void
TestOperators()822 CollationAPITest::TestOperators(/* char* par */)
823 {
824     UErrorCode success = U_ZERO_ERROR;
825     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
826     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
827     RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
828     if (U_FAILURE(success)) {
829         errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
830         return;
831     }
832     success = U_ZERO_ERROR;
833     RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
834     if (U_FAILURE(success)) {
835         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
836         return;
837     }
838     logln("The operator tests begin : ");
839     logln("testing operator==, operator!=, clone  methods ...");
840     doAssert((*col1 != *col2), "The two different table collations compared equal");
841     *col1 = *col2;
842     doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
843 
844     success = U_ZERO_ERROR;
845     Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
846     if (U_FAILURE(success)) {
847         errln("Default collation creation failed.");
848         return;
849     }
850     doAssert((*col1 != *col3), "The two different table collations compared equal");
851     Collator* col4 = col1->clone();
852     Collator* col5 = col3->clone();
853     doAssert((*col1 == *col4), "Cloned collation objects not equal");
854     doAssert((*col3 != *col4), "Two different table collations compared equal");
855     doAssert((*col3 == *col5), "Cloned collation objects not equal");
856     doAssert((*col4 != *col5), "Two cloned collations compared equal");
857 
858     const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
859     RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
860     if (U_FAILURE(success)) {
861         errln("Creating default collation with rules failed.");
862         return;
863     }
864     doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
865 
866     success = U_ZERO_ERROR;
867     RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
868     if (U_FAILURE(success)) {
869         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
870         return;
871     }
872     success = U_ZERO_ERROR;
873     RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
874     if (U_FAILURE(success)) {
875         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
876         return;
877     }
878     success = U_ZERO_ERROR;
879     RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
880     if (U_FAILURE(success)) {
881         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
882         return;
883     }
884   //  doAssert((*col7 == *col8), "The two equal table collations compared different");
885     doAssert((*col7 != *col9), "The two different table collations compared equal");
886     doAssert((*col8 != *col9), "The two different table collations compared equal");
887 
888     logln("operator tests ended.");
889     delete col1;
890     delete col2;
891     delete col3;
892     delete col4;
893     delete col5;
894     delete col6;
895     delete col7;
896     delete col8;
897     delete col9;
898 }
899 
900 // test clone and copy
901 void
TestDuplicate()902 CollationAPITest::TestDuplicate(/* char* par */)
903 {
904     UErrorCode status = U_ZERO_ERROR;
905     Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
906     if (U_FAILURE(status)) {
907         logln("Default collator creation failed.");
908         return;
909     }
910     Collator *col2 = col1->clone();
911     doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
912     UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
913     RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
914     if (U_FAILURE(status)) {
915         logln("Collation tailoring failed.");
916         return;
917     }
918     doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
919     *col3 = *((RuleBasedCollator*)col1);
920     doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
921 
922     UCollationResult res;
923     UnicodeString first((UChar)0x0061);
924     UnicodeString second((UChar)0x0062);
925     UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
926 
927     delete col1;
928 
929     // Try using the cloned collators after deleting the original data
930     res = col2->compare(first, second, status);
931     if(res != UCOL_LESS) {
932         errln("a should be less then b after tailoring");
933     }
934     if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
935         errln(UnicodeString("English rule difference. ")
936             + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
937     }
938     res = col3->compare(first, second, status);
939     if(res != UCOL_LESS) {
940         errln("a should be less then b after tailoring");
941     }
942     if (col3->getRules() != copiedEnglishRules) {
943         errln(UnicodeString("English rule difference. ")
944             + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
945     }
946 
947     delete col2;
948     delete col3;
949 }
950 
951 void
TestCompare()952 CollationAPITest::TestCompare(/* char* par */)
953 {
954     logln("The compare tests begin : ");
955     Collator *col = 0;
956     UErrorCode success = U_ZERO_ERROR;
957     col = Collator::createInstance(Locale::getEnglish(), success);
958     if (U_FAILURE(success)) {
959         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
960         return;
961     }
962     UnicodeString test1("Abcda"), test2("abcda");
963     logln("Use tertiary comparison level testing ....");
964 
965     doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
966     doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
967     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
968 
969     col->setStrength(Collator::SECONDARY);
970     logln("Use secondary comparison level testing ....");
971 
972     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
973     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
974     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
975 
976     col->setStrength(Collator::PRIMARY);
977     logln("Use primary comparison level testing ....");
978 
979     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
980     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
981     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
982 
983     // Test different APIs
984     const UChar* t1 = test1.getBuffer();
985     int32_t t1Len = test1.length();
986     const UChar* t2 = test2.getBuffer();
987     int32_t t2Len = test2.length();
988 
989     doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
990     doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
991     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
992     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
993     doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
994     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
995 
996     col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
997     doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
998     doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
999     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
1000     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
1001     doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
1002     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
1003 
1004 
1005 
1006     logln("The compare tests end.");
1007     delete col;
1008 }
1009 
1010 void
TestGetAll()1011 CollationAPITest::TestGetAll(/* char* par */)
1012 {
1013     if (logKnownIssue("10774","Side effects from utility/LocaleTest/TestGetLocale")) {
1014         return;
1015     }
1016     int32_t count1, count2;
1017     UErrorCode status = U_ZERO_ERROR;
1018 
1019     logln("Trying Collator::getAvailableLocales(int&)");
1020 
1021     const Locale* list = Collator::getAvailableLocales(count1);
1022     for (int32_t i = 0; i < count1; ++i) {
1023         UnicodeString dispName;
1024         logln(UnicodeString("Locale name: ")
1025             + UnicodeString(list[i].getName())
1026             + UnicodeString(" , the display name is : ")
1027             + UnicodeString(list[i].getDisplayName(dispName)));
1028     }
1029 
1030     if (count1 == 0 || list == NULL) {
1031         dataerrln("getAvailableLocales(int&) returned an empty list");
1032     }
1033 
1034     logln("Trying Collator::getAvailableLocales()");
1035     StringEnumeration* localeEnum = Collator::getAvailableLocales();
1036     const UnicodeString* locStr;
1037     const char *locCStr;
1038     count2 = 0;
1039 
1040     if (localeEnum == NULL) {
1041         dataerrln("getAvailableLocales() returned NULL");
1042         return;
1043     }
1044 
1045     while ((locStr = localeEnum->snext(status)) != NULL)
1046     {
1047         logln(UnicodeString("Locale name is: ") + *locStr);
1048         count2++;
1049     }
1050     if (count1 != count2) {
1051         errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1052     }
1053 
1054     logln("Trying Collator::getAvailableLocales() clone");
1055     count1 = 0;
1056     StringEnumeration* localeEnum2 = localeEnum->clone();
1057     localeEnum2->reset(status);
1058     while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1059     {
1060         logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1061         count1++;
1062     }
1063     if (count1 != count2) {
1064         errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1065     }
1066     if (localeEnum->count(status) != count1) {
1067         errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1068     }
1069     delete localeEnum;
1070     delete localeEnum2;
1071 }
1072 
TestSortKey()1073 void CollationAPITest::TestSortKey()
1074 {
1075     UErrorCode status = U_ZERO_ERROR;
1076     /*
1077     this is supposed to open default date format, but later on it treats
1078     it like it is "en_US"
1079     - very bad if you try to run the tests on machine where default
1080       locale is NOT "en_US"
1081     */
1082     Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1083     if (U_FAILURE(status)) {
1084         errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1085         return;
1086     }
1087 
1088     if (col->getStrength() != Collator::TERTIARY)
1089     {
1090         errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1091     }
1092 
1093     /* Need to use identical strength */
1094     col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1095 
1096     UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1097           test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1098           test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1099 
1100     uint8_t sortkey1[64];
1101     uint8_t sortkey2[64];
1102     uint8_t sortkey3[64];
1103 
1104     logln("Use tertiary comparison level testing ....\n");
1105 
1106     CollationKey key1;
1107     col->getCollationKey(test1, u_strlen(test1), key1, status);
1108 
1109     CollationKey key2;
1110     col->getCollationKey(test2, u_strlen(test2), key2, status);
1111 
1112     CollationKey key3;
1113     col->getCollationKey(test3, u_strlen(test3), key3, status);
1114 
1115     doAssert(key1.compareTo(key2) == Collator::GREATER,
1116         "Result should be \"Abcda\" > \"abcda\"");
1117     doAssert(key2.compareTo(key1) == Collator::LESS,
1118         "Result should be \"abcda\" < \"Abcda\"");
1119     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1120         "Result should be \"abcda\" ==  \"abcda\"");
1121 
1122     // Clone the key2 sortkey for later.
1123     int32_t keylength = 0;
1124     const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1125     LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1126     memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1127 
1128     col->getSortKey(test1, sortkey1, 64);
1129     col->getSortKey(test2, sortkey2, 64);
1130     col->getSortKey(test3, sortkey3, 64);
1131 
1132     const uint8_t *tempkey = key1.getByteArray(keylength);
1133     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1134         "Test1 string should have the same collation key and sort key");
1135     tempkey = key2.getByteArray(keylength);
1136     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1137         "Test2 string should have the same collation key and sort key");
1138     tempkey = key3.getByteArray(keylength);
1139     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1140         "Test3 string should have the same collation key and sort key");
1141 
1142     col->getSortKey(test1, 5, sortkey1, 64);
1143     col->getSortKey(test2, 5, sortkey2, 64);
1144     col->getSortKey(test3, 5, sortkey3, 64);
1145 
1146     tempkey = key1.getByteArray(keylength);
1147     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1148         "Test1 string should have the same collation key and sort key");
1149     tempkey = key2.getByteArray(keylength);
1150     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1151         "Test2 string should have the same collation key and sort key");
1152     tempkey = key3.getByteArray(keylength);
1153     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1154         "Test3 string should have the same collation key and sort key");
1155 
1156     UnicodeString strtest1(test1);
1157     col->getSortKey(strtest1, sortkey1, 64);
1158     UnicodeString strtest2(test2);
1159     col->getSortKey(strtest2, sortkey2, 64);
1160     UnicodeString strtest3(test3);
1161     col->getSortKey(strtest3, sortkey3, 64);
1162 
1163     tempkey = key1.getByteArray(keylength);
1164     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1165         "Test1 string should have the same collation key and sort key");
1166     tempkey = key2.getByteArray(keylength);
1167     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1168         "Test2 string should have the same collation key and sort key");
1169     tempkey = key3.getByteArray(keylength);
1170     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1171         "Test3 string should have the same collation key and sort key");
1172 
1173     logln("Use secondary comparision level testing ...\n");
1174     col->setStrength(Collator::SECONDARY);
1175 
1176     col->getCollationKey(test1, u_strlen(test1), key1, status);
1177     col->getCollationKey(test2, u_strlen(test2), key2, status);
1178     col->getCollationKey(test3, u_strlen(test3), key3, status);
1179 
1180     doAssert(key1.compareTo(key2) == Collator::EQUAL,
1181         "Result should be \"Abcda\" == \"abcda\"");
1182     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1183         "Result should be \"abcda\" ==  \"abcda\"");
1184 
1185     tempkey = key2.getByteArray(keylength);
1186     doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1187              "Binary format for 'abcda' sortkey different for secondary strength!");
1188 
1189     col->getSortKey(test1, sortkey1, 64);
1190     col->getSortKey(test2, sortkey2, 64);
1191     col->getSortKey(test3, sortkey3, 64);
1192 
1193     tempkey = key1.getByteArray(keylength);
1194     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1195         "Test1 string should have the same collation key and sort key");
1196     tempkey = key2.getByteArray(keylength);
1197     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1198         "Test2 string should have the same collation key and sort key");
1199     tempkey = key3.getByteArray(keylength);
1200     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1201         "Test3 string should have the same collation key and sort key");
1202 
1203     col->getSortKey(test1, 5, sortkey1, 64);
1204     col->getSortKey(test2, 5, sortkey2, 64);
1205     col->getSortKey(test3, 5, sortkey3, 64);
1206 
1207     tempkey = key1.getByteArray(keylength);
1208     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1209         "Test1 string should have the same collation key and sort key");
1210     tempkey = key2.getByteArray(keylength);
1211     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1212         "Test2 string should have the same collation key and sort key");
1213     tempkey = key3.getByteArray(keylength);
1214     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1215         "Test3 string should have the same collation key and sort key");
1216 
1217     col->getSortKey(strtest1, sortkey1, 64);
1218     col->getSortKey(strtest2, sortkey2, 64);
1219     col->getSortKey(strtest3, sortkey3, 64);
1220 
1221     tempkey = key1.getByteArray(keylength);
1222     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1223         "Test1 string should have the same collation key and sort key");
1224     tempkey = key2.getByteArray(keylength);
1225     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1226         "Test2 string should have the same collation key and sort key");
1227     tempkey = key3.getByteArray(keylength);
1228     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1229         "Test3 string should have the same collation key and sort key");
1230 
1231     logln("testing sortkey ends...");
1232     delete col;
1233 }
1234 
TestSortKeyOverflow()1235 void CollationAPITest::TestSortKeyOverflow() {
1236     IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1237     LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1238     if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1239         return;
1240     }
1241     col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1242     UChar i_and_phi[] = { 0x438, 0x3c6 };  // Cyrillic small i & Greek small phi.
1243     // The sort key should be 6 bytes:
1244     // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1245     // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1246     uint8_t sortKey[12];
1247     int32_t length = col->getSortKey(i_and_phi, 2, sortKey, LENGTHOF(sortKey));
1248     uint8_t sortKey2[12];
1249     for (int32_t capacity = 0; capacity < length; ++capacity) {
1250         uprv_memset(sortKey2, 2, LENGTHOF(sortKey2));
1251         int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1252         if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1253             errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1254         } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1255             errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1256         }
1257     }
1258 
1259     // Now try to break getCollationKey().
1260     // Internally, it always starts with a large stack buffer.
1261     // Since we cannot control the initial capacity, we throw an increasing number
1262     // of characters at it, with the problematic part at the end.
1263     const int32_t longCapacity = 2000;
1264     // Each 'a' in the prefix should result in one primary sort key byte.
1265     // For i_and_phi we expect 6 bytes, then the NUL terminator.
1266     const int32_t maxPrefixLength = longCapacity - 6 - 1;
1267     LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1268     UnicodeString s(FALSE, i_and_phi, 2);
1269     for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1270         length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1271         CollationKey collKey;
1272         col->getCollationKey(s, collKey, errorCode);
1273         int32_t collKeyLength;
1274         const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1275         if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1276             errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1277         }
1278 
1279         // Insert an 'a' to match ++prefixLength.
1280         s.insert(prefixLength, (UChar)0x61);
1281     }
1282 }
1283 
TestMaxExpansion()1284 void CollationAPITest::TestMaxExpansion()
1285 {
1286     UErrorCode          status = U_ZERO_ERROR;
1287     UChar               ch     = 0;
1288     UChar32             unassigned = 0xEFFFD;
1289     uint32_t            sorder = 0;
1290     uint32_t            temporder = 0;
1291 
1292     UnicodeString rule("&a < ab < c/aba < d < z < ch");
1293     RuleBasedCollator coll(rule, status);
1294     if(U_FAILURE(status)) {
1295       errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1296       return;
1297     }
1298     UnicodeString str(ch);
1299     CollationElementIterator *iter =
1300                                   coll.createCollationElementIterator(str);
1301 
1302     while (ch < 0xFFFF && U_SUCCESS(status)) {
1303         int      count = 1;
1304         uint32_t order;
1305         int32_t  size = 0;
1306 
1307         ch ++;
1308 
1309         str.setCharAt(0, ch);
1310         iter->setText(str, status);
1311         order = iter->previous(status);
1312 
1313         /* thai management */
1314         if (order == 0)
1315             order = iter->previous(status);
1316 
1317         while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1318             count ++;
1319         }
1320 
1321         size = coll.getMaxExpansion(order);
1322         if (U_FAILURE(status) || size < count) {
1323             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1324                   ch, size, count);
1325         }
1326     }
1327 
1328     /* testing for exact max expansion */
1329     int32_t size;
1330     ch = 0;
1331     while (ch < 0x61) {
1332         uint32_t order;
1333         str.setCharAt(0, ch);
1334         iter->setText(str, status);
1335         order = iter->previous(status);
1336         size  = coll.getMaxExpansion(order);
1337         if (U_FAILURE(status) || size != 1) {
1338             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1339                   ch, size, 1);
1340         }
1341         ch ++;
1342     }
1343 
1344     ch = 0x63;
1345     str.setTo(ch);
1346     iter->setText(str, status);
1347     temporder = iter->previous(status);
1348     size = coll.getMaxExpansion(temporder);
1349     if (U_FAILURE(status) || size != 3) {
1350         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1351               ch, temporder, size, 3);
1352     }
1353 
1354     ch = 0x64;
1355     str.setTo(ch);
1356     iter->setText(str, status);
1357     temporder = iter->previous(status);
1358     size = coll.getMaxExpansion(temporder);
1359     if (U_FAILURE(status) || size != 1) {
1360         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1361               ch, temporder, size, 1);
1362     }
1363 
1364     str.setTo(unassigned);
1365     iter->setText(str, status);
1366     sorder = iter->previous(status);
1367     size = coll.getMaxExpansion(sorder);
1368     if (U_FAILURE(status) || size != 2) {
1369         errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1370               size, 2);
1371     }
1372 
1373     /* testing jamo */
1374     ch = 0x1165;
1375     str.setTo(ch);
1376     iter->setText(str, status);
1377     temporder = iter->previous(status);
1378     size = coll.getMaxExpansion(temporder);
1379     if (U_FAILURE(status) || size > 3) {
1380         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1381               ch, size, 3);
1382     }
1383 
1384     delete iter;
1385 
1386     /* testing special jamo &a<\u1160 */
1387     rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1388 
1389     RuleBasedCollator jamocoll(rule, status);
1390     iter = jamocoll.createCollationElementIterator(str);
1391     temporder = iter->previous(status);
1392     size = iter->getMaxExpansion(temporder);
1393     if (U_FAILURE(status) || size != 6) {
1394         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1395               ch, size, 5);
1396     }
1397 
1398     delete iter;
1399 }
1400 
TestDisplayName()1401 void CollationAPITest::TestDisplayName()
1402 {
1403     UErrorCode error = U_ZERO_ERROR;
1404     Collator *coll = Collator::createInstance("en_US", error);
1405     if (U_FAILURE(error)) {
1406         errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1407         return;
1408     }
1409     UnicodeString name;
1410     UnicodeString result;
1411     coll->getDisplayName(Locale::getCanadaFrench(), result);
1412     Locale::getCanadaFrench().getDisplayName(name);
1413     if (result.compare(name)) {
1414         errln("Failure getting the correct name for locale en_US");
1415     }
1416 
1417     coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1418     Locale::getSimplifiedChinese().getDisplayName(name);
1419     if (result.compare(name)) {
1420         errln("Failure getting the correct name for locale zh_SG");
1421     }
1422     delete coll;
1423 }
1424 
TestAttribute()1425 void CollationAPITest::TestAttribute()
1426 {
1427     UErrorCode error = U_ZERO_ERROR;
1428     Collator *coll = Collator::createInstance(error);
1429 
1430     if (U_FAILURE(error)) {
1431         errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1432         return;
1433     }
1434 
1435     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1436     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1437         U_FAILURE(error)) {
1438         errln("Setting and retrieving of the french collation failed");
1439     }
1440 
1441     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1442     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1443         U_FAILURE(error)) {
1444         errln("Setting and retrieving of the french collation failed");
1445     }
1446 
1447     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1448     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1449         U_FAILURE(error)) {
1450         errln("Setting and retrieving of the alternate handling failed");
1451     }
1452 
1453     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1454     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1455         U_FAILURE(error)) {
1456         errln("Setting and retrieving of the alternate handling failed");
1457     }
1458 
1459     coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1460     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1461         U_FAILURE(error)) {
1462         errln("Setting and retrieving of the case first attribute failed");
1463     }
1464 
1465     coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1466     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1467         U_FAILURE(error)) {
1468         errln("Setting and retrieving of the case first attribute failed");
1469     }
1470 
1471     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1472     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1473         U_FAILURE(error)) {
1474         errln("Setting and retrieving of the case level attribute failed");
1475     }
1476 
1477     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1478     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1479         U_FAILURE(error)) {
1480         errln("Setting and retrieving of the case level attribute failed");
1481     }
1482 
1483     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1484     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1485         U_FAILURE(error)) {
1486         errln("Setting and retrieving of the normalization on/off attribute failed");
1487     }
1488 
1489     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1490     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1491         U_FAILURE(error)) {
1492         errln("Setting and retrieving of the normalization on/off attribute failed");
1493     }
1494 
1495     coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1496     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1497         U_FAILURE(error)) {
1498         errln("Setting and retrieving of the collation strength failed");
1499     }
1500 
1501     coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1502     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1503         U_FAILURE(error)) {
1504         errln("Setting and retrieving of the collation strength failed");
1505     }
1506 
1507     coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1508     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1509         U_FAILURE(error)) {
1510         errln("Setting and retrieving of the collation strength failed");
1511     }
1512 
1513     coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1514     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1515         U_FAILURE(error)) {
1516         errln("Setting and retrieving of the collation strength failed");
1517     }
1518 
1519     coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1520     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1521         U_FAILURE(error)) {
1522         errln("Setting and retrieving of the collation strength failed");
1523     }
1524 
1525     delete coll;
1526 }
1527 
TestVariableTopSetting()1528 void CollationAPITest::TestVariableTopSetting() {
1529   UErrorCode status = U_ZERO_ERROR;
1530 
1531   UChar vt[256] = { 0 };
1532 
1533   // Use the root collator, not the default collator.
1534   // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1535   Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1536   if(U_FAILURE(status)) {
1537     delete coll;
1538     errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1539     return;
1540   }
1541 
1542   uint32_t oldVarTop = coll->getVariableTop(status);
1543 
1544   // ICU 53+: The character must be in a supported reordering group,
1545   // and the variable top is pinned to the end of that group.
1546   vt[0] = 0x0041;
1547 
1548   (void)coll->setVariableTop(vt, 1, status);
1549   if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1550     errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1551   }
1552 
1553   status = U_ZERO_ERROR;
1554   vt[0] = 0x24;  // dollar sign (currency symbol)
1555   uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1556 
1557   if(newVarTop != coll->getVariableTop(status)) {
1558     errln("setVariableTop(dollar sign) != following getVariableTop()");
1559   }
1560 
1561   UnicodeString dollar((UChar)0x24);
1562   UnicodeString euro((UChar)0x20AC);
1563   uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1564   assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1565                (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1566   assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1567                (int64_t)newVarTop2, (int64_t)newVarTop);
1568 
1569   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1570   assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), dollar));
1571   assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), euro));
1572   assertEquals("dollar<zero", UCOL_LESS, coll->compare(dollar, UnicodeString((UChar)0x30)));
1573 
1574   coll->setVariableTop(oldVarTop, status);
1575 
1576   uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1577 
1578   if(newVarTop != newerVarTop) {
1579     errln("Didn't set vartop properly from UnicodeString!\n");
1580   }
1581 
1582   delete coll;
1583 
1584 }
1585 
TestMaxVariable()1586 void CollationAPITest::TestMaxVariable() {
1587   UErrorCode errorCode = U_ZERO_ERROR;
1588   LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1589   if(U_FAILURE(errorCode)) {
1590     errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1591     return;
1592   }
1593 
1594   (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1595   if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1596     errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1597   }
1598 
1599   errorCode = U_ZERO_ERROR;
1600   (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1601 
1602   if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1603     errln("setMaxVariable(currency) != following getMaxVariable()");
1604   }
1605 
1606   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1607   assertEquals("empty==dollar", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1608   assertEquals("empty==euro", UCOL_EQUAL, coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1609   assertEquals("dollar<zero", UCOL_LESS, coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1610 }
1611 
TestGetLocale()1612 void CollationAPITest::TestGetLocale() {
1613   UErrorCode status = U_ZERO_ERROR;
1614   const char *rules = "&a<x<y<z";
1615   UChar rlz[256] = {0};
1616 
1617   Collator *coll = Collator::createInstance("root", status);
1618   if(U_FAILURE(status)) {
1619     dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1620     return;
1621   }
1622   Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1623   if(locale != Locale::getRoot()) {
1624     errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1625           "getLocale().getName() = \"%s\"",
1626           locale.getName());
1627   }
1628   delete coll;
1629 
1630   coll = Collator::createInstance("", status);
1631   if(U_FAILURE(status)) {
1632     dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1633     return;
1634   }
1635   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1636   if(locale != Locale::getRoot()) {
1637     errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1638           "getLocale().getName() = \"%s\"",
1639           locale.getName());
1640   }
1641   delete coll;
1642 
1643   int32_t i = 0;
1644 
1645   static const struct {
1646     const char* requestedLocale;
1647     const char* validLocale;
1648     const char* actualLocale;
1649   } testStruct[] = {
1650     // Note: Locale::getRoot().getName() == "" not "root".
1651     { "de_DE", "de", "" },
1652     { "sr_RS", "sr_Cyrl_RS", "sr" },
1653     { "en_US_CALIFORNIA", "en_US", "" },
1654     { "fr_FR_NONEXISTANT", "fr", "" },
1655     // pinyin is the default, therefore suppressed.
1656     { "zh_CN", "zh_Hans_CN", "zh" },
1657     // zh_Hant has default=stroke but the data is in zh.
1658     { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1659     { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1660     { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
1661   };
1662 
1663   u_unescape(rules, rlz, 256);
1664 
1665   /* test opening collators for different locales */
1666   for(i = 0; i<(int32_t)LENGTHOF(testStruct); i++) {
1667     status = U_ZERO_ERROR;
1668     coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1669     if(U_FAILURE(status)) {
1670       errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1671       delete coll;
1672       continue;
1673     }
1674     // The requested locale may be the same as the valid locale,
1675     // or may not be supported at all. See ticket #10477.
1676     locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1677     if(locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1678       errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1679             testStruct[i].requestedLocale,
1680             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1681     }
1682     locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1683     if(locale != testStruct[i].validLocale) {
1684       errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1685             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1686     }
1687     locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1688     if(locale != testStruct[i].actualLocale) {
1689       errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1690             testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1691     }
1692     // If we open a collator for the actual locale, we should get an equivalent one again.
1693     LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1694     if(U_FAILURE(status)) {
1695       errln("Failed to open collator for actual locale \"%s\" with %s",
1696             locale.getName(), u_errorName(status));
1697     } else {
1698       Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1699       if(actual2 != locale) {
1700         errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1701               locale.getName(), actual2.getName());
1702       }
1703       if(*coll2 != *coll) {
1704         errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1705       }
1706     }
1707     delete coll;
1708   }
1709 
1710   /* completely non-existant locale for collator should get a default collator */
1711   {
1712     Collator *defaultColl = Collator::createInstance((const Locale)NULL, status);
1713     coll = Collator::createInstance("blahaha", status);
1714     if(U_FAILURE(status)) {
1715       errln("Failed to open collator with %s", u_errorName(status));
1716       delete coll;
1717       delete defaultColl;
1718       return;
1719     }
1720     if(coll->getLocale(ULOC_VALID_LOCALE, status) !=
1721       defaultColl->getLocale(ULOC_VALID_LOCALE, status)) {
1722       errln("Valid locale for nonexisting locale locale collator differs "
1723         "from valid locale for default collator");
1724     }
1725     if(coll->getLocale(ULOC_ACTUAL_LOCALE, status) !=
1726       defaultColl->getLocale(ULOC_ACTUAL_LOCALE, status)) {
1727       errln("Actual locale for nonexisting locale locale collator differs "
1728         "from actual locale for default collator");
1729     }
1730     delete coll;
1731     delete defaultColl;
1732   }
1733 
1734 
1735 
1736   /* collator instantiated from rules should have all three locales NULL */
1737   coll = new RuleBasedCollator(rlz, status);
1738   locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1739   if(!locale.isBogus()) {
1740     errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1741   }
1742   locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1743   if(!locale.isBogus()) {
1744     errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1745   }
1746   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1747   if(!locale.isBogus()) {
1748     errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1749   }
1750   delete coll;
1751 }
1752 
/**
 * One TestBounds() table entry: a source string plus room for the sort key
 * that TestBounds() computes for it.
 */
struct teststruct {
    const char *original;   // source text, passed through u_unescape() before use
    uint8_t     key[256];   // sort key bytes produced for `original`
};
1757 
1758 
1759 
1760 U_CDECL_BEGIN
1761 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1762 compare_teststruct(const void *string1, const void *string2) {
1763   return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1764 }
1765 U_CDECL_END
1766 
1767 
TestBounds(void)1768 void CollationAPITest::TestBounds(void) {
1769     UErrorCode status = U_ZERO_ERROR;
1770 
1771     Collator *coll = Collator::createInstance(Locale("sh"), status);
1772     if(U_FAILURE(status)) {
1773       delete coll;
1774       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1775       return;
1776     }
1777 
1778     uint8_t sortkey[512], lower[512], upper[512];
1779     UChar buffer[512];
1780 
1781     static const char * const test[] = {
1782         "John Smith",
1783         "JOHN SMITH",
1784         "john SMITH",
1785         "j\\u00F6hn sm\\u00EFth",
1786         "J\\u00F6hn Sm\\u00EFth",
1787         "J\\u00D6HN SM\\u00CFTH",
1788         "john smithsonian",
1789         "John Smithsonian"
1790     };
1791 
1792     struct teststruct tests[] = {
1793         {"\\u010CAKI MIHALJ", {0}},
1794         {"\\u010CAKI MIHALJ", {0}},
1795         {"\\u010CAKI PIRO\\u0160KA", {0}},
1796         {"\\u010CABAI ANDRIJA", {0}},
1797         {"\\u010CABAI LAJO\\u0160", {0}},
1798         {"\\u010CABAI MARIJA", {0}},
1799         {"\\u010CABAI STEVAN", {0}},
1800         {"\\u010CABAI STEVAN", {0}},
1801         {"\\u010CABARKAPA BRANKO", {0}},
1802         {"\\u010CABARKAPA MILENKO", {0}},
1803         {"\\u010CABARKAPA MIROSLAV", {0}},
1804         {"\\u010CABARKAPA SIMO", {0}},
1805         {"\\u010CABARKAPA STANKO", {0}},
1806         {"\\u010CABARKAPA TAMARA", {0}},
1807         {"\\u010CABARKAPA TOMA\\u0160", {0}},
1808         {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1809         {"\\u010CABDARI\\u0106 ZORICA", {0}},
1810         {"\\u010CABI NANDOR", {0}},
1811         {"\\u010CABOVI\\u0106 MILAN", {0}},
1812         {"\\u010CABRADI AGNEZIJA", {0}},
1813         {"\\u010CABRADI IVAN", {0}},
1814         {"\\u010CABRADI JELENA", {0}},
1815         {"\\u010CABRADI LJUBICA", {0}},
1816         {"\\u010CABRADI STEVAN", {0}},
1817         {"\\u010CABRDA MARTIN", {0}},
1818         {"\\u010CABRILO BOGDAN", {0}},
1819         {"\\u010CABRILO BRANISLAV", {0}},
1820         {"\\u010CABRILO LAZAR", {0}},
1821         {"\\u010CABRILO LJUBICA", {0}},
1822         {"\\u010CABRILO SPASOJA", {0}},
1823         {"\\u010CADE\\u0160 ZDENKA", {0}},
1824         {"\\u010CADESKI BLAGOJE", {0}},
1825         {"\\u010CADOVSKI VLADIMIR", {0}},
1826         {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1827         {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1828         {"\\u010CAJA VANKA", {0}},
1829         {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1830         {"\\u010CAJI\\u0106 BORISLAV", {0}},
1831         {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1832         {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1833         {"\\u010CAKAN EUGEN", {0}},
1834         {"\\u010CAKAN EVGENIJE", {0}},
1835         {"\\u010CAKAN IVAN", {0}},
1836         {"\\u010CAKAN JULIJAN", {0}},
1837         {"\\u010CAKAN MIHAJLO", {0}},
1838         {"\\u010CAKAN STEVAN", {0}},
1839         {"\\u010CAKAN VLADIMIR", {0}},
1840         {"\\u010CAKAN VLADIMIR", {0}},
1841         {"\\u010CAKAN VLADIMIR", {0}},
1842         {"\\u010CAKARA ANA", {0}},
1843         {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1844         {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1845         {"\\u010CAKI \\u0160ANDOR", {0}},
1846         {"\\u010CAKI AMALIJA", {0}},
1847         {"\\u010CAKI ANDRA\\u0160", {0}},
1848         {"\\u010CAKI LADISLAV", {0}},
1849         {"\\u010CAKI LAJO\\u0160", {0}},
1850         {"\\u010CAKI LASLO", {0}}
1851     };
1852 
1853 
1854 
1855     int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1856     int32_t arraySize = sizeof(tests)/sizeof(tests[0]);
1857 
1858     (void)lowerSize;  // Suppress unused variable warnings.
1859     (void)upperSize;
1860 
1861     for(i = 0; i<arraySize; i++) {
1862         buffSize = u_unescape(tests[i].original, buffer, 512);
1863         skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1864     }
1865 
1866     qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1867 
1868     for(i = 0; i < arraySize-1; i++) {
1869         for(j = i+1; j < arraySize; j++) {
1870             lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1871             upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1872             for(k = i; k <= j; k++) {
1873                 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1874                     errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1875                 }
1876                 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1877                     errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1878                 }
1879             }
1880         }
1881     }
1882 
1883 
1884     for(i = 0; i<(int32_t)(sizeof(test)/sizeof(test[0])); i++) {
1885         buffSize = u_unescape(test[i], buffer, 512);
1886         skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1887         lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1888         upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1889         for(j = i+1; j<(int32_t)(sizeof(test)/sizeof(test[0])); j++) {
1890             buffSize = u_unescape(test[j], buffer, 512);
1891             skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1892             if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1893                 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1894             }
1895             if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1896                 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1897             }
1898         }
1899     }
1900     delete coll;
1901 }
1902 
1903 
TestGetTailoredSet()1904 void CollationAPITest::TestGetTailoredSet()
1905 {
1906   struct {
1907     const char *rules;
1908     const char *tests[20];
1909     int32_t testsize;
1910   } setTest[] = {
1911     { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1912     { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1913   };
1914 
1915   int32_t i = 0, j = 0;
1916   UErrorCode status = U_ZERO_ERROR;
1917 
1918   UnicodeString buff;
1919   UnicodeSet *set = NULL;
1920 
1921   for(i = 0; i < LENGTHOF(setTest); i++) {
1922     buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1923     RuleBasedCollator coll(buff, status);
1924     if(U_SUCCESS(status)) {
1925       set = coll.getTailoredSet(status);
1926       if(set->size() < setTest[i].testsize) {
1927         errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1928       }
1929       for(j = 0; j < setTest[i].testsize; j++) {
1930         buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1931         if(!set->contains(buff)) {
1932           errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1933         }
1934       }
1935       delete set;
1936     } else {
1937       errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1938     }
1939   }
1940 }
1941 
TestUClassID()1942 void CollationAPITest::TestUClassID()
1943 {
1944     char id = *((char *)RuleBasedCollator::getStaticClassID());
1945     if (id != 0) {
1946         errln("Static class id for RuleBasedCollator should be 0");
1947     }
1948     UErrorCode status = U_ZERO_ERROR;
1949     RuleBasedCollator *coll
1950         = (RuleBasedCollator *)Collator::createInstance(status);
1951     if(U_FAILURE(status)) {
1952       delete coll;
1953       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1954       return;
1955     }
1956     id = *((char *)coll->getDynamicClassID());
1957     if (id != 0) {
1958         errln("Dynamic class id for RuleBasedCollator should be 0");
1959     }
1960     id = *((char *)CollationKey::getStaticClassID());
1961     if (id != 0) {
1962         errln("Static class id for CollationKey should be 0");
1963     }
1964     CollationKey *key = new CollationKey();
1965     id = *((char *)key->getDynamicClassID());
1966     if (id != 0) {
1967         errln("Dynamic class id for CollationKey should be 0");
1968     }
1969     id = *((char *)CollationElementIterator::getStaticClassID());
1970     if (id != 0) {
1971         errln("Static class id for CollationElementIterator should be 0");
1972     }
1973     UnicodeString str("testing");
1974     CollationElementIterator *iter = coll->createCollationElementIterator(str);
1975     id = *((char *)iter->getDynamicClassID());
1976     if (id != 0) {
1977         errln("Dynamic class id for CollationElementIterator should be 0");
1978     }
1979     delete key;
1980     delete iter;
1981     delete coll;
1982 }
1983 
1984 class TestCollator  : public Collator
1985 {
1986 public:
1987     virtual Collator* clone(void) const;
1988 
1989     using Collator::compare;
1990 
1991     virtual UCollationResult compare(const UnicodeString& source,
1992                                       const UnicodeString& target,
1993                                       UErrorCode& status) const;
1994     virtual UCollationResult compare(const UnicodeString& source,
1995                                       const UnicodeString& target,
1996                                       int32_t length,
1997                                       UErrorCode& status) const;
1998     virtual UCollationResult compare(const UChar* source,
1999                                       int32_t sourceLength,
2000                                       const UChar* target,
2001                                       int32_t targetLength,
2002                                       UErrorCode& status) const;
2003     virtual CollationKey& getCollationKey(const UnicodeString&  source,
2004                                           CollationKey& key,
2005                                           UErrorCode& status) const;
2006     virtual CollationKey& getCollationKey(const UChar*source,
2007                                           int32_t sourceLength,
2008                                           CollationKey& key,
2009                                           UErrorCode& status) const;
2010     virtual int32_t hashCode(void) const;
2011     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
2012     virtual ECollationStrength getStrength(void) const;
2013     virtual void setStrength(ECollationStrength newStrength);
2014     virtual UClassID getDynamicClassID(void) const;
2015     virtual void getVersion(UVersionInfo info) const;
2016     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2017                               UErrorCode &status);
2018     virtual UColAttributeValue getAttribute(UColAttribute attr,
2019                                             UErrorCode &status) const;
2020     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2021                                     UErrorCode &status);
2022     virtual uint32_t setVariableTop(const UnicodeString &varTop,
2023                                     UErrorCode &status);
2024     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2025     virtual uint32_t getVariableTop(UErrorCode &status) const;
2026     virtual int32_t getSortKey(const UnicodeString& source,
2027                             uint8_t* result,
2028                             int32_t resultLength) const;
2029     virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2030                              uint8_t*result, int32_t resultLength) const;
2031     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2032     virtual UBool operator==(const Collator& other) const;
2033     // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2034     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2035     TestCollator() : Collator() {};
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2036     TestCollator(UCollationStrength collationStrength,
2037            UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
2038 };
2039 
operator ==(const Collator & other) const2040 inline UBool TestCollator::operator==(const Collator& other) const {
2041     // TestCollator has no fields, so we test for identity.
2042     return this == &other;
2043 
2044     // Normally, subclasses should do something like the following:
2045     //    if (this == &other) { return TRUE; }
2046     //    if (!Collator::operator==(other)) { return FALSE; }  // not the same class
2047     //
2048     //    const TestCollator &o = (const TestCollator&)other;
2049     //    (compare this vs. o's subclass fields)
2050 }
2051 
clone() const2052 Collator* TestCollator::clone() const
2053 {
2054     return new TestCollator();
2055 }
2056 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2057 UCollationResult TestCollator::compare(const UnicodeString& source,
2058                                         const UnicodeString& target,
2059                                         UErrorCode& status) const
2060 {
2061   if(U_SUCCESS(status)) {
2062     return UCollationResult(source.compare(target));
2063   } else {
2064     return UCOL_EQUAL;
2065   }
2066 }
2067 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2068 UCollationResult TestCollator::compare(const UnicodeString& source,
2069                                         const UnicodeString& target,
2070                                         int32_t length,
2071                                         UErrorCode& status) const
2072 {
2073   if(U_SUCCESS(status)) {
2074     return UCollationResult(source.compare(0, length, target));
2075   } else {
2076     return UCOL_EQUAL;
2077   }
2078 }
2079 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2080 UCollationResult TestCollator::compare(const UChar* source,
2081                                         int32_t sourceLength,
2082                                         const UChar* target,
2083                                         int32_t targetLength,
2084                                         UErrorCode& status) const
2085 {
2086     UnicodeString s(source, sourceLength);
2087     UnicodeString t(target, targetLength);
2088     return compare(s, t, status);
2089 }
2090 
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2091 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2092                                             CollationKey& key,
2093                                             UErrorCode& status) const
2094 {
2095     char temp[100];
2096     int length = 100;
2097     length = source.extract(temp, length, NULL, status);
2098     temp[length] = 0;
2099     CollationKey tempkey((uint8_t*)temp, length);
2100     key = tempkey;
2101     return key;
2102 }
2103 
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2104 CollationKey& TestCollator::getCollationKey(const UChar*source,
2105                                           int32_t sourceLength,
2106                                           CollationKey& key,
2107                                           UErrorCode& status) const
2108 {
2109     //s tack allocation used since collationkey does not keep the unicodestring
2110     UnicodeString str(source, sourceLength);
2111     return getCollationKey(str, key, status);
2112 }
2113 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2114 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2115                                  int32_t resultLength) const
2116 {
2117     UErrorCode status = U_ZERO_ERROR;
2118     int32_t length = source.extract((char *)result, resultLength, NULL,
2119                                     status);
2120     result[length] = 0;
2121     return length;
2122 }
2123 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2124 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2125                                  uint8_t*result, int32_t resultLength) const
2126 {
2127     UnicodeString str(source, sourceLength);
2128     return getSortKey(str, result, resultLength);
2129 }
2130 
hashCode() const2131 int32_t TestCollator::hashCode() const
2132 {
2133     return 0;
2134 }
2135 
getLocale(ULocDataLocaleType type,UErrorCode & status) const2136 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2137 {
2138     // api not used, this is to make the compiler happy
2139     if (U_FAILURE(status)) {
2140         (void)type;
2141     }
2142     return NULL;
2143 }
2144 
getStrength() const2145 Collator::ECollationStrength TestCollator::getStrength() const
2146 {
2147     return TERTIARY;
2148 }
2149 
setStrength(Collator::ECollationStrength newStrength)2150 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2151 {
2152     // api not used, this is to make the compiler happy
2153     (void)newStrength;
2154 }
2155 
getDynamicClassID(void) const2156 UClassID TestCollator::getDynamicClassID(void) const
2157 {
2158     return 0;
2159 }
2160 
getVersion(UVersionInfo info) const2161 void TestCollator::getVersion(UVersionInfo info) const
2162 {
2163     // api not used, this is to make the compiler happy
2164     memset(info, 0, U_MAX_VERSION_LENGTH);
2165 }
2166 
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2167 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2168                                 UErrorCode & /*status*/)
2169 {
2170 }
2171 
getAttribute(UColAttribute attr,UErrorCode & status) const2172 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2173                                               UErrorCode &status) const
2174 {
2175     // api not used, this is to make the compiler happy
2176     if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2177         return UCOL_OFF;
2178     }
2179     return UCOL_DEFAULT;
2180 }
2181 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2182 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2183                                   UErrorCode &status)
2184 {
2185     // api not used, this is to make the compiler happy
2186     if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2187         status = U_ILLEGAL_ARGUMENT_ERROR;
2188     }
2189     return 0;
2190 }
2191 
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2192 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2193                                   UErrorCode &status)
2194 {
2195     // api not used, this is to make the compiler happy
2196     if (U_SUCCESS(status) && varTop.length() == 0) {
2197         status = U_ILLEGAL_ARGUMENT_ERROR;
2198     }
2199     return 0;
2200 }
2201 
setVariableTop(uint32_t varTop,UErrorCode & status)2202 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2203 {
2204     // api not used, this is to make the compiler happy
2205     if (U_SUCCESS(status) && varTop == 0) {
2206         status = U_ILLEGAL_ARGUMENT_ERROR;
2207     }
2208 }
2209 
getVariableTop(UErrorCode & status) const2210 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2211 {
2212 
2213     // api not used, this is to make the compiler happy
2214     if (U_SUCCESS(status)) {
2215         return 0;
2216     }
2217     return (uint32_t)(0xFFFFFFFFu);
2218 }
2219 
getTailoredSet(UErrorCode & status) const2220 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2221 {
2222     return Collator::getTailoredSet(status);
2223 }
2224 
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2225 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2226 {
2227     Collator::setLocales(requestedLocale, validLocale, actualLocale);
2228 }
2229 
2230 
TestSubclass()2231 void CollationAPITest::TestSubclass()
2232 {
2233     TestCollator col1;
2234     TestCollator col2;
2235     doAssert(col1 != col2, "2 instances of TestCollator should be different");
2236     if (col1.hashCode() != col2.hashCode()) {
2237         errln("Every TestCollator has the same hashcode");
2238     }
2239     UnicodeString abc("abc", 3);
2240     UnicodeString bcd("bcd", 3);
2241     if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2242         errln("TestCollator compare should be the same as the default "
2243               "string comparison");
2244     }
2245     CollationKey key;
2246     UErrorCode status = U_ZERO_ERROR;
2247     col1.getCollationKey(abc, key, status);
2248     int32_t length = 0;
2249     const char* bytes = (const char *)key.getByteArray(length);
2250     UnicodeString keyarray(bytes, length, NULL, status);
2251     if (abc != keyarray) {
2252         errln("TestCollator collationkey API is returning wrong values");
2253     }
2254 
2255     UnicodeSet expectedset(0, 0x10FFFF);
2256     UnicodeSet *defaultset = col1.getTailoredSet(status);
2257     if (!defaultset->containsAll(expectedset)
2258         || !expectedset.containsAll(*defaultset)) {
2259         errln("Error: expected default tailoring to be 0 to 0x10ffff");
2260     }
2261     delete defaultset;
2262 
2263     // use base class implementation
2264     Locale loc1 = Locale::getGermany();
2265     Locale loc2 = Locale::getFrance();
2266     col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2267 
2268     UnicodeString displayName;
2269     col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2270 
2271     TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2272     UnicodeString a("a");
2273     UnicodeString b("b");
2274     Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2275     if(col1.compare(a, b) != result) {
2276       errln("Collator doesn't give default result");
2277     }
2278     if(col1.compare(a, b, 1) != result) {
2279       errln("Collator doesn't give default result");
2280     }
2281     if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2282       errln("Collator doesn't give default result");
2283     }
2284 }
2285 
TestNULLCharTailoring()2286 void CollationAPITest::TestNULLCharTailoring()
2287 {
2288     UErrorCode status = U_ZERO_ERROR;
2289     UChar buf[256] = {0};
2290     int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2291     UnicodeString first((UChar)0x0061);
2292     UnicodeString second((UChar)0);
2293     RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2294     if(U_FAILURE(status)) {
2295         delete coll;
2296         errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2297         return;
2298     }
2299     UCollationResult res = coll->compare(first, second, status);
2300     if(res != UCOL_LESS) {
2301         errln("a should be less then NULL after tailoring");
2302     }
2303     delete coll;
2304 }
2305 
TestClone()2306 void CollationAPITest::TestClone() {
2307     logln("\ninit c0");
2308     UErrorCode status = U_ZERO_ERROR;
2309     RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2310 
2311     if (U_FAILURE(status)) {
2312         errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2313         return;
2314     }
2315 
2316     c0->setStrength(Collator::TERTIARY);
2317     dump("c0", c0, status);
2318 
2319     logln("\ninit c1");
2320     RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2321     c1->setStrength(Collator::TERTIARY);
2322     UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2323     if(val == UCOL_LOWER_FIRST){
2324         c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2325     }else{
2326         c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2327     }
2328     dump("c0", c0, status);
2329     dump("c1", c1, status);
2330 
2331     logln("\ninit c2");
2332     RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2333     val = c2->getAttribute(UCOL_CASE_FIRST, status);
2334     if(val == UCOL_LOWER_FIRST){
2335         c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2336     }else{
2337         c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2338     }
2339     if(U_FAILURE(status)){
2340         errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2341         return;
2342     }
2343     dump("c0", c0, status);
2344     dump("c1", c1, status);
2345     dump("c2", c2, status);
2346     if(*c1 == *c2){
2347         errln("The cloned objects refer to same data");
2348     }
2349     delete c0;
2350     delete c1;
2351     delete c2;
2352 }
2353 
TestCloneBinary()2354 void CollationAPITest::TestCloneBinary() {
2355     IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2356     LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2357     LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2358     if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2359         return;
2360     }
2361     RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2362     RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2363     if(rbRoot == NULL || rbc == NULL) {
2364         infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2365         return;
2366     }
2367     rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2368     UnicodeString uUmlaut((UChar)0xfc);
2369     UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2370     assertEquals("rbc/primary: u-umlaut==ue", UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2371     uint8_t bin[25000];
2372     int32_t binLength = rbc->cloneBinary(bin, LENGTHOF(bin), errorCode);
2373     if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) {
2374         return;
2375     }
2376     logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2377 
2378     RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2379     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2380         return;
2381     }
2382     assertEquals("rbc2.strength==primary", UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2383     assertEquals("rbc2: u-umlaut==ue", UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2384     assertTrue("rbc==rbc2", *rbc == rbc2);
2385     uint8_t bin2[25000];
2386     int32_t bin2Length = rbc2.cloneBinary(bin2, LENGTHOF(bin2), errorCode);
2387     assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2388     assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2389 }
2390 
TestIterNumeric()2391 void CollationAPITest::TestIterNumeric() {
2392     // Regression test for ticket #9915.
2393     // The collation code sometimes masked the continuation marker away
2394     // but later tested the result for isContinuation().
2395     // This test case failed because the third bytes of the computed numeric-collation primaries
2396     // were permutated with the script reordering table.
2397     // It should have been possible to reproduce this with the root collator
2398     // and characters with appropriate 3-byte primary weights.
2399     // The effectiveness of this test depends completely on the collation elements
2400     // and on the implementation code.
2401     IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2402     RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2403     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2404         return;
2405     }
2406     coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2407     UCharIterator iter40, iter72;
2408     uiter_setUTF8(&iter40, "\x34\x30", 2);
2409     uiter_setUTF8(&iter72, "\x37\x32", 2);
2410     UCollationResult result = coll.compare(iter40, iter72, errorCode);
2411     assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2412 }
2413 
TestBadKeywords()2414 void CollationAPITest::TestBadKeywords() {
2415     // Test locale IDs with errors.
2416     // Valid locale IDs are tested via data-driven tests.
2417     UErrorCode errorCode = U_ZERO_ERROR;
2418     Locale bogusLocale(Locale::getRoot());
2419     bogusLocale.setToBogus();
2420     LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2421     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2422         errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2423               u_errorName(errorCode));
2424     }
2425 
2426     // Unknown value.
2427     const char *localeID = "it-u-ks-xyz";
2428     errorCode = U_ZERO_ERROR;
2429     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2430     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2431         errln("Collator::createInstance(%s) did not fail as expected - %s",
2432               localeID, u_errorName(errorCode));
2433     }
2434 
2435     // Unsupported attributes.
2436     localeID = "it@colHiraganaQuaternary=true";
2437     errorCode = U_ZERO_ERROR;
2438     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2439     if(errorCode != U_UNSUPPORTED_ERROR) {
2440         errln("Collator::createInstance(%s) did not fail as expected - %s",
2441               localeID, u_errorName(errorCode));
2442     }
2443 
2444     localeID = "it-u-vt-u24";
2445     errorCode = U_ZERO_ERROR;
2446     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2447     if(errorCode != U_UNSUPPORTED_ERROR) {
2448         errln("Collator::createInstance(%s) did not fail as expected - %s",
2449               localeID, u_errorName(errorCode));
2450     }
2451 }
2452 
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2453  void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2454     const char* bigone = "One";
2455     const char* littleone = "one";
2456 
2457     logln(msg + " " + c->compare(bigone, littleone) +
2458                         " s: " + c->getStrength() +
2459                         " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2460 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2461 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2462 {
2463     if (exec) logln("TestSuite CollationAPITest: ");
2464     TESTCASE_AUTO_BEGIN;
2465     TESTCASE_AUTO(TestProperty);
2466     TESTCASE_AUTO(TestOperators);
2467     TESTCASE_AUTO(TestDuplicate);
2468     TESTCASE_AUTO(TestCompare);
2469     TESTCASE_AUTO(TestHashCode);
2470     TESTCASE_AUTO(TestCollationKey);
2471     TESTCASE_AUTO(TestElemIter);
2472     TESTCASE_AUTO(TestGetAll);
2473     TESTCASE_AUTO(TestRuleBasedColl);
2474     TESTCASE_AUTO(TestDecomposition);
2475     TESTCASE_AUTO(TestSafeClone);
2476     TESTCASE_AUTO(TestSortKey);
2477     TESTCASE_AUTO(TestSortKeyOverflow);
2478     TESTCASE_AUTO(TestMaxExpansion);
2479     TESTCASE_AUTO(TestDisplayName);
2480     TESTCASE_AUTO(TestAttribute);
2481     TESTCASE_AUTO(TestVariableTopSetting);
2482     TESTCASE_AUTO(TestMaxVariable);
2483     TESTCASE_AUTO(TestRules);
2484     TESTCASE_AUTO(TestGetLocale);
2485     TESTCASE_AUTO(TestBounds);
2486     TESTCASE_AUTO(TestGetTailoredSet);
2487     TESTCASE_AUTO(TestUClassID);
2488     TESTCASE_AUTO(TestSubclass);
2489     TESTCASE_AUTO(TestNULLCharTailoring);
2490     TESTCASE_AUTO(TestClone);
2491     TESTCASE_AUTO(TestCloneBinary);
2492     TESTCASE_AUTO(TestIterNumeric);
2493     TESTCASE_AUTO(TestBadKeywords);
2494     TESTCASE_AUTO_END;
2495 }
2496 
2497 #endif /* #if !UCONFIG_NO_COLLATION */
2498