• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 //===============================================================================
9 //
10 // File apicoll.cpp
11 //
12 //
13 //
14 // Created by: Helena Shih
15 //
16 // Modification History:
17 //
18 //  Date         Name          Description
19 //  2/5/97      aliu        Added streamIn and streamOut methods.  Added
20 //                          constructor which reads RuleBasedCollator object from
21 //                          a binary file.  Added writeToFile method which streams
22 //                          RuleBasedCollator out to a binary file.  The streamIn
23 //                          and streamOut methods use istream and ostream objects
24 //                          in binary mode.
25 //  6/30/97     helena      Added tests for CollationElementIterator::setText, getOffset
26 //                          setOffset and DecompositionIterator::getOffset, setOffset.
27 //                          DecompositionIterator is made public so add class scope
28 //                          testing.
29 //  02/10/98    damiba      Added test for compare(UnicodeString&, UnicodeString&, int32_t)
30 //===============================================================================
31 
32 #include "unicode/utypes.h"
33 
34 #if !UCONFIG_NO_COLLATION
35 
36 #include "unicode/localpointer.h"
37 #include "unicode/coll.h"
38 #include "unicode/tblcoll.h"
39 #include "unicode/coleitr.h"
40 #include "unicode/sortkey.h"
41 #include "apicoll.h"
42 #include "unicode/chariter.h"
43 #include "unicode/schriter.h"
44 #include "unicode/ustring.h"
45 #include "unicode/ucol.h"
46 
47 #include "sfwdchit.h"
48 #include "cmemory.h"
49 #include <stdlib.h>
50 
51 void
doAssert(UBool condition,const char * message)52 CollationAPITest::doAssert(UBool condition, const char *message)
53 {
54     if (!condition) {
55         errln(UnicodeString("ERROR : ") + message);
56     }
57 }
58 
59 // Collator Class Properties
60 // ctor, dtor, createInstance, compare, getStrength/setStrength
61 // getDecomposition/setDecomposition, getDisplayName
62 void
TestProperty()63 CollationAPITest::TestProperty(/* char* par */)
64 {
65     UErrorCode success = U_ZERO_ERROR;
66     Collator *col = 0;
67     /*
68      * Expected version of the English collator.
69      * Currently, the major/minor version numbers change when the builder code
70      * changes,
71      * number 2 is from the tailoring data version and
72      * number 3 is the UCA version.
73      * This changes with every UCA version change, and the expected value
74      * needs to be adjusted.
75      * Same in cintltst/capitst.c.
76      */
77     UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};  // from ICU 4.4/UCA 5.2
78     UVersionInfo versionArray;
79 
80     logln("The property tests begin : ");
81     logln("Test ctors : ");
82     col = Collator::createInstance(Locale::getEnglish(), success);
83     if (U_FAILURE(success)){
84         errcheckln(success, "Default Collator creation failed. - %s", u_errorName(success));
85         return;
86     }
87 
88     StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success);
89     if (U_FAILURE(success)){
90         errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success));
91         return;
92     }
93     delete kwEnum;
94 
95     col->getVersion(versionArray);
96     // Check for a version greater than some value rather than equality
97     // so that we need not update the expected version each time.
98     if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
99       errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
100             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
101     } else {
102       logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
103             versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
104     }
105 
106     doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed");
107     doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed");
108     doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed");
109     doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed");
110     doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed");
111     doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed");
112 
113     doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
114     success = U_ZERO_ERROR;
115     {
116         UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
117         UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
118         UCharIterator abauIter, abssIter;
119         uiter_setReplaceable(&abauIter, &abau);
120         uiter_setReplaceable(&abssIter, &abss);
121         doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
122         success = U_ZERO_ERROR;
123     }
124 
125     /*start of update [Bertrand A. D. 02/10/98]*/
126     doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed");
127     doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB  with length 2 comparison failed");
128     doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa  with length 1 comparison failed");
129     doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa  with length 2 comparison failed");
130     doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed");
131     doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed");
132     doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed");
133     /*end of update [Bertrand A. D. 02/10/98]*/
134 
135 
136     logln("Test ctors ends.");
137     logln("testing Collator::getStrength() method ...");
138     doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength");
139     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
140 
141 
142     logln("testing Collator::setStrength() method ...");
143     col->setStrength(Collator::SECONDARY);
144     doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference");
145     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
146     doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength");
147 
148     UnicodeString name;
149 
150     logln("Get display name for the US English collation in German : ");
151     logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name));
152     doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
153 
154     logln("Get display name for the US English collation in English : ");
155     logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name));
156     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed");
157 #if 0
158     // weiv : this test is bogus if we're running on any machine that has different default locale than English.
159     // Therefore, it is banned!
160     logln("Get display name for the US English in default locale language : ");
161     logln(Collator::getDisplayName(Locale::US, name));
162     doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
163 #endif
164     delete col; col = 0;
165     RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK",
166                                                                             success);
167     if (U_FAILURE(success)) {
168         errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success));
169         return;
170     }
171     const UnicodeString &daRules = rcol->getRules();
172     if(daRules.isEmpty()) {
173         dataerrln("missing da_DK tailoring rule string");
174     } else {
175         doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
176     }
177     delete rcol;
178 
179     col = Collator::createInstance(Locale::getFrench(), success);
180     if (U_FAILURE(success))
181     {
182         errln("Creating French collation failed.");
183         return;
184     }
185 
186     col->setStrength(Collator::PRIMARY);
187     logln("testing Collator::getStrength() method again ...");
188     doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength");
189     doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference");
190 
191     logln("testing French Collator::setStrength() method ...");
192     col->setStrength(Collator::TERTIARY);
193     doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference");
194     doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference");
195     doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference");
196     delete col;
197 
198     logln("Create junk collation: ");
199     Locale abcd("ab", "CD", "");
200     success = U_ZERO_ERROR;
201     Collator *junk = 0;
202     junk = Collator::createInstance(abcd, success);
203 
204     if (U_FAILURE(success))
205     {
206         errln("Junk collation creation failed, should at least return default.");
207         return;
208     }
209 
210     doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(),
211                "The root collation should be returned for an unsupported language.");
212     Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success);
213     if (U_FAILURE(success))
214     {
215         errln("Creating fr_CA collator failed.");
216         delete junk;
217         return;
218     }
219 
220     // If the default locale isn't French, the French and non-French collators
221     // should be different
222     if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) {
223         doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator.");
224     }
225     Collator *aFrCol = frCol->clone();
226     doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed.");
227     logln("Collator property test ended.");
228 
229     delete frCol;
230     delete aFrCol;
231     delete junk;
232 }
233 
234 void
TestRuleBasedColl()235 CollationAPITest::TestRuleBasedColl()
236 {
237     RuleBasedCollator *col1, *col2, *col3, *col4;
238     UErrorCode status = U_ZERO_ERROR;
239 
240     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
241     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
242 
243     col1 = new RuleBasedCollator(ruleset1, status);
244     if (U_FAILURE(status)) {
245         errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status));
246         return;
247     }
248     else {
249         logln("PASS: RuleBased Collator creation passed\n");
250     }
251 
252     status = U_ZERO_ERROR;
253     col2 = new RuleBasedCollator(ruleset2, status);
254     if (U_FAILURE(status)) {
255         errln("RuleBased Collator creation failed.\n");
256         return;
257     }
258     else {
259         logln("PASS: RuleBased Collator creation passed\n");
260     }
261 
262     status = U_ZERO_ERROR;
263     Locale locale("aa", "AA");
264     col3 = (RuleBasedCollator *)Collator::createInstance(locale, status);
265     if (U_FAILURE(status)) {
266         errln("Fallback Collator creation failed.: %s\n");
267         return;
268     }
269     else {
270         logln("PASS: Fallback Collator creation passed\n");
271     }
272     delete col3;
273 
274     status = U_ZERO_ERROR;
275     col3 = (RuleBasedCollator *)Collator::createInstance(status);
276     if (U_FAILURE(status)) {
277         errln("Default Collator creation failed.: %s\n");
278         return;
279     }
280     else {
281         logln("PASS: Default Collator creation passed\n");
282     }
283 
284     UnicodeString rule1 = col1->getRules();
285     UnicodeString rule2 = col2->getRules();
286     UnicodeString rule3 = col3->getRules();
287 
288     doAssert(rule1 != rule2, "Default collator getRules failed");
289     doAssert(rule2 != rule3, "Default collator getRules failed");
290     doAssert(rule1 != rule3, "Default collator getRules failed");
291 
292     col4 = new RuleBasedCollator(rule2, status);
293     if (U_FAILURE(status)) {
294         errln("RuleBased Collator creation failed.\n");
295         return;
296     }
297 
298     UnicodeString rule4 = col4->getRules();
299     doAssert(rule2 == rule4, "Default collator getRules failed");
300     int32_t length4 = 0;
301     uint8_t *clonedrule4 = col4->cloneRuleData(length4, status);
302     if (U_FAILURE(status)) {
303         errln("Cloned rule data failed.\n");
304         return;
305     }
306 
307  //   free(clonedrule4);     BAD API!!!!
308     uprv_free(clonedrule4);
309 
310 
311     delete col1;
312     delete col2;
313     delete col3;
314     delete col4;
315 }
316 
317 void
TestRules()318 CollationAPITest::TestRules()
319 {
320     RuleBasedCollator *coll;
321     UErrorCode status = U_ZERO_ERROR;
322     UnicodeString rules;
323 
324     coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status);
325     if (U_FAILURE(status)) {
326         errcheckln(status, "English Collator creation failed. - %s", u_errorName(status));
327         return;
328     }
329     else {
330         logln("PASS: RuleBased Collator creation passed\n");
331     }
332 
333     coll->getRules(UCOL_TAILORING_ONLY, rules);
334     if (rules.length() != 0x00) {
335       errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00);
336     }
337 
338     coll->getRules(UCOL_FULL_RULES, rules);
339     if (rules.length() < 0) {
340         errln("English full rules failed");
341     }
342     delete coll;
343 }
344 
345 void
TestDecomposition()346 CollationAPITest::TestDecomposition() {
347   UErrorCode status = U_ZERO_ERROR;
348   Collator *en_US = Collator::createInstance("en_US", status),
349     *el_GR = Collator::createInstance("el_GR", status),
350     *vi_VN = Collator::createInstance("vi_VN", status);
351 
352   if (U_FAILURE(status)) {
353     errcheckln(status, "ERROR: collation creation failed. - %s", u_errorName(status));
354     return;
355   }
356 
357   /* there is no reason to have canonical decomposition in en_US OR default locale */
358   if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
359   {
360     errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
361   }
362 
363   if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON)
364   {
365     errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
366   }
367 
368   if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
369   {
370     errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
371   }
372 
373   delete en_US;
374   delete el_GR;
375   delete vi_VN;
376 }
377 
378 void
TestSafeClone()379 CollationAPITest::TestSafeClone() {
380     static const int CLONETEST_COLLATOR_COUNT = 3;
381     Collator *someCollators [CLONETEST_COLLATOR_COUNT];
382     Collator *col;
383     UErrorCode err = U_ZERO_ERROR;
384     int index;
385 
386     UnicodeString test1("abCda");
387     UnicodeString test2("abcda");
388 
389     /* one default collator & two complex ones */
390     someCollators[0] = Collator::createInstance("en_US", err);
391     someCollators[1] = Collator::createInstance("ko", err);
392     someCollators[2] = Collator::createInstance("ja_JP", err);
393     if(U_FAILURE(err)) {
394       errcheckln(err, "Couldn't instantiate collators. Error: %s", u_errorName(err));
395       delete someCollators[0];
396       delete someCollators[1];
397       delete someCollators[2];
398       return;
399     }
400 
401     /* change orig & clone & make sure they are independent */
402 
403     for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++)
404     {
405         col = someCollators[index]->safeClone();
406         if (col == 0) {
407             errln("SafeClone of collator should not return null\n");
408             break;
409         }
410         col->setStrength(Collator::TERTIARY);
411         someCollators[index]->setStrength(Collator::PRIMARY);
412         col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
413         someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err);
414 
415         doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" ");
416         doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\"");
417         delete col;
418         delete someCollators[index];
419     }
420 }
421 
422 void
TestHashCode()423 CollationAPITest::TestHashCode(/* char* par */)
424 {
425     logln("hashCode tests begin.");
426     UErrorCode success = U_ZERO_ERROR;
427     Collator *col1 = 0;
428     col1 = Collator::createInstance(Locale::getEnglish(), success);
429     if (U_FAILURE(success))
430     {
431         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
432         return;
433     }
434 
435     Collator *col2 = 0;
436     Locale dk("da", "DK", "");
437     col2 = Collator::createInstance(dk, success);
438     if (U_FAILURE(success))
439     {
440         errln("Danish collation creation failed.");
441         return;
442     }
443 
444     Collator *col3 = 0;
445     col3 = Collator::createInstance(Locale::getEnglish(), success);
446     if (U_FAILURE(success))
447     {
448         errln("2nd default collation creation failed.");
449         return;
450     }
451 
452     logln("Collator::hashCode() testing ...");
453 
454     doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" );
455     doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" );
456     doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" );
457 
458     logln("hashCode tests end.");
459     delete col1;
460     delete col2;
461 
462     UnicodeString test1("Abcda");
463     UnicodeString test2("abcda");
464 
465     CollationKey sortk1, sortk2, sortk3;
466     UErrorCode status = U_ZERO_ERROR;
467 
468     col3->getCollationKey(test1, sortk1, status);
469     col3->getCollationKey(test2, sortk2, status);
470     col3->getCollationKey(test2, sortk3, status);
471 
472     doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect");
473     doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" );
474 
475     delete col3;
476 }
477 
478 //----------------------------------------------------------------------------
479 // CollationKey -- Tests the CollationKey methods
480 //
481 void
TestCollationKey()482 CollationAPITest::TestCollationKey(/* char* par */)
483 {
484     logln("testing CollationKey begins...");
485     Collator *col = 0;
486     UErrorCode success=U_ZERO_ERROR;
487     col = Collator::createInstance(Locale::getEnglish(), success);
488     if (U_FAILURE(success))
489     {
490         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
491         return;
492     }
493     col->setStrength(Collator::TERTIARY);
494 
495     CollationKey sortk1, sortk2;
496     UnicodeString test1("Abcda"), test2("abcda");
497     UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR;
498 
499     logln("Testing weird arguments");
500     // No string vs. empty string vs. completely-ignorable string:
501     // See ICU ticket #10495.
502     CollationKey sortkNone;
503     int32_t length;
504     sortkNone.getByteArray(length);
505     doAssert(!sortkNone.isBogus() && length == 0,
506              "Default-constructed collation key should be empty");
507     CollationKey sortkEmpty;
508     col->getCollationKey(NULL, 0, sortkEmpty, key1Status);
509     // key gets reset here
510     const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length);
511     doAssert(sortkEmpty.isBogus() == FALSE && length == 3 &&
512              byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0,
513              "Empty string should return a collation key with empty levels");
514     doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS,
515              "Expected no collation key < collation key for empty string");
516     doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER,
517              "Expected collation key for empty string > no collation key");
518 
519     CollationKey sortkIgnorable;
520     // Most control codes and CGJ are completely ignorable.
521     // A string with only completely ignorables must compare equal to an empty string.
522     col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status);
523     sortkIgnorable.getByteArray(length);
524     doAssert(!sortkIgnorable.isBogus() && length == 3,
525              "Completely ignorable string should return a collation key with empty levels");
526     doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL,
527              "Completely ignorable string should compare equal to empty string");
528 
529     // bogus key returned here
530     key1Status = U_ILLEGAL_ARGUMENT_ERROR;
531     col->getCollationKey(NULL, 0, sortk1, key1Status);
532     doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0,
533         "Error code should return bogus collation key");
534 
535     key1Status = U_ZERO_ERROR;
536     logln("Use tertiary comparison level testing ....");
537 
538     col->getCollationKey(test1, sortk1, key1Status);
539     if (U_FAILURE(key1Status)) {
540         errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status));
541         return;
542     }
543     doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status)))
544                  == Collator::GREATER,
545                 "Result should be \"Abcda\" >>> \"abcda\"");
546 
547     CollationKey sortk3(sortk2), sortkNew;
548 
549     sortkNew = sortk1;
550     doAssert((sortk1 != sortk2), "The sort keys should be different");
551     doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed");
552     doAssert((sortk2 == sortk3), "The sort keys should be the same");
553     doAssert((sortk1 == sortkNew), "The sort keys assignment failed");
554     doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed");
555     doAssert((sortkNew != sortk3), "The sort keys should be different");
556     doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
557     doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\"");
558     doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\"");
559     doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)");
560     doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)");
561     doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\"");
562     doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\"");
563     doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\"");
564     doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)");
565     doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)");
566 
567     int32_t    cnt1, cnt2, cnt3, cnt4;
568 
569     const uint8_t* byteArray1 = sortk1.getByteArray(cnt1);
570     const uint8_t* byteArray2 = sortk2.getByteArray(cnt2);
571 
572     const uint8_t* byteArray3 = 0;
573     byteArray3 = sortk1.getByteArray(cnt3);
574 
575     const uint8_t* byteArray4 = 0;
576     byteArray4 = sortk2.getByteArray(cnt4);
577 
578     CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2);
579     CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4);
580 
581     doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed.");
582     doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed.");
583     doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed");
584     doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed.");
585     doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed.");
586     doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed");
587 
588     logln("Equality tests : ");
589     doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed.");
590     doAssert(sortk2 == sortk5, "sortk2 == sortk5 Failed.");
591     doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed.");
592     doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed.");
593     doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed.");
594     doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed.");
595 
596     byteArray1 = 0;
597     byteArray2 = 0;
598 
599     sortk3 = sortk1;
600     doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed.");
601     doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed.");
602     logln("testing sortkey ends...");
603 
604     col->setStrength(Collator::SECONDARY);
605     doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo(
606                                   col->getCollationKey(test2, sortk2, key2Status))
607                                   == Collator::EQUAL,
608                                   "Result should be \"Abcda\" == \"abcda\"");
609     delete col;
610 }
611 
//----------------------------------------------------------------------------
// Tests the CollationElementIterator class.
// ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
//
616 void
TestElemIter()617 CollationAPITest::TestElemIter(/* char* par */)
618 {
619     logln("testing sortkey begins...");
620     Collator *col = 0;
621     UErrorCode success = U_ZERO_ERROR;
622     col = Collator::createInstance(Locale::getEnglish(), success);
623     if (U_FAILURE(success))
624     {
625         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
626         return;
627     }
628 
629     UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
630     UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
631     logln("Constructors and comparison testing....");
632     CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
633 
634     CharacterIterator *chariter=new StringCharacterIterator(testString1);
635     CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter);
636 
637     // copy ctor
638     CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1);
639     CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2);
640 
641     int32_t offset = iterator1->getOffset();
642     if (offset != 0) {
643         errln("Error in getOffset for collation element iterator\n");
644         return;
645     }
646     iterator1->setOffset(6, success);
647     if (U_FAILURE(success)) {
648         errln("Error in setOffset for collation element iterator\n");
649         return;
650     }
651     iterator1->setOffset(0, success);
652     int32_t order1, order2, order3;
653     doAssert((*iterator1 == *iterator2), "The two iterators should be the same");
654     doAssert((*iterator1 != *iterator3), "The two iterators should be different");
655 
656     doAssert((*coliter == *iterator1), "The two iterators should be the same");
657     doAssert((*coliter == *iterator2), "The two iterators should be the same");
658     doAssert((*coliter != *iterator3), "The two iterators should be different");
659 
660     order1 = iterator1->next(success);
661     if (U_FAILURE(success))
662     {
663         errln("Somehow ran out of memory stepping through the iterator.");
664         return;
665     }
666 
667     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
668     order2 = iterator2->getOffset();
669     doAssert((order1 != order2), "The order result should not be the same");
670     order2 = iterator2->next(success);
671     if (U_FAILURE(success))
672     {
673         errln("Somehow ran out of memory stepping through the iterator.");
674         return;
675     }
676 
677     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
678     doAssert((order1 == order2), "The order result should be the same");
679     order3 = iterator3->next(success);
680     if (U_FAILURE(success))
681     {
682         errln("Somehow ran out of memory stepping through the iterator.");
683         return;
684     }
685 
686     doAssert((CollationElementIterator::primaryOrder(order1) ==
687         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
688     doAssert((CollationElementIterator::secondaryOrder(order1) ==
689         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
690     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
691         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
692 
693     order1 = iterator1->next(success); order3 = iterator3->next(success);
694     if (U_FAILURE(success))
695     {
696         errln("Somehow ran out of memory stepping through the iterator.");
697         return;
698     }
699 
700     doAssert((CollationElementIterator::primaryOrder(order1) ==
701         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
702     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
703         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
704 
705     order1 = iterator1->next(success);
706     order3 = iterator3->next(success);
707     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
708     /*
709     doAssert((CollationElementIterator::secondaryOrder(order1) !=
710         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
711     */
712     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
713 
714     iterator1->reset(); iterator2->reset(); iterator3->reset();
715     order1 = iterator1->next(success);
716     if (U_FAILURE(success))
717     {
718         errln("Somehow ran out of memory stepping through the iterator.");
719         return;
720     }
721 
722     doAssert((*iterator1 != *iterator2), "The first iterator advance failed");
723 
724     order2 = iterator2->next(success);
725     if (U_FAILURE(success))
726     {
727         errln("Somehow ran out of memory stepping through the iterator.");
728         return;
729     }
730 
731     doAssert((*iterator1 == *iterator2), "The second iterator advance failed");
732     doAssert((order1 == order2), "The order result should be the same");
733 
734     order3 = iterator3->next(success);
735     if (U_FAILURE(success))
736     {
737         errln("Somehow ran out of memory stepping through the iterator.");
738         return;
739     }
740 
741     doAssert((CollationElementIterator::primaryOrder(order1) ==
742         CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same");
743     doAssert((CollationElementIterator::secondaryOrder(order1) ==
744         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same");
745     doAssert((CollationElementIterator::tertiaryOrder(order1) ==
746         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same");
747 
748     order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success);
749     if (U_FAILURE(success))
750     {
751         errln("Somehow ran out of memory stepping through the iterator.");
752         return;
753     }
754 
755     doAssert((CollationElementIterator::primaryOrder(order1) ==
756         CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical");
757     doAssert((CollationElementIterator::tertiaryOrder(order1) !=
758         CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different");
759 
760     order1 = iterator1->next(success); order3 = iterator3->next(success);
761     if (U_FAILURE(success))
762     {
763         errln("Somehow ran out of memory stepping through the iterator.");
764         return;
765     }
766 
767     /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
768     /*
769     doAssert((CollationElementIterator::secondaryOrder(order1) !=
770         CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
771     */
772     doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached");
773     doAssert((*iterator2 != *iterator3), "The iterators should be different");
774 
775 
776     //test error values
777     success=U_UNSUPPORTED_ERROR;
778     Collator *colerror=NULL;
779     colerror=Collator::createInstance(Locale::getEnglish(), success);
780     if (colerror != 0 || success == U_ZERO_ERROR){
781         errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
782     }
783     int32_t position=coliter->previous(success);
784     if(position != CollationElementIterator::NULLORDER){
785         errln((UnicodeString)"Expected NULLORDER got" + position);
786     }
787     coliter->reset();
788     coliter->setText(*chariter, success);
789     if(!U_FAILURE(success)){
790         errln("Expeceted error");
791     }
792     iterator1->setText((UnicodeString)"hello there", success);
793     if(!U_FAILURE(success)){
794         errln("Expeceted error");
795     }
796 
797     delete chariter;
798     delete coliter;
799     delete iterator1;
800     delete iterator2;
801     delete iterator3;
802     delete col;
803 
804 
805 
806     logln("testing CollationElementIterator ends...");
807 }
808 
809 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
810 void
TestOperators()811 CollationAPITest::TestOperators(/* char* par */)
812 {
813     UErrorCode success = U_ZERO_ERROR;
814     UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
815     UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
816     RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success);
817     if (U_FAILURE(success)) {
818         errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success));
819         return;
820     }
821     success = U_ZERO_ERROR;
822     RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success);
823     if (U_FAILURE(success)) {
824         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
825         return;
826     }
827     logln("The operator tests begin : ");
828     logln("testing operator==, operator!=, clone  methods ...");
829     doAssert((*col1 != *col2), "The two different table collations compared equal");
830     *col1 = *col2;
831     doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)");
832 
833     success = U_ZERO_ERROR;
834     Collator *col3 = Collator::createInstance(Locale::getEnglish(), success);
835     if (U_FAILURE(success)) {
836         errln("Default collation creation failed.");
837         return;
838     }
839     doAssert((*col1 != *col3), "The two different table collations compared equal");
840     Collator* col4 = col1->clone();
841     Collator* col5 = col3->clone();
842     doAssert((*col1 == *col4), "Cloned collation objects not equal");
843     doAssert((*col3 != *col4), "Two different table collations compared equal");
844     doAssert((*col3 == *col5), "Cloned collation objects not equal");
845     doAssert((*col4 != *col5), "Two cloned collations compared equal");
846 
847     const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules();
848     RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success);
849     if (U_FAILURE(success)) {
850         errln("Creating default collation with rules failed.");
851         return;
852     }
853     doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default collator getRules failed");
854 
855     success = U_ZERO_ERROR;
856     RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success);
857     if (U_FAILURE(success)) {
858         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
859         return;
860     }
861     success = U_ZERO_ERROR;
862     RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success);
863     if (U_FAILURE(success)) {
864         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
865         return;
866     }
867     success = U_ZERO_ERROR;
868     RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success);
869     if (U_FAILURE(success)) {
870         errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
871         return;
872     }
873   //  doAssert((*col7 == *col8), "The two equal table collations compared different");
874     doAssert((*col7 != *col9), "The two different table collations compared equal");
875     doAssert((*col8 != *col9), "The two different table collations compared equal");
876 
877     logln("operator tests ended.");
878     delete col1;
879     delete col2;
880     delete col3;
881     delete col4;
882     delete col5;
883     delete col6;
884     delete col7;
885     delete col8;
886     delete col9;
887 }
888 
889 // test clone and copy
890 void
TestDuplicate()891 CollationAPITest::TestDuplicate(/* char* par */)
892 {
893     UErrorCode status = U_ZERO_ERROR;
894     Collator *col1 = Collator::createInstance(Locale::getEnglish(), status);
895     if (U_FAILURE(status)) {
896         logln("Default collator creation failed.");
897         return;
898     }
899     Collator *col2 = col1->clone();
900     doAssert((*col1 == *col2), "Cloned object is not equal to the orginal");
901     UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
902     RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status);
903     if (U_FAILURE(status)) {
904         logln("Collation tailoring failed.");
905         return;
906     }
907     doAssert((*col1 != *col3), "Cloned object is equal to some dummy");
908     *col3 = *((RuleBasedCollator*)col1);
909     doAssert((*col1 == *col3), "Copied object is not equal to the orginal");
910 
911     UCollationResult res;
912     UnicodeString first((UChar)0x0061);
913     UnicodeString second((UChar)0x0062);
914     UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules());
915 
916     delete col1;
917 
918     // Try using the cloned collators after deleting the original data
919     res = col2->compare(first, second, status);
920     if(res != UCOL_LESS) {
921         errln("a should be less then b after tailoring");
922     }
923     if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) {
924         errln(UnicodeString("English rule difference. ")
925             + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules());
926     }
927     res = col3->compare(first, second, status);
928     if(res != UCOL_LESS) {
929         errln("a should be less then b after tailoring");
930     }
931     if (col3->getRules() != copiedEnglishRules) {
932         errln(UnicodeString("English rule difference. ")
933             + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules());
934     }
935 
936     delete col2;
937     delete col3;
938 }
939 
940 void
TestCompare()941 CollationAPITest::TestCompare(/* char* par */)
942 {
943     logln("The compare tests begin : ");
944     Collator *col = 0;
945     UErrorCode success = U_ZERO_ERROR;
946     col = Collator::createInstance(Locale::getEnglish(), success);
947     if (U_FAILURE(success)) {
948         errcheckln(success, "Default collation creation failed. - %s", u_errorName(success));
949         return;
950     }
951     UnicodeString test1("Abcda"), test2("abcda");
952     logln("Use tertiary comparison level testing ....");
953 
954     doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\"");
955     doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
956     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\"");
957 
958     col->setStrength(Collator::SECONDARY);
959     logln("Use secondary comparison level testing ....");
960 
961     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
962     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
963     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
964 
965     col->setStrength(Collator::PRIMARY);
966     logln("Use primary comparison level testing ....");
967 
968     doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
969     doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
970     doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\"");
971 
972     // Test different APIs
973     const UChar* t1 = test1.getBuffer();
974     int32_t t1Len = test1.length();
975     const UChar* t2 = test2.getBuffer();
976     int32_t t2Len = test2.length();
977 
978     doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem");
979     doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem");
980     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem");
981     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem");
982     doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem");
983     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem");
984 
985     col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success);
986     doAssert((col->compare(test1, test2) == Collator::GREATER), "Problem");
987     doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem");
988     doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem");
989     doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem");
990     doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem");
991     doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem");
992 
993 
994 
995     logln("The compare tests end.");
996     delete col;
997 }
998 
999 void
TestGetAll()1000 CollationAPITest::TestGetAll(/* char* par */)
1001 {
1002     int32_t count1, count2;
1003     UErrorCode status = U_ZERO_ERROR;
1004 
1005     logln("Trying Collator::getAvailableLocales(int&)");
1006 
1007     const Locale* list = Collator::getAvailableLocales(count1);
1008     for (int32_t i = 0; i < count1; ++i) {
1009         UnicodeString dispName;
1010         logln(UnicodeString("Locale name: ")
1011             + UnicodeString(list[i].getName())
1012             + UnicodeString(" , the display name is : ")
1013             + UnicodeString(list[i].getDisplayName(dispName)));
1014     }
1015 
1016     if (count1 == 0 || list == NULL) {
1017         dataerrln("getAvailableLocales(int&) returned an empty list");
1018     }
1019 
1020     logln("Trying Collator::getAvailableLocales()");
1021     StringEnumeration* localeEnum = Collator::getAvailableLocales();
1022     const UnicodeString* locStr;
1023     const char *locCStr;
1024     count2 = 0;
1025 
1026     if (localeEnum == NULL) {
1027         dataerrln("getAvailableLocales() returned NULL");
1028         return;
1029     }
1030 
1031     while ((locStr = localeEnum->snext(status)) != NULL)
1032     {
1033         logln(UnicodeString("Locale name is: ") + *locStr);
1034         count2++;
1035     }
1036     if (count1 != count2) {
1037         errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2);
1038     }
1039 
1040     logln("Trying Collator::getAvailableLocales() clone");
1041     count1 = 0;
1042     StringEnumeration* localeEnum2 = localeEnum->clone();
1043     localeEnum2->reset(status);
1044     while ((locCStr = localeEnum2->next(NULL, status)) != NULL)
1045     {
1046         logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr));
1047         count1++;
1048     }
1049     if (count1 != count2) {
1050         errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2);
1051     }
1052     if (localeEnum->count(status) != count1) {
1053         errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1);
1054     }
1055     delete localeEnum;
1056     delete localeEnum2;
1057 }
1058 
TestSortKey()1059 void CollationAPITest::TestSortKey()
1060 {
1061     UErrorCode status = U_ZERO_ERROR;
1062     /*
1063     this is supposed to open default date format, but later on it treats
1064     it like it is "en_US"
1065     - very bad if you try to run the tests on machine where default
1066       locale is NOT "en_US"
1067     */
1068     Collator *col = Collator::createInstance(Locale::getEnglish(), status);
1069     if (U_FAILURE(status)) {
1070         errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status));
1071         return;
1072     }
1073 
1074     if (col->getStrength() != Collator::TERTIARY)
1075     {
1076         errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1077     }
1078 
1079     /* Need to use identical strength */
1080     col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status);
1081 
1082     UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1083           test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1084           test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1085 
1086     uint8_t sortkey1[64];
1087     uint8_t sortkey2[64];
1088     uint8_t sortkey3[64];
1089 
1090     logln("Use tertiary comparison level testing ....\n");
1091 
1092     CollationKey key1;
1093     col->getCollationKey(test1, u_strlen(test1), key1, status);
1094 
1095     CollationKey key2;
1096     col->getCollationKey(test2, u_strlen(test2), key2, status);
1097 
1098     CollationKey key3;
1099     col->getCollationKey(test3, u_strlen(test3), key3, status);
1100 
1101     doAssert(key1.compareTo(key2) == Collator::GREATER,
1102         "Result should be \"Abcda\" > \"abcda\"");
1103     doAssert(key2.compareTo(key1) == Collator::LESS,
1104         "Result should be \"abcda\" < \"Abcda\"");
1105     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1106         "Result should be \"abcda\" ==  \"abcda\"");
1107 
1108     // Clone the key2 sortkey for later.
1109     int32_t keylength = 0;
1110     const uint8_t *key2primary_alias = key2.getByteArray(keylength);
1111     LocalArray<uint8_t> key2primary(new uint8_t[keylength]);
1112     memcpy(key2primary.getAlias(), key2primary_alias, keylength);
1113 
1114     col->getSortKey(test1, sortkey1, 64);
1115     col->getSortKey(test2, sortkey2, 64);
1116     col->getSortKey(test3, sortkey3, 64);
1117 
1118     const uint8_t *tempkey = key1.getByteArray(keylength);
1119     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1120         "Test1 string should have the same collation key and sort key");
1121     tempkey = key2.getByteArray(keylength);
1122     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1123         "Test2 string should have the same collation key and sort key");
1124     tempkey = key3.getByteArray(keylength);
1125     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1126         "Test3 string should have the same collation key and sort key");
1127 
1128     col->getSortKey(test1, 5, sortkey1, 64);
1129     col->getSortKey(test2, 5, sortkey2, 64);
1130     col->getSortKey(test3, 5, sortkey3, 64);
1131 
1132     tempkey = key1.getByteArray(keylength);
1133     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1134         "Test1 string should have the same collation key and sort key");
1135     tempkey = key2.getByteArray(keylength);
1136     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1137         "Test2 string should have the same collation key and sort key");
1138     tempkey = key3.getByteArray(keylength);
1139     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1140         "Test3 string should have the same collation key and sort key");
1141 
1142     UnicodeString strtest1(test1);
1143     col->getSortKey(strtest1, sortkey1, 64);
1144     UnicodeString strtest2(test2);
1145     col->getSortKey(strtest2, sortkey2, 64);
1146     UnicodeString strtest3(test3);
1147     col->getSortKey(strtest3, sortkey3, 64);
1148 
1149     tempkey = key1.getByteArray(keylength);
1150     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1151         "Test1 string should have the same collation key and sort key");
1152     tempkey = key2.getByteArray(keylength);
1153     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1154         "Test2 string should have the same collation key and sort key");
1155     tempkey = key3.getByteArray(keylength);
1156     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1157         "Test3 string should have the same collation key and sort key");
1158 
1159     logln("Use secondary comparision level testing ...\n");
1160     col->setStrength(Collator::SECONDARY);
1161 
1162     col->getCollationKey(test1, u_strlen(test1), key1, status);
1163     col->getCollationKey(test2, u_strlen(test2), key2, status);
1164     col->getCollationKey(test3, u_strlen(test3), key3, status);
1165 
1166     doAssert(key1.compareTo(key2) == Collator::EQUAL,
1167         "Result should be \"Abcda\" == \"abcda\"");
1168     doAssert(key2.compareTo(key3) == Collator::EQUAL,
1169         "Result should be \"abcda\" ==  \"abcda\"");
1170 
1171     tempkey = key2.getByteArray(keylength);
1172     doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0,
1173              "Binary format for 'abcda' sortkey different for secondary strength!");
1174 
1175     col->getSortKey(test1, sortkey1, 64);
1176     col->getSortKey(test2, sortkey2, 64);
1177     col->getSortKey(test3, sortkey3, 64);
1178 
1179     tempkey = key1.getByteArray(keylength);
1180     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1181         "Test1 string should have the same collation key and sort key");
1182     tempkey = key2.getByteArray(keylength);
1183     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1184         "Test2 string should have the same collation key and sort key");
1185     tempkey = key3.getByteArray(keylength);
1186     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1187         "Test3 string should have the same collation key and sort key");
1188 
1189     col->getSortKey(test1, 5, sortkey1, 64);
1190     col->getSortKey(test2, 5, sortkey2, 64);
1191     col->getSortKey(test3, 5, sortkey3, 64);
1192 
1193     tempkey = key1.getByteArray(keylength);
1194     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1195         "Test1 string should have the same collation key and sort key");
1196     tempkey = key2.getByteArray(keylength);
1197     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1198         "Test2 string should have the same collation key and sort key");
1199     tempkey = key3.getByteArray(keylength);
1200     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1201         "Test3 string should have the same collation key and sort key");
1202 
1203     col->getSortKey(strtest1, sortkey1, 64);
1204     col->getSortKey(strtest2, sortkey2, 64);
1205     col->getSortKey(strtest3, sortkey3, 64);
1206 
1207     tempkey = key1.getByteArray(keylength);
1208     doAssert(memcmp(tempkey, sortkey1, keylength) == 0,
1209         "Test1 string should have the same collation key and sort key");
1210     tempkey = key2.getByteArray(keylength);
1211     doAssert(memcmp(tempkey, sortkey2, keylength) == 0,
1212         "Test2 string should have the same collation key and sort key");
1213     tempkey = key3.getByteArray(keylength);
1214     doAssert(memcmp(tempkey, sortkey3, keylength) == 0,
1215         "Test3 string should have the same collation key and sort key");
1216 
1217     logln("testing sortkey ends...");
1218     delete col;
1219 }
1220 
TestSortKeyOverflow()1221 void CollationAPITest::TestSortKeyOverflow() {
1222     IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()");
1223     LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode));
1224     if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1225         return;
1226     }
1227     col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
1228     UChar i_and_phi[] = { 0x438, 0x3c6 };  // Cyrillic small i & Greek small phi.
1229     // The sort key should be 6 bytes:
1230     // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1231     // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
1232     uint8_t sortKey[12];
1233     int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey));
1234     uint8_t sortKey2[12];
1235     for (int32_t capacity = 0; capacity < length; ++capacity) {
1236         uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2));
1237         int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity);
1238         if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) {
1239             errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity);
1240         } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) {
1241             errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity);
1242         }
1243     }
1244 
1245     // Now try to break getCollationKey().
1246     // Internally, it always starts with a large stack buffer.
1247     // Since we cannot control the initial capacity, we throw an increasing number
1248     // of characters at it, with the problematic part at the end.
1249     const int32_t longCapacity = 2000;
1250     // Each 'a' in the prefix should result in one primary sort key byte.
1251     // For i_and_phi we expect 6 bytes, then the NUL terminator.
1252     const int32_t maxPrefixLength = longCapacity - 6 - 1;
1253     LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]);
1254     UnicodeString s(FALSE, i_and_phi, 2);
1255     for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) {
1256         length = col->getSortKey(s, longSortKey.getAlias(), longCapacity);
1257         CollationKey collKey;
1258         col->getCollationKey(s, collKey, errorCode);
1259         int32_t collKeyLength;
1260         const uint8_t *collSortKey = collKey.getByteArray(collKeyLength);
1261         if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) {
1262             errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength);
1263         }
1264 
1265         // Insert an 'a' to match ++prefixLength.
1266         s.insert(prefixLength, (UChar)0x61);
1267     }
1268 }
1269 
TestMaxExpansion()1270 void CollationAPITest::TestMaxExpansion()
1271 {
1272     UErrorCode          status = U_ZERO_ERROR;
1273     UChar               ch     = 0;
1274     UChar32             unassigned = 0xEFFFD;
1275     uint32_t            sorder = 0;
1276     uint32_t            temporder = 0;
1277 
1278     UnicodeString rule("&a < ab < c/aba < d < z < ch");
1279     RuleBasedCollator coll(rule, status);
1280     if(U_FAILURE(status)) {
1281       errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1282       return;
1283     }
1284     UnicodeString str(ch);
1285     CollationElementIterator *iter =
1286                                   coll.createCollationElementIterator(str);
1287 
1288     while (ch < 0xFFFF && U_SUCCESS(status)) {
1289         int      count = 1;
1290         uint32_t order;
1291         int32_t  size = 0;
1292 
1293         ch ++;
1294 
1295         str.setCharAt(0, ch);
1296         iter->setText(str, status);
1297         order = iter->previous(status);
1298 
1299         /* thai management */
1300         if (order == 0)
1301             order = iter->previous(status);
1302 
1303         while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) {
1304             count ++;
1305         }
1306 
1307         size = coll.getMaxExpansion(order);
1308         if (U_FAILURE(status) || size < count) {
1309             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1310                   ch, size, count);
1311         }
1312     }
1313 
1314     /* testing for exact max expansion */
1315     int32_t size;
1316     ch = 0;
1317     while (ch < 0x61) {
1318         uint32_t order;
1319         str.setCharAt(0, ch);
1320         iter->setText(str, status);
1321         order = iter->previous(status);
1322         size  = coll.getMaxExpansion(order);
1323         if (U_FAILURE(status) || size != 1) {
1324             errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1325                   ch, size, 1);
1326         }
1327         ch ++;
1328     }
1329 
1330     ch = 0x63;
1331     str.setTo(ch);
1332     iter->setText(str, status);
1333     temporder = iter->previous(status);
1334     size = coll.getMaxExpansion(temporder);
1335     if (U_FAILURE(status) || size != 3) {
1336         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1337               ch, temporder, size, 3);
1338     }
1339 
1340     ch = 0x64;
1341     str.setTo(ch);
1342     iter->setText(str, status);
1343     temporder = iter->previous(status);
1344     size = coll.getMaxExpansion(temporder);
1345     if (U_FAILURE(status) || size != 1) {
1346         errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1347               ch, temporder, size, 1);
1348     }
1349 
1350     str.setTo(unassigned);
1351     iter->setText(str, status);
1352     sorder = iter->previous(status);
1353     size = coll.getMaxExpansion(sorder);
1354     if (U_FAILURE(status) || size != 2) {
1355         errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1356               size, 2);
1357     }
1358 
1359     /* testing jamo */
1360     ch = 0x1165;
1361     str.setTo(ch);
1362     iter->setText(str, status);
1363     temporder = iter->previous(status);
1364     size = coll.getMaxExpansion(temporder);
1365     if (U_FAILURE(status) || size > 3) {
1366         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1367               ch, size, 3);
1368     }
1369 
1370     delete iter;
1371 
1372     /* testing special jamo &a<\u1160 */
1373     rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1374 
1375     RuleBasedCollator jamocoll(rule, status);
1376     iter = jamocoll.createCollationElementIterator(str);
1377     temporder = iter->previous(status);
1378     size = iter->getMaxExpansion(temporder);
1379     if (U_FAILURE(status) || size != 6) {
1380         errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1381               ch, size, 5);
1382     }
1383 
1384     delete iter;
1385 }
1386 
TestDisplayName()1387 void CollationAPITest::TestDisplayName()
1388 {
1389     UErrorCode error = U_ZERO_ERROR;
1390     Collator *coll = Collator::createInstance("en_US", error);
1391     if (U_FAILURE(error)) {
1392         errcheckln(error, "Failure creating english collator - %s", u_errorName(error));
1393         return;
1394     }
1395     UnicodeString name;
1396     UnicodeString result;
1397     coll->getDisplayName(Locale::getCanadaFrench(), result);
1398     Locale::getCanadaFrench().getDisplayName(name);
1399     if (result.compare(name)) {
1400         errln("Failure getting the correct name for locale en_US");
1401     }
1402 
1403     coll->getDisplayName(Locale::getSimplifiedChinese(), result);
1404     Locale::getSimplifiedChinese().getDisplayName(name);
1405     if (result.compare(name)) {
1406         errln("Failure getting the correct name for locale zh_SG");
1407     }
1408     delete coll;
1409 }
1410 
TestAttribute()1411 void CollationAPITest::TestAttribute()
1412 {
1413     UErrorCode error = U_ZERO_ERROR;
1414     Collator *coll = Collator::createInstance(error);
1415 
1416     if (U_FAILURE(error)) {
1417         errcheckln(error, "Creation of default collator failed - %s", u_errorName(error));
1418         return;
1419     }
1420 
1421     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error);
1422     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF ||
1423         U_FAILURE(error)) {
1424         errln("Setting and retrieving of the french collation failed");
1425     }
1426 
1427     coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error);
1428     if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON ||
1429         U_FAILURE(error)) {
1430         errln("Setting and retrieving of the french collation failed");
1431     }
1432 
1433     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error);
1434     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED ||
1435         U_FAILURE(error)) {
1436         errln("Setting and retrieving of the alternate handling failed");
1437     }
1438 
1439     coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error);
1440     if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE ||
1441         U_FAILURE(error)) {
1442         errln("Setting and retrieving of the alternate handling failed");
1443     }
1444 
1445     coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error);
1446     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST ||
1447         U_FAILURE(error)) {
1448         errln("Setting and retrieving of the case first attribute failed");
1449     }
1450 
1451     coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error);
1452     if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST ||
1453         U_FAILURE(error)) {
1454         errln("Setting and retrieving of the case first attribute failed");
1455     }
1456 
1457     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error);
1458     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON ||
1459         U_FAILURE(error)) {
1460         errln("Setting and retrieving of the case level attribute failed");
1461     }
1462 
1463     coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error);
1464     if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF ||
1465         U_FAILURE(error)) {
1466         errln("Setting and retrieving of the case level attribute failed");
1467     }
1468 
1469     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error);
1470     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON ||
1471         U_FAILURE(error)) {
1472         errln("Setting and retrieving of the normalization on/off attribute failed");
1473     }
1474 
1475     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error);
1476     if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF ||
1477         U_FAILURE(error)) {
1478         errln("Setting and retrieving of the normalization on/off attribute failed");
1479     }
1480 
1481     coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, error);
1482     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY ||
1483         U_FAILURE(error)) {
1484         errln("Setting and retrieving of the collation strength failed");
1485     }
1486 
1487     coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error);
1488     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY ||
1489         U_FAILURE(error)) {
1490         errln("Setting and retrieving of the collation strength failed");
1491     }
1492 
1493     coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error);
1494     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY ||
1495         U_FAILURE(error)) {
1496         errln("Setting and retrieving of the collation strength failed");
1497     }
1498 
1499     coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error);
1500     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY ||
1501         U_FAILURE(error)) {
1502         errln("Setting and retrieving of the collation strength failed");
1503     }
1504 
1505     coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error);
1506     if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL ||
1507         U_FAILURE(error)) {
1508         errln("Setting and retrieving of the collation strength failed");
1509     }
1510 
1511     delete coll;
1512 }
1513 
TestVariableTopSetting()1514 void CollationAPITest::TestVariableTopSetting() {
1515   UErrorCode status = U_ZERO_ERROR;
1516 
1517   UChar vt[256] = { 0 };
1518 
1519   // Use the root collator, not the default collator.
1520   // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1521   Collator *coll = Collator::createInstance(Locale::getRoot(), status);
1522   if(U_FAILURE(status)) {
1523     delete coll;
1524     errcheckln(status, "Collator creation failed with error %s", u_errorName(status));
1525     return;
1526   }
1527 
1528   uint32_t oldVarTop = coll->getVariableTop(status);
1529 
1530   // ICU 53+: The character must be in a supported reordering group,
1531   // and the variable top is pinned to the end of that group.
1532   vt[0] = 0x0041;
1533 
1534   (void)coll->setVariableTop(vt, 1, status);
1535   if(status != U_ILLEGAL_ARGUMENT_ERROR) {
1536     errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status));
1537   }
1538 
1539   status = U_ZERO_ERROR;
1540   vt[0] = 0x24;  // dollar sign (currency symbol)
1541   uint32_t newVarTop = coll->setVariableTop(vt, 1, status);
1542   if(U_FAILURE(status)) {
1543     errln("setVariableTop(dollar sign) failed: %s", u_errorName(status));
1544     return;
1545   }
1546   if(newVarTop != coll->getVariableTop(status)) {
1547     errln("setVariableTop(dollar sign) != following getVariableTop()");
1548   }
1549 
1550   UnicodeString dollar((UChar)0x24);
1551   UnicodeString euro((UChar)0x20AC);
1552   uint32_t newVarTop2 = coll->setVariableTop(euro, status);
1553   assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1554                (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status));
1555   assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1556                (int64_t)newVarTop2, (int64_t)newVarTop);
1557 
1558   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
1559   assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar));
1560   assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro));
1561   assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30)));
1562 
1563   coll->setVariableTop(oldVarTop, status);
1564 
1565   uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status);
1566 
1567   if(newVarTop != newerVarTop) {
1568     errln("Didn't set vartop properly from UnicodeString!\n");
1569   }
1570 
1571   delete coll;
1572 
1573 }
1574 
TestMaxVariable()1575 void CollationAPITest::TestMaxVariable() {
1576   UErrorCode errorCode = U_ZERO_ERROR;
1577   LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode));
1578   if(U_FAILURE(errorCode)) {
1579     errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode));
1580     return;
1581   }
1582 
1583   (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode);
1584   if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1585     errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode));
1586   }
1587 
1588   errorCode = U_ZERO_ERROR;
1589   (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode);
1590 
1591   if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) {
1592     errln("setMaxVariable(currency) != following getMaxVariable()");
1593   }
1594 
1595   coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode);
1596   assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24)));
1597   assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC)));
1598   assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30)));
1599 }
1600 
TestGetLocale()1601 void CollationAPITest::TestGetLocale() {
1602   UErrorCode status = U_ZERO_ERROR;
1603   const char *rules = "&a<x<y<z";
1604   UChar rlz[256] = {0};
1605 
1606   Collator *coll = Collator::createInstance("root", status);
1607   if(U_FAILURE(status)) {
1608     dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status));
1609     return;
1610   }
1611   Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1612   if(locale != Locale::getRoot()) {
1613     errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1614           "getLocale().getName() = \"%s\"",
1615           locale.getName());
1616   }
1617   delete coll;
1618 
1619   coll = Collator::createInstance("", status);
1620   if(U_FAILURE(status)) {
1621     dataerrln("Failed to open collator for \"\" with %s", u_errorName(status));
1622     return;
1623   }
1624   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1625   if(locale != Locale::getRoot()) {
1626     errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1627           "getLocale().getName() = \"%s\"",
1628           locale.getName());
1629   }
1630   delete coll;
1631 
1632   int32_t i = 0;
1633 
1634   static const struct {
1635     const char* requestedLocale;
1636     const char* validLocale;
1637     const char* actualLocale;
1638   } testStruct[] = {
1639     // Note: Locale::getRoot().getName() == "" not "root".
1640     { "de_DE", "de", "" },
1641     { "sr_RS", "sr_Cyrl_RS", "sr" },
1642     { "en_US_CALIFORNIA", "en_US", "" },
1643     { "fr_FR_NONEXISTANT", "fr", "" },
1644     // pinyin is the default, therefore suppressed.
1645     { "zh_CN", "zh_Hans_CN", "zh" },
1646     // zh_Hant has default=stroke but the data is in zh.
1647     { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1648     { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1649     { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }
1650   };
1651 
1652   u_unescape(rules, rlz, 256);
1653 
1654   /* test opening collators for different locales */
1655   for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) {
1656     status = U_ZERO_ERROR;
1657     coll = Collator::createInstance(testStruct[i].requestedLocale, status);
1658     if(U_FAILURE(status)) {
1659       errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status));
1660       delete coll;
1661       continue;
1662     }
1663     // The requested locale may be the same as the valid locale,
1664     // or may not be supported at all. See ticket #10477.
1665     locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1666     if(U_SUCCESS(status) &&
1667         locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) {
1668       errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1669             testStruct[i].requestedLocale,
1670             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1671     }
1672     status = U_ZERO_ERROR;
1673     locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1674     if(locale != testStruct[i].validLocale) {
1675       errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1676             testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName());
1677     }
1678     locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1679     if(locale != testStruct[i].actualLocale) {
1680       errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1681             testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName());
1682     }
1683     // If we open a collator for the actual locale, we should get an equivalent one again.
1684     LocalPointer<Collator> coll2(Collator::createInstance(locale, status));
1685     if(U_FAILURE(status)) {
1686       errln("Failed to open collator for actual locale \"%s\" with %s",
1687             locale.getName(), u_errorName(status));
1688     } else {
1689       Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status);
1690       if(actual2 != locale) {
1691         errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1692               locale.getName(), actual2.getName());
1693       }
1694       if(*coll2 != *coll) {
1695         errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName());
1696       }
1697     }
1698     delete coll;
1699   }
1700 
1701   /* completely non-existent locale for collator should get a root collator */
1702   {
1703     LocalPointer<Collator> coll(Collator::createInstance("blahaha", status));
1704     if(U_FAILURE(status)) {
1705       errln("Failed to open collator with %s", u_errorName(status));
1706       return;
1707     }
1708     Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status);
1709     const char *name = valid.getName();
1710     if(*name != 0 && strcmp(name, "root") != 0) {
1711       errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name);
1712     }
1713     Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1714     name = actual.getName();
1715     if(*name != 0 && strcmp(name, "root") != 0) {
1716       errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name);
1717     }
1718   }
1719 
1720 
1721 
1722   /* collator instantiated from rules should have all three locales NULL */
1723   coll = new RuleBasedCollator(rlz, status);
1724   locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status);
1725   if(U_SUCCESS(status) && !locale.isBogus()) {
1726     errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName());
1727   }
1728   status = U_ZERO_ERROR;
1729   locale = coll->getLocale(ULOC_VALID_LOCALE, status);
1730   if(!locale.isBogus()) {
1731     errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName());
1732   }
1733   locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status);
1734   if(!locale.isBogus()) {
1735     errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName());
1736   }
1737   delete coll;
1738 }
1739 
/**
 * One row of the table sorted by TestBounds(): an escaped source string
 * together with room for the sort key generated from it.
 */
struct teststruct {
    /** Source text, written with \u escapes that TestBounds() unescapes. */
    const char *original;

    /** Sort key bytes; compare_teststruct() compares them with strcmp(). */
    uint8_t key[256];
};
1744 
1745 
1746 
1747 U_CDECL_BEGIN
1748 static int U_CALLCONV
compare_teststruct(const void * string1,const void * string2)1749 compare_teststruct(const void *string1, const void *string2) {
1750   return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key));
1751 }
1752 U_CDECL_END
1753 
1754 
TestBounds(void)1755 void CollationAPITest::TestBounds(void) {
1756     UErrorCode status = U_ZERO_ERROR;
1757 
1758     Collator *coll = Collator::createInstance(Locale("sh"), status);
1759     if(U_FAILURE(status)) {
1760       delete coll;
1761       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1762       return;
1763     }
1764 
1765     uint8_t sortkey[512], lower[512], upper[512];
1766     UChar buffer[512];
1767 
1768     static const char * const test[] = {
1769         "John Smith",
1770         "JOHN SMITH",
1771         "john SMITH",
1772         "j\\u00F6hn sm\\u00EFth",
1773         "J\\u00F6hn Sm\\u00EFth",
1774         "J\\u00D6HN SM\\u00CFTH",
1775         "john smithsonian",
1776         "John Smithsonian"
1777     };
1778 
1779     struct teststruct tests[] = {
1780         {"\\u010CAKI MIHALJ", {0}},
1781         {"\\u010CAKI MIHALJ", {0}},
1782         {"\\u010CAKI PIRO\\u0160KA", {0}},
1783         {"\\u010CABAI ANDRIJA", {0}},
1784         {"\\u010CABAI LAJO\\u0160", {0}},
1785         {"\\u010CABAI MARIJA", {0}},
1786         {"\\u010CABAI STEVAN", {0}},
1787         {"\\u010CABAI STEVAN", {0}},
1788         {"\\u010CABARKAPA BRANKO", {0}},
1789         {"\\u010CABARKAPA MILENKO", {0}},
1790         {"\\u010CABARKAPA MIROSLAV", {0}},
1791         {"\\u010CABARKAPA SIMO", {0}},
1792         {"\\u010CABARKAPA STANKO", {0}},
1793         {"\\u010CABARKAPA TAMARA", {0}},
1794         {"\\u010CABARKAPA TOMA\\u0160", {0}},
1795         {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1796         {"\\u010CABDARI\\u0106 ZORICA", {0}},
1797         {"\\u010CABI NANDOR", {0}},
1798         {"\\u010CABOVI\\u0106 MILAN", {0}},
1799         {"\\u010CABRADI AGNEZIJA", {0}},
1800         {"\\u010CABRADI IVAN", {0}},
1801         {"\\u010CABRADI JELENA", {0}},
1802         {"\\u010CABRADI LJUBICA", {0}},
1803         {"\\u010CABRADI STEVAN", {0}},
1804         {"\\u010CABRDA MARTIN", {0}},
1805         {"\\u010CABRILO BOGDAN", {0}},
1806         {"\\u010CABRILO BRANISLAV", {0}},
1807         {"\\u010CABRILO LAZAR", {0}},
1808         {"\\u010CABRILO LJUBICA", {0}},
1809         {"\\u010CABRILO SPASOJA", {0}},
1810         {"\\u010CADE\\u0160 ZDENKA", {0}},
1811         {"\\u010CADESKI BLAGOJE", {0}},
1812         {"\\u010CADOVSKI VLADIMIR", {0}},
1813         {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1814         {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1815         {"\\u010CAJA VANKA", {0}},
1816         {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1817         {"\\u010CAJI\\u0106 BORISLAV", {0}},
1818         {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1819         {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1820         {"\\u010CAKAN EUGEN", {0}},
1821         {"\\u010CAKAN EVGENIJE", {0}},
1822         {"\\u010CAKAN IVAN", {0}},
1823         {"\\u010CAKAN JULIJAN", {0}},
1824         {"\\u010CAKAN MIHAJLO", {0}},
1825         {"\\u010CAKAN STEVAN", {0}},
1826         {"\\u010CAKAN VLADIMIR", {0}},
1827         {"\\u010CAKAN VLADIMIR", {0}},
1828         {"\\u010CAKAN VLADIMIR", {0}},
1829         {"\\u010CAKARA ANA", {0}},
1830         {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1831         {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1832         {"\\u010CAKI \\u0160ANDOR", {0}},
1833         {"\\u010CAKI AMALIJA", {0}},
1834         {"\\u010CAKI ANDRA\\u0160", {0}},
1835         {"\\u010CAKI LADISLAV", {0}},
1836         {"\\u010CAKI LAJO\\u0160", {0}},
1837         {"\\u010CAKI LASLO", {0}}
1838     };
1839 
1840 
1841 
1842     int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1843     int32_t arraySize = UPRV_LENGTHOF(tests);
1844 
1845     (void)lowerSize;  // Suppress unused variable warnings.
1846     (void)upperSize;
1847 
1848     for(i = 0; i<arraySize; i++) {
1849         buffSize = u_unescape(tests[i].original, buffer, 512);
1850         skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512);
1851     }
1852 
1853     qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1854 
1855     for(i = 0; i < arraySize-1; i++) {
1856         for(j = i+1; j < arraySize; j++) {
1857             lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status);
1858             upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status);
1859             for(k = i; k <= j; k++) {
1860                 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1861                     errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original);
1862                 }
1863                 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1864                     errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original);
1865                 }
1866             }
1867         }
1868     }
1869 
1870 
1871     for(i = 0; i<UPRV_LENGTHOF(test); i++) {
1872         buffSize = u_unescape(test[i], buffer, 512);
1873         skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1874         lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1875         upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1876         for(j = i+1; j<UPRV_LENGTHOF(test); j++) {
1877             buffSize = u_unescape(test[j], buffer, 512);
1878             skSize = coll->getSortKey(buffer, buffSize, sortkey, 512);
1879             if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1880                 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1881             }
1882             if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1883                 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]);
1884             }
1885         }
1886     }
1887     delete coll;
1888 }
1889 
1890 
TestGetTailoredSet()1891 void CollationAPITest::TestGetTailoredSet()
1892 {
1893   struct {
1894     const char *rules;
1895     const char *tests[20];
1896     int32_t testsize;
1897   } setTest[] = {
1898     { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1899     { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1900   };
1901 
1902   int32_t i = 0, j = 0;
1903   UErrorCode status = U_ZERO_ERROR;
1904 
1905   UnicodeString buff;
1906   UnicodeSet *set = NULL;
1907 
1908   for(i = 0; i < UPRV_LENGTHOF(setTest); i++) {
1909     buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape();
1910     RuleBasedCollator coll(buff, status);
1911     if(U_SUCCESS(status)) {
1912       set = coll.getTailoredSet(status);
1913       if(set->size() < setTest[i].testsize) {
1914         errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize);
1915       }
1916       for(j = 0; j < setTest[i].testsize; j++) {
1917         buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape();
1918         if(!set->contains(buff)) {
1919           errln("Tailored set doesn't contain %s... It should", setTest[i].tests[j]);
1920         }
1921       }
1922       delete set;
1923     } else {
1924       errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status));
1925     }
1926   }
1927 }
1928 
TestUClassID()1929 void CollationAPITest::TestUClassID()
1930 {
1931     char id = *((char *)RuleBasedCollator::getStaticClassID());
1932     if (id != 0) {
1933         errln("Static class id for RuleBasedCollator should be 0");
1934     }
1935     UErrorCode status = U_ZERO_ERROR;
1936     RuleBasedCollator *coll
1937         = (RuleBasedCollator *)Collator::createInstance(status);
1938     if(U_FAILURE(status)) {
1939       delete coll;
1940       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
1941       return;
1942     }
1943     id = *((char *)coll->getDynamicClassID());
1944     if (id != 0) {
1945         errln("Dynamic class id for RuleBasedCollator should be 0");
1946     }
1947     id = *((char *)CollationKey::getStaticClassID());
1948     if (id != 0) {
1949         errln("Static class id for CollationKey should be 0");
1950     }
1951     CollationKey *key = new CollationKey();
1952     id = *((char *)key->getDynamicClassID());
1953     if (id != 0) {
1954         errln("Dynamic class id for CollationKey should be 0");
1955     }
1956     id = *((char *)CollationElementIterator::getStaticClassID());
1957     if (id != 0) {
1958         errln("Static class id for CollationElementIterator should be 0");
1959     }
1960     UnicodeString str("testing");
1961     CollationElementIterator *iter = coll->createCollationElementIterator(str);
1962     id = *((char *)iter->getDynamicClassID());
1963     if (id != 0) {
1964         errln("Dynamic class id for CollationElementIterator should be 0");
1965     }
1966     delete key;
1967     delete iter;
1968     delete coll;
1969 }
1970 
1971 class TestCollator  : public Collator
1972 {
1973 public:
1974     virtual Collator* clone(void) const;
1975 
1976     using Collator::compare;
1977 
1978     virtual UCollationResult compare(const UnicodeString& source,
1979                                       const UnicodeString& target,
1980                                       UErrorCode& status) const;
1981     virtual UCollationResult compare(const UnicodeString& source,
1982                                       const UnicodeString& target,
1983                                       int32_t length,
1984                                       UErrorCode& status) const;
1985     virtual UCollationResult compare(const UChar* source,
1986                                       int32_t sourceLength,
1987                                       const UChar* target,
1988                                       int32_t targetLength,
1989                                       UErrorCode& status) const;
1990     virtual CollationKey& getCollationKey(const UnicodeString&  source,
1991                                           CollationKey& key,
1992                                           UErrorCode& status) const;
1993     virtual CollationKey& getCollationKey(const UChar*source,
1994                                           int32_t sourceLength,
1995                                           CollationKey& key,
1996                                           UErrorCode& status) const;
1997     virtual int32_t hashCode(void) const;
1998     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
1999     virtual ECollationStrength getStrength(void) const;
2000     virtual void setStrength(ECollationStrength newStrength);
2001     virtual UClassID getDynamicClassID(void) const;
2002     virtual void getVersion(UVersionInfo info) const;
2003     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
2004                               UErrorCode &status);
2005     virtual UColAttributeValue getAttribute(UColAttribute attr,
2006                                             UErrorCode &status) const;
2007     virtual uint32_t setVariableTop(const UChar *varTop, int32_t len,
2008                                     UErrorCode &status);
2009     virtual uint32_t setVariableTop(const UnicodeString &varTop,
2010                                     UErrorCode &status);
2011     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
2012     virtual uint32_t getVariableTop(UErrorCode &status) const;
2013     virtual int32_t getSortKey(const UnicodeString& source,
2014                             uint8_t* result,
2015                             int32_t resultLength) const;
2016     virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
2017                              uint8_t*result, int32_t resultLength) const;
2018     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
2019     virtual UBool operator==(const Collator& other) const;
2020     // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2021     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
TestCollator()2022     TestCollator() : Collator() {};
TestCollator(UCollationStrength collationStrength,UNormalizationMode decompositionMode)2023     TestCollator(UCollationStrength collationStrength,
2024            UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {};
2025 };
2026 
operator ==(const Collator & other) const2027 inline UBool TestCollator::operator==(const Collator& other) const {
2028     // TestCollator has no fields, so we test for identity.
2029     return this == &other;
2030 
2031     // Normally, subclasses should do something like the following:
2032     //    if (this == &other) { return TRUE; }
2033     //    if (!Collator::operator==(other)) { return FALSE; }  // not the same class
2034     //
2035     //    const TestCollator &o = (const TestCollator&)other;
2036     //    (compare this vs. o's subclass fields)
2037 }
2038 
clone() const2039 Collator* TestCollator::clone() const
2040 {
2041     return new TestCollator();
2042 }
2043 
compare(const UnicodeString & source,const UnicodeString & target,UErrorCode & status) const2044 UCollationResult TestCollator::compare(const UnicodeString& source,
2045                                         const UnicodeString& target,
2046                                         UErrorCode& status) const
2047 {
2048   if(U_SUCCESS(status)) {
2049     return UCollationResult(source.compare(target));
2050   } else {
2051     return UCOL_EQUAL;
2052   }
2053 }
2054 
compare(const UnicodeString & source,const UnicodeString & target,int32_t length,UErrorCode & status) const2055 UCollationResult TestCollator::compare(const UnicodeString& source,
2056                                         const UnicodeString& target,
2057                                         int32_t length,
2058                                         UErrorCode& status) const
2059 {
2060   if(U_SUCCESS(status)) {
2061     return UCollationResult(source.compare(0, length, target));
2062   } else {
2063     return UCOL_EQUAL;
2064   }
2065 }
2066 
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength,UErrorCode & status) const2067 UCollationResult TestCollator::compare(const UChar* source,
2068                                         int32_t sourceLength,
2069                                         const UChar* target,
2070                                         int32_t targetLength,
2071                                         UErrorCode& status) const
2072 {
2073     UnicodeString s(source, sourceLength);
2074     UnicodeString t(target, targetLength);
2075     return compare(s, t, status);
2076 }
2077 
getCollationKey(const UnicodeString & source,CollationKey & key,UErrorCode & status) const2078 CollationKey& TestCollator::getCollationKey(const UnicodeString& source,
2079                                             CollationKey& key,
2080                                             UErrorCode& status) const
2081 {
2082     char temp[100];
2083     int length = 100;
2084     length = source.extract(temp, length, NULL, status);
2085     temp[length] = 0;
2086     CollationKey tempkey((uint8_t*)temp, length);
2087     key = tempkey;
2088     return key;
2089 }
2090 
getCollationKey(const UChar * source,int32_t sourceLength,CollationKey & key,UErrorCode & status) const2091 CollationKey& TestCollator::getCollationKey(const UChar*source,
2092                                           int32_t sourceLength,
2093                                           CollationKey& key,
2094                                           UErrorCode& status) const
2095 {
2096     //s tack allocation used since collationkey does not keep the unicodestring
2097     UnicodeString str(source, sourceLength);
2098     return getCollationKey(str, key, status);
2099 }
2100 
getSortKey(const UnicodeString & source,uint8_t * result,int32_t resultLength) const2101 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result,
2102                                  int32_t resultLength) const
2103 {
2104     UErrorCode status = U_ZERO_ERROR;
2105     int32_t length = source.extract((char *)result, resultLength, NULL,
2106                                     status);
2107     result[length] = 0;
2108     return length;
2109 }
2110 
getSortKey(const UChar * source,int32_t sourceLength,uint8_t * result,int32_t resultLength) const2111 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength,
2112                                  uint8_t*result, int32_t resultLength) const
2113 {
2114     UnicodeString str(source, sourceLength);
2115     return getSortKey(str, result, resultLength);
2116 }
2117 
hashCode() const2118 int32_t TestCollator::hashCode() const
2119 {
2120     return 0;
2121 }
2122 
getLocale(ULocDataLocaleType type,UErrorCode & status) const2123 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const
2124 {
2125     // api not used, this is to make the compiler happy
2126     if (U_FAILURE(status)) {
2127         (void)type;
2128     }
2129     return NULL;
2130 }
2131 
getStrength() const2132 Collator::ECollationStrength TestCollator::getStrength() const
2133 {
2134     return TERTIARY;
2135 }
2136 
setStrength(Collator::ECollationStrength newStrength)2137 void TestCollator::setStrength(Collator::ECollationStrength newStrength)
2138 {
2139     // api not used, this is to make the compiler happy
2140     (void)newStrength;
2141 }
2142 
getDynamicClassID(void) const2143 UClassID TestCollator::getDynamicClassID(void) const
2144 {
2145     return 0;
2146 }
2147 
getVersion(UVersionInfo info) const2148 void TestCollator::getVersion(UVersionInfo info) const
2149 {
2150     // api not used, this is to make the compiler happy
2151     memset(info, 0, U_MAX_VERSION_LENGTH);
2152 }
2153 
setAttribute(UColAttribute,UColAttributeValue,UErrorCode &)2154 void TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/,
2155                                 UErrorCode & /*status*/)
2156 {
2157 }
2158 
getAttribute(UColAttribute attr,UErrorCode & status) const2159 UColAttributeValue TestCollator::getAttribute(UColAttribute attr,
2160                                               UErrorCode &status) const
2161 {
2162     // api not used, this is to make the compiler happy
2163     if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) {
2164         return UCOL_OFF;
2165     }
2166     return UCOL_DEFAULT;
2167 }
2168 
setVariableTop(const UChar * varTop,int32_t len,UErrorCode & status)2169 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len,
2170                                   UErrorCode &status)
2171 {
2172     // api not used, this is to make the compiler happy
2173     if (U_SUCCESS(status) && (varTop == 0 || len < -1)) {
2174         status = U_ILLEGAL_ARGUMENT_ERROR;
2175     }
2176     return 0;
2177 }
2178 
setVariableTop(const UnicodeString & varTop,UErrorCode & status)2179 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop,
2180                                   UErrorCode &status)
2181 {
2182     // api not used, this is to make the compiler happy
2183     if (U_SUCCESS(status) && varTop.length() == 0) {
2184         status = U_ILLEGAL_ARGUMENT_ERROR;
2185     }
2186     return 0;
2187 }
2188 
setVariableTop(uint32_t varTop,UErrorCode & status)2189 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status)
2190 {
2191     // api not used, this is to make the compiler happy
2192     if (U_SUCCESS(status) && varTop == 0) {
2193         status = U_ILLEGAL_ARGUMENT_ERROR;
2194     }
2195 }
2196 
getVariableTop(UErrorCode & status) const2197 uint32_t TestCollator::getVariableTop(UErrorCode &status) const
2198 {
2199 
2200     // api not used, this is to make the compiler happy
2201     if (U_SUCCESS(status)) {
2202         return 0;
2203     }
2204     return (uint32_t)(0xFFFFFFFFu);
2205 }
2206 
getTailoredSet(UErrorCode & status) const2207 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const
2208 {
2209     return Collator::getTailoredSet(status);
2210 }
2211 
setLocales(const Locale & requestedLocale,const Locale & validLocale,const Locale & actualLocale)2212 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale)
2213 {
2214     Collator::setLocales(requestedLocale, validLocale, actualLocale);
2215 }
2216 
2217 
TestSubclass()2218 void CollationAPITest::TestSubclass()
2219 {
2220     TestCollator col1;
2221     TestCollator col2;
2222     doAssert(col1 != col2, "2 instances of TestCollator should be different");
2223     if (col1.hashCode() != col2.hashCode()) {
2224         errln("Every TestCollator has the same hashcode");
2225     }
2226     UnicodeString abc("abc", 3);
2227     UnicodeString bcd("bcd", 3);
2228     if (col1.compare(abc, bcd) != abc.compare(bcd)) {
2229         errln("TestCollator compare should be the same as the default "
2230               "string comparison");
2231     }
2232     CollationKey key;
2233     UErrorCode status = U_ZERO_ERROR;
2234     col1.getCollationKey(abc, key, status);
2235     int32_t length = 0;
2236     const char* bytes = (const char *)key.getByteArray(length);
2237     UnicodeString keyarray(bytes, length, NULL, status);
2238     if (abc != keyarray) {
2239         errln("TestCollator collationkey API is returning wrong values");
2240     }
2241 
2242     UnicodeSet expectedset(0, 0x10FFFF);
2243     UnicodeSet *defaultset = col1.getTailoredSet(status);
2244     if (!defaultset->containsAll(expectedset)
2245         || !expectedset.containsAll(*defaultset)) {
2246         errln("Error: expected default tailoring to be 0 to 0x10ffff");
2247     }
2248     delete defaultset;
2249 
2250     // use base class implementation
2251     Locale loc1 = Locale::getGermany();
2252     Locale loc2 = Locale::getFrance();
2253     col1.setLocales(loc1, loc2, loc2); // default implementation has no effect
2254 
2255     UnicodeString displayName;
2256     col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale
2257 
2258     TestCollator col3(UCOL_TERTIARY, UNORM_NONE);
2259     UnicodeString a("a");
2260     UnicodeString b("b");
2261     Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b));
2262     if(col1.compare(a, b) != result) {
2263       errln("Collator doesn't give default result");
2264     }
2265     if(col1.compare(a, b, 1) != result) {
2266       errln("Collator doesn't give default result");
2267     }
2268     if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) {
2269       errln("Collator doesn't give default result");
2270     }
2271 }
2272 
TestNULLCharTailoring()2273 void CollationAPITest::TestNULLCharTailoring()
2274 {
2275     UErrorCode status = U_ZERO_ERROR;
2276     UChar buf[256] = {0};
2277     int32_t len = u_unescape("&a < '\\u0000'", buf, 256);
2278     UnicodeString first((UChar)0x0061);
2279     UnicodeString second((UChar)0);
2280     RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status);
2281     if(U_FAILURE(status)) {
2282         delete coll;
2283         errcheckln(status, "Failed to open collator - %s", u_errorName(status));
2284         return;
2285     }
2286     UCollationResult res = coll->compare(first, second, status);
2287     if(res != UCOL_LESS) {
2288         errln("a should be less then NULL after tailoring");
2289     }
2290     delete coll;
2291 }
2292 
TestClone()2293 void CollationAPITest::TestClone() {
2294     logln("\ninit c0");
2295     UErrorCode status = U_ZERO_ERROR;
2296     RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status);
2297 
2298     if (U_FAILURE(status)) {
2299         errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status));
2300         return;
2301     }
2302 
2303     c0->setStrength(Collator::TERTIARY);
2304     dump("c0", c0, status);
2305 
2306     logln("\ninit c1");
2307     RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status);
2308     c1->setStrength(Collator::TERTIARY);
2309     UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status);
2310     if(val == UCOL_LOWER_FIRST){
2311         c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2312     }else{
2313         c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2314     }
2315     dump("c0", c0, status);
2316     dump("c1", c1, status);
2317 
2318     logln("\ninit c2");
2319     RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone();
2320     val = c2->getAttribute(UCOL_CASE_FIRST, status);
2321     if(val == UCOL_LOWER_FIRST){
2322         c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
2323     }else{
2324         c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
2325     }
2326     if(U_FAILURE(status)){
2327         errln("set and get attributes of collator failed. %s\n", u_errorName(status));
2328         return;
2329     }
2330     dump("c0", c0, status);
2331     dump("c1", c1, status);
2332     dump("c2", c2, status);
2333     if(*c1 == *c2){
2334         errln("The cloned objects refer to same data");
2335     }
2336     delete c0;
2337     delete c1;
2338     delete c2;
2339 }
2340 
TestCloneBinary()2341 void CollationAPITest::TestCloneBinary() {
2342     IcuTestErrorCode errorCode(*this, "TestCloneBinary");
2343     LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode));
2344     LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode));
2345     if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
2346         return;
2347     }
2348     RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias());
2349     RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
2350     if(rbRoot == NULL || rbc == NULL) {
2351         infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2352         return;
2353     }
2354     rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode);
2355     UnicodeString uUmlaut((UChar)0xfc);
2356     UnicodeString ue = UNICODE_STRING_SIMPLE("ue");
2357     assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode));
2358     uint8_t bin[25000];
2359     int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode);
2360     if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) {
2361         return;
2362     }
2363     logln("rbc->cloneBinary() -> %d bytes", (int)binLength);
2364 
2365     RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode);
2366     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2367         return;
2368     }
2369     assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode));
2370     assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode));
2371     assertTrue("rbc==rbc2", *rbc == rbc2);
2372     uint8_t bin2[25000];
2373     int32_t bin2Length = rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode);
2374     assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length);
2375     assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0);
2376 
2377     RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode);
2378     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2379         return;
2380     }
2381     assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode));
2382     assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode));
2383     assertTrue("rbc==rbc3", *rbc == rbc3);
2384 }
2385 
TestIterNumeric()2386 void CollationAPITest::TestIterNumeric() {
2387     // Regression test for ticket #9915.
2388     // The collation code sometimes masked the continuation marker away
2389     // but later tested the result for isContinuation().
2390     // This test case failed because the third bytes of the computed numeric-collation primaries
2391     // were permutated with the script reordering table.
2392     // It should have been possible to reproduce this with the root collator
2393     // and characters with appropriate 3-byte primary weights.
2394     // The effectiveness of this test depends completely on the collation elements
2395     // and on the implementation code.
2396     IcuTestErrorCode errorCode(*this, "TestIterNumeric");
2397     RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode);
2398     if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) {
2399         return;
2400     }
2401     coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode);
2402     UCharIterator iter40, iter72;
2403     uiter_setUTF8(&iter40, "\x34\x30", 2);
2404     uiter_setUTF8(&iter72, "\x37\x32", 2);
2405     UCollationResult result = coll.compare(iter40, iter72, errorCode);
2406     assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result);
2407 }
2408 
TestBadKeywords()2409 void CollationAPITest::TestBadKeywords() {
2410     // Test locale IDs with errors.
2411     // Valid locale IDs are tested via data-driven tests.
2412     UErrorCode errorCode = U_ZERO_ERROR;
2413     Locale bogusLocale(Locale::getRoot());
2414     bogusLocale.setToBogus();
2415     LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode));
2416     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2417         errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2418               u_errorName(errorCode));
2419     }
2420 
2421     // Unknown value.
2422     const char *localeID = "it-u-ks-xyz";
2423     errorCode = U_ZERO_ERROR;
2424     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2425     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2426         dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2427               localeID, u_errorName(errorCode));
2428     }
2429 
2430     // Unsupported attributes.
2431     localeID = "it@colHiraganaQuaternary=true";
2432     errorCode = U_ZERO_ERROR;
2433     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2434     if(errorCode != U_UNSUPPORTED_ERROR) {
2435         if (errorCode == U_FILE_ACCESS_ERROR) {
2436             dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode));
2437         } else {
2438             errln("Collator::createInstance(%s) did not fail as expected - %s",
2439                   localeID, u_errorName(errorCode));
2440         }
2441     }
2442 
2443     localeID = "it-u-vt-u24";
2444     errorCode = U_ZERO_ERROR;
2445     coll.adoptInstead(Collator::createInstance(localeID, errorCode));
2446     if(errorCode != U_UNSUPPORTED_ERROR) {
2447         if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) {
2448             dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode));
2449         } else {
2450            errln("Collator::createInstance(%s) did not fail as expected - %s",
2451                   localeID, u_errorName(errorCode));
2452         }
2453     }
2454 }
2455 
dump(UnicodeString msg,RuleBasedCollator * c,UErrorCode & status)2456  void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) {
2457     const char* bigone = "One";
2458     const char* littleone = "one";
2459 
2460     logln(msg + " " + c->compare(bigone, littleone) +
2461                         " s: " + c->getStrength() +
2462                         " u: " + c->getAttribute(UCOL_CASE_FIRST, status));
2463 }
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)2464 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
2465 {
2466     if (exec) logln("TestSuite CollationAPITest: ");
2467     TESTCASE_AUTO_BEGIN;
2468     TESTCASE_AUTO(TestProperty);
2469     TESTCASE_AUTO(TestOperators);
2470     TESTCASE_AUTO(TestDuplicate);
2471     TESTCASE_AUTO(TestCompare);
2472     TESTCASE_AUTO(TestHashCode);
2473     TESTCASE_AUTO(TestCollationKey);
2474     TESTCASE_AUTO(TestElemIter);
2475     TESTCASE_AUTO(TestGetAll);
2476     TESTCASE_AUTO(TestRuleBasedColl);
2477     TESTCASE_AUTO(TestDecomposition);
2478     TESTCASE_AUTO(TestSafeClone);
2479     TESTCASE_AUTO(TestSortKey);
2480     TESTCASE_AUTO(TestSortKeyOverflow);
2481     TESTCASE_AUTO(TestMaxExpansion);
2482     TESTCASE_AUTO(TestDisplayName);
2483     TESTCASE_AUTO(TestAttribute);
2484     TESTCASE_AUTO(TestVariableTopSetting);
2485     TESTCASE_AUTO(TestMaxVariable);
2486     TESTCASE_AUTO(TestRules);
2487     TESTCASE_AUTO(TestGetLocale);
2488     TESTCASE_AUTO(TestBounds);
2489     TESTCASE_AUTO(TestGetTailoredSet);
2490     TESTCASE_AUTO(TestUClassID);
2491     TESTCASE_AUTO(TestSubclass);
2492     TESTCASE_AUTO(TestNULLCharTailoring);
2493     TESTCASE_AUTO(TestClone);
2494     TESTCASE_AUTO(TestCloneBinary);
2495     TESTCASE_AUTO(TestIterNumeric);
2496     TESTCASE_AUTO(TestBadKeywords);
2497     TESTCASE_AUTO_END;
2498 }
2499 
2500 #endif /* #if !UCONFIG_NO_COLLATION */
2501