• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_COLLATION
12 
13 #include "unicode/coll.h"
14 #include "unicode/tblcoll.h"
15 #include "unicode/unistr.h"
16 #include "unicode/sortkey.h"
17 #include "itercoll.h"
18 #include "unicode/schriter.h"
19 #include "unicode/chariter.h"
20 #include "unicode/uchar.h"
21 #include "cmemory.h"
22 
23 static UErrorCode status = U_ZERO_ERROR;
24 
CollationIteratorTest()25 CollationIteratorTest::CollationIteratorTest()
26  : test1("What subset of all possible test cases?", ""),
27    test2("has the highest probability of detecting", "")
28 {
29     en_us = dynamic_cast<RuleBasedCollator*>(Collator::createInstance(Locale::getUS(), status));
30     if(U_FAILURE(status)) {
31       delete en_us;
32       en_us = nullptr;
33       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
34       return;
35     }
36 
37 }
38 
~CollationIteratorTest()39 CollationIteratorTest::~CollationIteratorTest()
40 {
41     delete en_us;
42 }
43 
44 /**
45  * Test for CollationElementIterator previous and next for the whole set of
46  * unicode characters.
47  */
TestUnicodeChar()48 void CollationIteratorTest::TestUnicodeChar()
49 {
50     CollationElementIterator *iter;
51     char16_t codepoint;
52     UnicodeString source;
53 
54     for (codepoint = 1; codepoint < 0xFFFE;)
55     {
56       source.remove();
57 
58       while (codepoint % 0xFF != 0)
59       {
60         if (u_isdefined(codepoint))
61           source += codepoint;
62         codepoint ++;
63       }
64 
65       if (u_isdefined(codepoint))
66         source += codepoint;
67 
68       if (codepoint != 0xFFFF)
69         codepoint ++;
70 
71       iter = en_us->createCollationElementIterator(source);
72       /* A basic test to see if it's working at all */
73       backAndForth(*iter);
74       delete iter;
75     }
76 }
77 
78 /**
79  * Test for CollationElementIterator.previous()
80  *
81  * @bug 4108758 - Make sure it works with contracting characters
82  *
83  */
TestPrevious()84 void CollationIteratorTest::TestPrevious(/* char* par */)
85 {
86     UErrorCode status = U_ZERO_ERROR;
87     CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
88 
89     // A basic test to see if it's working at all
90     backAndForth(*iter);
91     delete iter;
92 
93     // Test with a contracting character sequence
94     UnicodeString source;
95     RuleBasedCollator *c1 = nullptr;
96     c1 = new RuleBasedCollator(
97         UnicodeString("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"), status);
98 
99     if (c1 == nullptr || U_FAILURE(status))
100     {
101         errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
102         delete c1;
103         return;
104     }
105 
106     source = "abchdcba";
107     iter = c1->createCollationElementIterator(source);
108     backAndForth(*iter);
109     delete iter;
110     delete c1;
111 
112     // Test with an expanding character sequence
113     RuleBasedCollator *c2 = nullptr;
114     c2 = new RuleBasedCollator(UnicodeString("&a < b < c/abd < d"), status);
115 
116     if (c2 == nullptr || U_FAILURE(status))
117     {
118         errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
119         delete c2;
120         return;
121     }
122 
123     source = "abcd";
124     iter = c2->createCollationElementIterator(source);
125     backAndForth(*iter);
126     delete iter;
127     delete c2;
128 
129     // Now try both
130     RuleBasedCollator *c3 = nullptr;
131     c3 = new RuleBasedCollator(UnicodeString("&a < b < c/aba < d < z < ch"), status);
132 
133     if (c3 == nullptr || U_FAILURE(status))
134     {
135         errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
136         delete c3;
137         return;
138     }
139 
140     source = "abcdbchdc";
141     iter = c3->createCollationElementIterator(source);
142     backAndForth(*iter);
143     delete iter;
144     delete c3;
145 
146     status=U_ZERO_ERROR;
147     source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
148 
149     Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
150     if(U_FAILURE(status)){
151         errln("Couldn't create a collator");
152     }
153     iter = (dynamic_cast<RuleBasedCollator*>(c4))->createCollationElementIterator(source);
154     backAndForth(*iter);
155     delete iter;
156     delete c4;
157 
158     source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159     Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
160 
161     iter = (dynamic_cast<RuleBasedCollator*>(c5))->createCollationElementIterator(source);
162     if(U_FAILURE(status)){
163         errln("Couldn't create Japanese collator\n");
164     }
165     backAndForth(*iter);
166     delete iter;
167     delete c5;
168 }
169 
170 /**
171  * Test for getOffset() and setOffset()
172  */
TestOffset()173 void CollationIteratorTest::TestOffset(/* char* par */)
174 {
175     CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
176     UErrorCode status = U_ZERO_ERROR;
177     // testing boundaries
178     iter->setOffset(0, status);
179     if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) {
180         errln("Error: After setting offset to 0, we should be at the end "
181                 "of the backwards iteration");
182     }
183     iter->setOffset(test1.length(), status);
184     if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) {
185         errln("Error: After setting offset to end of the string, we should "
186                 "be at the end of the backwards iteration");
187     }
188 
189     // Run all the way through the iterator, then get the offset
190     int32_t orderLength = 0;
191     Order *orders = getOrders(*iter, orderLength);
192 
193     int32_t offset = iter->getOffset();
194 
195     if (offset != test1.length())
196     {
197         UnicodeString msg1("offset at end != length: ");
198         UnicodeString msg2(" vs ");
199 
200         errln(msg1 + offset + msg2 + test1.length());
201     }
202 
203     // Now set the offset back to the beginning and see if it works
204     CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
205 
206     iter->setOffset(0, status);
207 
208     if (U_FAILURE(status))
209     {
210         errln("setOffset failed.");
211     }
212     else
213     {
214         assertEqual(*iter, *pristine);
215     }
216 
217     delete pristine;
218     delete[] orders;
219     delete iter;
220 
221     // setting offset in the middle of a contraction
222     UnicodeString contraction = "change";
223     status = U_ZERO_ERROR;
224     RuleBasedCollator tailored("& a < ch", status);
225     if (U_FAILURE(status)) {
226         errln("Error: in creation of Spanish collator - %s", u_errorName(status));
227         return;
228     }
229     iter = tailored.createCollationElementIterator(contraction);
230     Order *order = getOrders(*iter, orderLength);
231     iter->setOffset(1, status); // sets offset in the middle of ch
232     int32_t order2Length = 0;
233     Order *order2 = getOrders(*iter, order2Length);
234     if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
235         errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
236     }
237     delete[] order;
238     delete[] order2;
239     delete iter;
240     contraction = "peache";
241     iter = tailored.createCollationElementIterator(contraction);
242     iter->setOffset(3, status);
243     order = getOrders(*iter, orderLength);
244     iter->setOffset(4, status); // sets offset in the middle of ch
245     order2 = getOrders(*iter, order2Length);
246     if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
247         errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
248     }
249     delete[] order;
250     delete[] order2;
251     delete iter;
252     // setting offset in the middle of a surrogate pair
253     UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
254     iter = tailored.createCollationElementIterator(surrogate);
255     order = getOrders(*iter, orderLength);
256     iter->setOffset(1, status); // sets offset in the middle of surrogate
257     order2 = getOrders(*iter, order2Length);
258     if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
259         errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
260     }
261     delete[] order;
262     delete[] order2;
263     delete iter;
264     surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
265     iter = tailored.createCollationElementIterator(surrogate);
266     iter->setOffset(6, status);
267     order = getOrders(*iter, orderLength);
268     iter->setOffset(7, status); // sets offset in the middle of surrogate
269     order2 = getOrders(*iter, order2Length);
270     if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
271         errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
272     }
273     delete[] order;
274     delete[] order2;
275     delete iter;
276     // TODO: try iterating halfway through a messy string.
277 }
278 
279 /**
280  * Test for setText()
281  */
TestSetText()282 void CollationIteratorTest::TestSetText(/* char* par */)
283 {
284     CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
285     CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
286     UErrorCode status = U_ZERO_ERROR;
287 
288     // Run through the second iterator just to exercise it
289     int32_t c = iter2->next(status);
290     int32_t i = 0;
291 
292     while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
293     {
294         if (U_FAILURE(status))
295         {
296             errln("iter2->next() returned an error.");
297             delete iter2;
298             delete iter1;
299         }
300 
301         c = iter2->next(status);
302     }
303 
304     // Now set it to point to the same string as the first iterator
305     iter2->setText(test1, status);
306 
307     if (U_FAILURE(status))
308     {
309         errln("call to iter2->setText(test1) failed.");
310     }
311     else
312     {
313         assertEqual(*iter1, *iter2);
314     }
315     iter1->reset();
316     //now use the overloaded setText(CharacterIterator&, UErrorCode) function to set the text
317     CharacterIterator* chariter = new StringCharacterIterator(test1);
318     iter2->setText(*chariter, status);
319     if (U_FAILURE(status))
320     {
321         errln("call to iter2->setText(chariter(test1)) failed.");
322     }
323     else
324     {
325         assertEqual(*iter1, *iter2);
326     }
327 
328     // test for an empty string
329     UnicodeString empty("");
330     iter1->setText(empty, status);
331     if (U_FAILURE(status)
332         || iter1->next(status) != static_cast<int32_t>(CollationElementIterator::NULLORDER)) {
333         errln("Empty string should have no CEs.");
334     }
335     (dynamic_cast<StringCharacterIterator*>(chariter))->setText(empty);
336     iter1->setText(*chariter, status);
337     if (U_FAILURE(status)
338         || iter1->next(status) != static_cast<int32_t>(CollationElementIterator::NULLORDER)) {
339         errln("Empty string should have no CEs.");
340     }
341     delete chariter;
342     delete iter2;
343     delete iter1;
344 }
345 
346 /** @bug 4108762
347  * Test for getMaxExpansion()
348  */
TestMaxExpansion()349 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
350 {
351     UErrorCode          status = U_ZERO_ERROR;
352     UnicodeString rule("&a < ab < c/aba < d < z < ch");
353     RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
354     char16_t            ch     = 0;
355     UnicodeString       str(ch);
356 
357     CollationElementIterator *iter   = coll->createCollationElementIterator(str);
358 
359     while (ch < 0xFFFF && U_SUCCESS(status)) {
360         int      count = 1;
361         uint32_t order;
362         ch ++;
363         UnicodeString str(ch);
364         iter->setText(str, status);
365         order = iter->previous(status);
366 
367         /* thai management */
368         if (CollationElementIterator::isIgnorable(order))
369             order = iter->previous(status);
370 
371         while (U_SUCCESS(status)
372             && iter->previous(status) != static_cast<int32_t>(CollationElementIterator::NULLORDER))
373         {
374             count ++;
375         }
376 
377         if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
378             errln("Failure at codepoint %d, maximum expansion count < %d\n",
379                 ch, count);
380         }
381     }
382 
383     delete iter;
384     delete coll;
385 }
386 
387 /*
388  * @bug 4157299
389  */
TestClearBuffers()390 void CollationIteratorTest::TestClearBuffers(/* char* par */)
391 {
392     UErrorCode status = U_ZERO_ERROR;
393     RuleBasedCollator* c = new RuleBasedCollator(UnicodeString("&a < b < c & ab = d"), status);
394 
395     if (c == nullptr || U_FAILURE(status))
396     {
397         errln("Couldn't create a RuleBasedCollator.");
398         delete c;
399         return;
400     }
401 
402     UnicodeString source("abcd");
403     CollationElementIterator *i = c->createCollationElementIterator(source);
404     int32_t e0 = i->next(status);    // save the first collation element
405 
406     if (U_FAILURE(status))
407     {
408         errln("call to i->next() failed. err=%s", u_errorName(status));
409     }
410     else
411     {
412         i->setOffset(3, status);        // go to the expanding character
413 
414         if (U_FAILURE(status))
415         {
416             errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
417         }
418         else
419         {
420             i->next(status);                // but only use up half of it
421 
422             if (U_FAILURE(status))
423             {
424                 errln("call to i->next() failed. err=%s", u_errorName(status));
425             }
426             else
427             {
428                 i->setOffset(0, status);        // go back to the beginning
429 
430                 if (U_FAILURE(status))
431                 {
432                     errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
433                 }
434                 else
435                 {
436                     int32_t e = i->next(status);    // and get this one again
437 
438                     if (U_FAILURE(status))
439                     {
440                         errln("call to i->next() failed. err=%s", u_errorName(status));
441                     }
442                     else if (e != e0)
443                     {
444                         errln("got 0x%X, expected 0x%X", e, e0);
445                     }
446                 }
447             }
448         }
449     }
450 
451     delete i;
452     delete c;
453 }
454 
455 /**
456  * Testing the assignment operator
457  */
TestAssignment()458 void CollationIteratorTest::TestAssignment()
459 {
460     UErrorCode status = U_ZERO_ERROR;
461     RuleBasedCollator *coll =
462         dynamic_cast<RuleBasedCollator*>(Collator::createInstance(status));
463 
464     if (coll == nullptr || U_FAILURE(status))
465     {
466         errln("Couldn't create a default collator.");
467         return;
468     }
469 
470     UnicodeString source("abcd");
471     CollationElementIterator *iter1 =
472         coll->createCollationElementIterator(source);
473 
474     CollationElementIterator iter2 = *iter1;
475 
476     if (*iter1 != iter2) {
477         errln("Fail collation iterator assignment does not produce the same elements");
478     }
479 
480     CollationElementIterator iter3(*iter1);
481 
482     if (*iter1 != iter3) {
483         errln("Fail collation iterator copy constructor does not produce the same elements");
484     }
485 
486     source = CharsToUnicodeString("a\\u0300\\u0325");
487     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
488     CollationElementIterator *iter4
489                         = coll->createCollationElementIterator(source);
490     CollationElementIterator iter5(*iter4);
491     int32_t order4, order5;
492     if (*iter4 != iter5) {
493         errln("collation iterator assignment does not produce the same elements");
494     }
495     order4 = iter4->next(status);
496     if (U_FAILURE(status) || *iter4 == iter5) {
497         errln("collation iterator not equal");
498     }
499     order5 = iter5.next(status);
500     if (U_FAILURE(status) || *iter4 != iter5) {
501         errln("collation iterator equal");
502     }
503     order4 = iter4->next(status);
504     if (U_FAILURE(status) || *iter4 == iter5) {
505         errln("collation iterator not equal");
506     }
507     order5 = iter5.next(status);
508     if (U_FAILURE(status) || *iter4 != iter5) {
509         errln("collation iterator equal");
510     }
511     CollationElementIterator iter6(*iter4);
512     if (*iter4 != iter6) {
513         errln("collation iterator equal");
514     }
515     order4 = iter4->next(status);
516     if (U_FAILURE(status) || *iter4 == iter5) {
517         errln("collation iterator not equal");
518     }
519     order5 = iter5.next(status);
520     if (U_FAILURE(status) || *iter4 != iter5) {
521         errln("collation iterator equal");
522     }
523     if (!(order4 == CollationElementIterator::NULLORDER &&
524           order5 == CollationElementIterator::NULLORDER)) {
525         order4 = iter4->next(status);
526         if (U_FAILURE(status) || *iter4 == iter5) {
527             errln("collation iterator not equal");
528         }
529         order5 = iter5.next(status);
530         if (U_FAILURE(status) || *iter4 != iter5) {
531             errln("collation iterator equal");
532         }
533     }
534     delete iter1;
535     delete iter4;
536     delete coll;
537 }
538 
539 /**
540  * Testing the constructors
541  */
TestConstructors()542 void CollationIteratorTest::TestConstructors()
543 {
544     UErrorCode status = U_ZERO_ERROR;
545     RuleBasedCollator *coll =
546         dynamic_cast<RuleBasedCollator*>(Collator::createInstance(status));
547     if (coll == nullptr || U_FAILURE(status))
548     {
549         errln("Couldn't create a default collator.");
550         return;
551     }
552 
553     // testing protected constructor with character iterator as argument
554     StringCharacterIterator chariter(test1);
555     CollationElementIterator *iter1 =
556         coll->createCollationElementIterator(chariter);
557     if (U_FAILURE(status)) {
558         errln("Couldn't create collation element iterator with character iterator.");
559         return;
560     }
561     CollationElementIterator *iter2 =
562         coll->createCollationElementIterator(test1);
563 
564     // initially the 2 collation element iterators should be the same
565     if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
566         || *iter2 != *iter1) {
567         errln("CollationElementIterators constructed with the same string data should be the same at the start");
568     }
569     assertEqual(*iter1, *iter2);
570 
571     delete iter1;
572     delete iter2;
573 
574     // tests empty strings
575     UnicodeString empty("");
576     iter1 = coll->createCollationElementIterator(empty);
577     chariter.setText(empty);
578     iter2 = coll->createCollationElementIterator(chariter);
579     if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
580         || *iter2 != *iter1) {
581         errln("CollationElementIterators constructed with the same string data should be the same at the start");
582     }
583     if (iter1->next(status) != static_cast<int32_t>(CollationElementIterator::NULLORDER)) {
584         errln("Empty string should have no CEs.");
585     }
586     if (iter2->next(status) != static_cast<int32_t>(CollationElementIterator::NULLORDER)) {
587         errln("Empty string should have no CEs.");
588     }
589     delete iter1;
590     delete iter2;
591     delete coll;
592 }
593 
594 /**
595  * Testing the strength order
596  */
TestStrengthOrder()597 void CollationIteratorTest::TestStrengthOrder()
598 {
599     int order = 0x0123ABCD;
600 
601     UErrorCode status = U_ZERO_ERROR;
602     RuleBasedCollator *coll =
603         dynamic_cast<RuleBasedCollator*>(Collator::createInstance(status));
604     if (coll == nullptr || U_FAILURE(status))
605     {
606         errln("Couldn't create a default collator.");
607         return;
608     }
609 
610     coll->setStrength(Collator::PRIMARY);
611     CollationElementIterator *iter =
612         coll->createCollationElementIterator(test1);
613 
614     if (iter == nullptr) {
615         errln("Couldn't create a collation element iterator from default collator");
616         return;
617     }
618 
619     if (iter->strengthOrder(order) != 0x01230000) {
620         errln("Strength order for a primary strength collator should be the first 2 bytes");
621         return;
622     }
623 
624     coll->setStrength(Collator::SECONDARY);
625     if (iter->strengthOrder(order) != 0x0123AB00) {
626         errln("Strength order for a secondary strength collator should be the third byte");
627         return;
628     }
629 
630     coll->setStrength(Collator::TERTIARY);
631     if (iter->strengthOrder(order) != order) {
632         errln("Strength order for a tertiary strength collator should be the third byte");
633         return;
634     }
635     delete iter;
636     delete coll;
637 }
638 
639 /**
640  * Return a string containing all of the collation orders
641  * returned by calls to next on the specified iterator
642  */
orderString(CollationElementIterator & iter,UnicodeString & target)643 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
644 {
645     int32_t order;
646     UErrorCode status = U_ZERO_ERROR;
647 
648     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
649     {
650         target += "0x";
651         appendHex(order, 8, target);
652         target += " ";
653     }
654 
655     return target;
656 }
657 
assertEqual(CollationElementIterator & i1,CollationElementIterator & i2)658 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
659 {
660     int32_t c1, c2, count = 0;
661     UErrorCode status = U_ZERO_ERROR;
662 
663     do
664     {
665         c1 = i1.next(status);
666         c2 = i2.next(status);
667 
668         if (c1 != c2)
669         {
670             errln("    %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
671             break;
672         }
673 
674         count += 1;
675     }
676     while (c1 != CollationElementIterator::NULLORDER);
677 }
678 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)679 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
680 {
681     if (exec)
682     {
683         logln("Collation Iteration Tests: ");
684     }
685 
686     if(en_us) {
687       switch (index)
688       {
689           case  0: name = "TestPrevious";      if (exec) TestPrevious(/* par */);     break;
690           case  1: name = "TestOffset";        if (exec) TestOffset(/* par */);       break;
691           case  2: name = "TestSetText";       if (exec) TestSetText(/* par */);      break;
692           case  3: name = "TestMaxExpansion";  if (exec) TestMaxExpansion(/* par */); break;
693           case  4: name = "TestClearBuffers";  if (exec) TestClearBuffers(/* par */); break;
694           case  5: name = "TestUnicodeChar";   if (exec) TestUnicodeChar(/* par */);  break;
695           case  6: name = "TestAssignment";    if (exec) TestAssignment(/* par */);    break;
696           case  7: name = "TestConstructors";  if (exec) TestConstructors(/* par */); break;
697           case  8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
698           default: name = ""; break;
699       }
700     } else {
701       dataerrln("Class iterator not instantiated");
702       name = "";
703     }
704 }
705 
706 #endif /* #if !UCONFIG_NO_COLLATION */
707