1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_COLLATION
12 
13 #include "unicode/coll.h"
14 #include "unicode/tblcoll.h"
15 #include "unicode/unistr.h"
16 #include "unicode/sortkey.h"
17 #include "g7coll.h"
18 #include "sfwdchit.h"
19 #include "cmemory.h"
20 
21 static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = {
22     {  0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
23         0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000},                    /* 9 */
24     { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000},                                                    /* 1 */
25     { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000},                                    /* 2 */
26     { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000},                           /* 3 */
27     { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 4 */
28     { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 5 */
29     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000},                                                    /* 6 */
30     { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000},                                            /* 7 */
31     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000},                                   /* 8 */
32     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
33       0x0062  /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                                    /* 12 */
34     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000},                                                    /* 10 */
35     { 0x0050  /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000},                                                    /* 11 */
36     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
37         0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                /* 13 */
38     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
39         0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000},  /* 0 */
40     {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000},                                                    /* 14 */
41     /* Additional tests */
42     { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 },                                 /* 15 */
43     { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 },                  /* 16 */
44     { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 },                                                    /* 17 */
45     { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 },                                 /* 18 */
46     { 0x003f /*'?'*/, 0x0000 },                                                                                /* 19 */
47     { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 },                  /* 20 */
48     { 0x0023 /*'#'*/, 0x0000 },                                                                                /* 21 */
49     { 0x0026 /*'&'*/, 0x0000 },                                                                                /* 22 */
50     {  0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
51                 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 24 */
52     { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
53                 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 23 */
54     { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000},                   /* 25 */
55     { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 27 */
56     { 0x0063 /*'c'*/, 0x006f  /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                                /* 28 */
57     { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 26 */
58     { 0x007a /*'z'*/, 0x0065  /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000}                    /* 29 */
59 };
60 
61 static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = {
62     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
63     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
64     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
65     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
66     { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
67     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
68     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
69     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
70     /* new table collation with rules "& Z < p, P"  loop to FIXEDTESTSET */
71     { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
72     /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
73     { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
74     /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&'  " loop to TOTALTESTSET */
75     { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
76     /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */  /* loop to TOTALTESTSET */
77     { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
78 };
79 
~G7CollationTest()80 G7CollationTest::~G7CollationTest() {}
81 
TestG7Locales()82 void G7CollationTest::TestG7Locales(/* char* par */)
83 {
84     int32_t i;
85     const Locale locales[8] = {
86         Locale("en", "US", ""),
87         Locale("en", "GB", ""),
88         Locale("en", "CA", ""),
89         Locale("fr", "FR", ""),
90         Locale("fr", "CA", ""),
91         Locale("de", "DE", ""),
92         Locale("it", "IT", ""),
93         Locale("ja", "JP", "")
94     };
95 
96     for (i = 0; i < UPRV_LENGTHOF(locales); i++)
97     {
98         UnicodeString dispName;
99         UErrorCode status = U_ZERO_ERROR;
100 
101         const Locale &locale = locales[i];
102         LocalPointer<Collator> myCollation(Collator::createInstance(locale, status));
103         if(U_FAILURE(status)) {
104           errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
105           return;
106         }
107         myCollation->setStrength(Collator::QUATERNARY);
108         myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
109         if (U_FAILURE(status)) {
110             errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status));
111             continue;
112         }
113 
114         const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules();
115         if (rules.isEmpty() &&
116                 (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) {
117             dataerrln("%s Collator missing rule string", locale.getName());
118             if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
119                 continue;
120             }
121         } else {
122             status = U_ZERO_ERROR;
123             RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status);
124             if (U_FAILURE(status)) {
125                 errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status));
126                 continue;
127             }
128             myCollation.adoptInstead(tblColl1);
129         }
130 
131         UnicodeString msg;
132 
133         msg += "Locale ";
134         msg += locales[i].getDisplayName(dispName);
135         msg += "tests start :";
136         logln(msg);
137 
138         int32_t j, n;
139         for (j = 0; j < FIXEDTESTSET; j++)
140         {
141             for (n = j+1; n < FIXEDTESTSET; n++)
142             {
143                 doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS);
144             }
145         }
146     }
147 }
148 
TestDemo1()149 void G7CollationTest::TestDemo1(/* char* par */)
150 {
151     logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\"");
152     UErrorCode status = U_ZERO_ERROR;
153     Collator *col = Collator::createInstance("en_US", status);
154     if(U_FAILURE(status)) {
155       delete col;
156       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
157       return;
158     }
159     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
160     UnicodeString newRules(" & Z < p, P");
161     newRules.insert(0, baseRules);
162     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
163 
164     if (U_FAILURE(status))
165     {
166         errln( "Demo Test 1 Table Collation object creation failed.");
167         return;
168     }
169 
170     int32_t j, n;
171     for (j = 0; j < FIXEDTESTSET; j++)
172     {
173         for (n = j+1; n < FIXEDTESTSET; n++)
174         {
175             doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS);
176         }
177     }
178 
179     delete myCollation;
180     delete col;
181 }
182 
TestDemo2()183 void G7CollationTest::TestDemo2(/* char* par */)
184 {
185     logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
186     UErrorCode status = U_ZERO_ERROR;
187     Collator *col = Collator::createInstance("en_US", status);
188     if(U_FAILURE(status)) {
189       delete col;
190       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
191       return;
192     }
193     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
194     UnicodeString newRules("& C < ch , cH, Ch, CH");
195     newRules.insert(0, baseRules);
196     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
197 
198     if (U_FAILURE(status))
199     {
200         errln("Demo Test 2 Table Collation object creation failed.");
201         return;
202     }
203 
204     int32_t j, n;
205     for (j = 0; j < TOTALTESTSET; j++)
206     {
207         for (n = j+1; n < TOTALTESTSET; n++)
208         {
209             doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS);
210         }
211     }
212 
213     delete myCollation;
214     delete col;
215 }
216 
TestDemo3()217 void G7CollationTest::TestDemo3(/* char* par */)
218 {
219     logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\"");
220     UErrorCode status = U_ZERO_ERROR;
221     Collator *col = Collator::createInstance("en_US", status);
222     if(U_FAILURE(status)) {
223       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
224       delete col;
225       return;
226     }
227     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
228     UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
229     newRules.insert(0, baseRules);
230     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
231 
232     if (U_FAILURE(status))
233     {
234         errln("Demo Test 3 Table Collation object creation failed.");
235         return;
236     }
237 
238     int32_t j, n;
239     for (j = 0; j < TOTALTESTSET; j++)
240     {
241         for (n = j+1; n < TOTALTESTSET; n++)
242         {
243             doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS);
244         }
245     }
246 
247     delete myCollation;
248     delete col;
249 }
250 
TestDemo4()251 void G7CollationTest::TestDemo4(/* char* par */)
252 {
253     logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"");
254     UErrorCode status = U_ZERO_ERROR;
255     Collator *col = Collator::createInstance("en_US", status);
256     if(U_FAILURE(status)) {
257       delete col;
258       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
259       return;
260     }
261 
262     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
263     UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
264     newRules.insert(0, baseRules);
265     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
266 
267     int32_t j, n;
268     for (j = 0; j < TOTALTESTSET; j++)
269     {
270         for (n = j+1; n < TOTALTESTSET; n++)
271         {
272             doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS);
273         }
274     }
275 
276     delete myCollation;
277     delete col;
278 }
279 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)280 void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
281 {
282     if (exec) logln("TestSuite G7CollationTest: ");
283     switch (index) {
284         case 0: name = "TestG7Locales"; if (exec)   TestG7Locales(/* par */); break;
285         case 1: name = "TestDemo1"; if (exec)   TestDemo1(/* par */); break;
286         case 2: name = "TestDemo2"; if (exec)   TestDemo2(/* par */); break;
287         case 3: name = "TestDemo3"; if (exec)   TestDemo3(/* par */); break;
288         case 4: name = "TestDemo4"; if (exec)   TestDemo4(/* par */); break;
289         default: name = ""; break;
290     }
291 }
292 
293 #endif /* #if !UCONFIG_NO_COLLATION */
294