1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include "unicode/utypes.h"
10
11 #if !UCONFIG_NO_COLLATION
12
13 #include "unicode/coll.h"
14 #include "unicode/tblcoll.h"
15 #include "unicode/unistr.h"
16 #include "unicode/sortkey.h"
17 #include "g7coll.h"
18 #include "sfwdchit.h"
19 #include "cmemory.h"
20
21 static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = {
22 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
23 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000}, /* 9 */
24 { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000}, /* 1 */
25 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000}, /* 2 */
26 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000}, /* 3 */
27 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 4 */
28 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 5 */
29 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000}, /* 6 */
30 { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000}, /* 7 */
31 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000}, /* 8 */
32 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
33 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 12 */
34 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000}, /* 10 */
35 { 0x0050 /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000}, /* 11 */
36 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
37 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 13 */
38 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
39 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000}, /* 0 */
40 {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000}, /* 14 */
41 /* Additional tests */
42 { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 }, /* 15 */
43 { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 }, /* 16 */
44 { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 }, /* 17 */
45 { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 }, /* 18 */
46 { 0x003f /*'?'*/, 0x0000 }, /* 19 */
47 { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 }, /* 20 */
48 { 0x0023 /*'#'*/, 0x0000 }, /* 21 */
49 { 0x0026 /*'&'*/, 0x0000 }, /* 22 */
50 { 0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
51 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 24 */
52 { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
53 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 23 */
54 { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000}, /* 25 */
55 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000}, /* 27 */
56 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 28 */
57 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 26 */
58 { 0x007a /*'z'*/, 0x0065 /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000} /* 29 */
59 };
60
61 static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = {
62 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
63 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
64 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
65 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
66 { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
67 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
68 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
69 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
70 /* new table collation with rules "& Z < p, P" loop to FIXEDTESTSET */
71 { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
72 /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
73 { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
74 /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&' " loop to TOTALTESTSET */
75 { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
76 /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */ /* loop to TOTALTESTSET */
77 { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
78 };
79
~G7CollationTest()80 G7CollationTest::~G7CollationTest() {}
81
TestG7Locales()82 void G7CollationTest::TestG7Locales(/* char* par */)
83 {
84 int32_t i;
85 const Locale locales[8] = {
86 Locale("en", "US", ""),
87 Locale("en", "GB", ""),
88 Locale("en", "CA", ""),
89 Locale("fr", "FR", ""),
90 Locale("fr", "CA", ""),
91 Locale("de", "DE", ""),
92 Locale("it", "IT", ""),
93 Locale("ja", "JP", "")
94 };
95
96 for (i = 0; i < UPRV_LENGTHOF(locales); i++)
97 {
98 UnicodeString dispName;
99 UErrorCode status = U_ZERO_ERROR;
100
101 const Locale &locale = locales[i];
102 LocalPointer<Collator> myCollation(Collator::createInstance(locale, status));
103 if(U_FAILURE(status)) {
104 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
105 return;
106 }
107 myCollation->setStrength(Collator::QUATERNARY);
108 myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
109 if (U_FAILURE(status)) {
110 errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status));
111 continue;
112 }
113
114 const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules();
115 if (rules.isEmpty() &&
116 (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) {
117 dataerrln("%s Collator missing rule string", locale.getName());
118 if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
119 continue;
120 }
121 } else {
122 status = U_ZERO_ERROR;
123 RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status);
124 if (U_FAILURE(status)) {
125 errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status));
126 continue;
127 }
128 myCollation.adoptInstead(tblColl1);
129 }
130
131 UnicodeString msg;
132
133 msg += "Locale ";
134 msg += locales[i].getDisplayName(dispName);
135 msg += "tests start :";
136 logln(msg);
137
138 int32_t j, n;
139 for (j = 0; j < FIXEDTESTSET; j++)
140 {
141 for (n = j+1; n < FIXEDTESTSET; n++)
142 {
143 doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS);
144 }
145 }
146 }
147 }
148
TestDemo1()149 void G7CollationTest::TestDemo1(/* char* par */)
150 {
151 logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\"");
152 UErrorCode status = U_ZERO_ERROR;
153 Collator *col = Collator::createInstance("en_US", status);
154 if(U_FAILURE(status)) {
155 delete col;
156 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
157 return;
158 }
159 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
160 UnicodeString newRules(" & Z < p, P");
161 newRules.insert(0, baseRules);
162 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
163
164 if (U_FAILURE(status))
165 {
166 errln( "Demo Test 1 Table Collation object creation failed.");
167 return;
168 }
169
170 int32_t j, n;
171 for (j = 0; j < FIXEDTESTSET; j++)
172 {
173 for (n = j+1; n < FIXEDTESTSET; n++)
174 {
175 doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS);
176 }
177 }
178
179 delete myCollation;
180 delete col;
181 }
182
TestDemo2()183 void G7CollationTest::TestDemo2(/* char* par */)
184 {
185 logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
186 UErrorCode status = U_ZERO_ERROR;
187 Collator *col = Collator::createInstance("en_US", status);
188 if(U_FAILURE(status)) {
189 delete col;
190 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
191 return;
192 }
193 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
194 UnicodeString newRules("& C < ch , cH, Ch, CH");
195 newRules.insert(0, baseRules);
196 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
197
198 if (U_FAILURE(status))
199 {
200 errln("Demo Test 2 Table Collation object creation failed.");
201 return;
202 }
203
204 int32_t j, n;
205 for (j = 0; j < TOTALTESTSET; j++)
206 {
207 for (n = j+1; n < TOTALTESTSET; n++)
208 {
209 doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS);
210 }
211 }
212
213 delete myCollation;
214 delete col;
215 }
216
TestDemo3()217 void G7CollationTest::TestDemo3(/* char* par */)
218 {
219 logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\"");
220 UErrorCode status = U_ZERO_ERROR;
221 Collator *col = Collator::createInstance("en_US", status);
222 if(U_FAILURE(status)) {
223 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
224 delete col;
225 return;
226 }
227 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
228 UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
229 newRules.insert(0, baseRules);
230 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
231
232 if (U_FAILURE(status))
233 {
234 errln("Demo Test 3 Table Collation object creation failed.");
235 return;
236 }
237
238 int32_t j, n;
239 for (j = 0; j < TOTALTESTSET; j++)
240 {
241 for (n = j+1; n < TOTALTESTSET; n++)
242 {
243 doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS);
244 }
245 }
246
247 delete myCollation;
248 delete col;
249 }
250
TestDemo4()251 void G7CollationTest::TestDemo4(/* char* par */)
252 {
253 logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"");
254 UErrorCode status = U_ZERO_ERROR;
255 Collator *col = Collator::createInstance("en_US", status);
256 if(U_FAILURE(status)) {
257 delete col;
258 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
259 return;
260 }
261
262 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
263 UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
264 newRules.insert(0, baseRules);
265 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
266
267 int32_t j, n;
268 for (j = 0; j < TOTALTESTSET; j++)
269 {
270 for (n = j+1; n < TOTALTESTSET; n++)
271 {
272 doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS);
273 }
274 }
275
276 delete myCollation;
277 delete col;
278 }
279
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)280 void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
281 {
282 if (exec) logln("TestSuite G7CollationTest: ");
283 switch (index) {
284 case 0: name = "TestG7Locales"; if (exec) TestG7Locales(/* par */); break;
285 case 1: name = "TestDemo1"; if (exec) TestDemo1(/* par */); break;
286 case 2: name = "TestDemo2"; if (exec) TestDemo2(/* par */); break;
287 case 3: name = "TestDemo3"; if (exec) TestDemo3(/* par */); break;
288 case 4: name = "TestDemo4"; if (exec) TestDemo4(/* par */); break;
289 default: name = ""; break;
290 }
291 }
292
293 #endif /* #if !UCONFIG_NO_COLLATION */
294