1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File CG7COLL.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 *********************************************************************************/
16 /**
17 * G7CollationTest is a third level test class. This test performs the examples
18 * mentioned on the IBM Java international demos web site.
19 * Sample Rules: & Z < p , P
20 * Effect : Making P sort after Z.
21 *
22 * Sample Rules: & c < ch , cH, Ch, CH
23 * Effect : As well as adding sequences of characters that act as a single character (this is
24 * known as contraction), you can also add characters that act like a sequence of
25 * characters (this is known as expansion).
26 *
27 * Sample Rules: & Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'
28 * Effect : Expansion and contraction can actually be combined.
29 *
30 * Sample Rules: & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-'
31 * Effect : sorted sequence as the following,
32 * aardvark
33 * a-rdvark
34 * abbot
35 * coop
36 * co-p
37 * cop
38 */
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdio.h>
42
43 #include "unicode/utypes.h"
44
45 #if !UCONFIG_NO_COLLATION
46
47 #include "unicode/ucol.h"
48 #include "unicode/uloc.h"
49 #include "cintltst.h"
50 #include "cg7coll.h"
51 #include "ccolltst.h"
52 #include "callcoll.h"
53 #include "unicode/ustring.h"
54
/*
 * Locale identifiers exercised by TestG7Locales. That test uses the position
 * of a locale in this array as the row index into the results table.
 */
const char* locales[8] = {
    "en_US", "en_GB", "en_CA",   /* English */
    "fr_FR", "fr_CA",            /* French */
    "de_DE",
    "it_IT",
    "ja_JP"
};
65
66
67
68 const static UChar testCases[][MAX_TOKEN_LEN] = {
69 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
70 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000}, /* 9 */
71 { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000}, /* 1 */
72 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000}, /* 2 */
73 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000}, /* 3 */
74 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 4 */
75 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 5 */
76 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000}, /* 6 */
77 { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000}, /* 7 */
78 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000}, /* 8 */
79 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
80 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 12 */
81 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000}, /* 10 */
82 { 0x0050 /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000}, /* 11 */
83 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
84 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 13 */
85 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
86 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000}, /* 0 */
87 {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000}, /* 14 */
88 /* Additional tests */
89 { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 }, /* 15 */
90 { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 }, /* 16 */
91 { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 }, /* 17 */
92 { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 }, /* 18 */
93 { 0x003f /*'?'*/, 0x0000 }, /* 19 */
94 { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 }, /* 20 */
95 { 0x0023 /*'#'*/, 0x0000 }, /* 21 */
96 { 0x0026 /*'&'*/, 0x0000 }, /* 22 */
97 { 0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
98 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 24 */
99 { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
100 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 23 */
101 { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000}, /* 25 */
102 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000}, /* 27 */
103 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 28 */
104 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 26 */
105 { 0x007a /*'z'*/, 0x0065 /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000} /* 29 */
106 };
107
108 const static int32_t results[TESTLOCALES][TOTALTESTSET] = {
109 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
110 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
111 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
112 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
113 { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
114 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
115 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
116 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
117 /* new table collation with rules "& Z < p, P" loop to FIXEDTESTSET */
118 { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
119 /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
120 { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
121 /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&' " loop to TOTALTESTSET */
122 { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
123 /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */ /* loop to TOTALTESTSET */
124 { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
125 };
126
/*
 * Registers the five test functions defined in this file through addTest,
 * each under its own "tscoll/cg7coll/..." path.
 *
 * root is handed unchanged to every addTest call.
 */
void addRuleBasedCollTest(TestNode** root)
{
    /* Locale-driven test first, then the four rule-string demos. */
    addTest(root, TestG7Locales, "tscoll/cg7coll/TestG7Locales");
    addTest(root, TestDemo1, "tscoll/cg7coll/TestDemo1");
    addTest(root, TestDemo2, "tscoll/cg7coll/TestDemo2");
    addTest(root, TestDemo3, "tscoll/cg7coll/TestDemo3");
    addTest(root, TestDemo4, "tscoll/cg7coll/TestDemo4");
}
137
TestG7Locales()138 static void TestG7Locales()
139 {
140 UCollator *myCollation;
141 UErrorCode status = U_ZERO_ERROR;
142 const UChar *defRules;
143 int32_t i, rlen, j, n;
144 log_verbose("Testing ucol_openRules for all the locales\n");
145 for (i = 0; i < UPRV_LENGTHOF(locales); i++)
146 {
147 const char *locale = locales[i];
148 status = U_ZERO_ERROR;
149 myCollation = ucol_open(locale, &status);
150 ucol_setAttribute(myCollation, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
151 ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
152
153 if (U_FAILURE(status))
154 {
155 log_err_status(status, "Error in creating collator in %s: %s\n", locale, myErrorName(status));
156 ucol_close(myCollation);
157 continue;
158 }
159
160 defRules = ucol_getRules(myCollation, &rlen);
161 if (rlen == 0 && (strcmp(locale, "fr_CA") == 0 || strcmp(locale, "ja_JP") == 0)) {
162 log_data_err("%s UCollator missing rule string\n", locale);
163 if (log_knownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
164 ucol_close(myCollation);
165 continue;
166 }
167 } else {
168 UCollator *tblColl1;
169 status = U_ZERO_ERROR;
170 tblColl1 = ucol_openRules(defRules, rlen, UCOL_OFF,
171 UCOL_DEFAULT_STRENGTH,NULL, &status);
172 ucol_close(myCollation);
173 if (U_FAILURE(status))
174 {
175 log_err_status(status, "Error in creating collator in %s: %s\n", locale, myErrorName(status));
176 continue;
177 }
178 myCollation = tblColl1;
179 }
180
181 log_verbose("Locale %s\n", locales[i]);
182 log_verbose(" tests start...\n");
183
184 j = 0;
185 n = 0;
186 for (j = 0; j < FIXEDTESTSET; j++)
187 {
188 for (n = j+1; n < FIXEDTESTSET; n++)
189 {
190 doTest(myCollation, testCases[results[i][j]], testCases[results[i][n]], UCOL_LESS);
191 }
192 }
193
194 ucol_close(myCollation);
195 }
196 }
197
TestDemo1()198 static void TestDemo1()
199 {
200 UCollator *myCollation;
201 int32_t j, n;
202 static const char rules[] = "& Z < p, P";
203 int32_t len=(int32_t)strlen(rules);
204 UChar temp[sizeof(rules)];
205 UErrorCode status = U_ZERO_ERROR;
206 u_uastrcpy(temp, rules);
207
208 log_verbose("Demo Test 1 : Create a new table collation with rules \" & Z < p, P \" \n");
209
210 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
211
212 if (U_FAILURE(status))
213 {
214 log_err_status(status, "Demo Test 1 Rule collation object creation failed. : %s\n", myErrorName(status));
215 return;
216 }
217
218 for (j = 0; j < FIXEDTESTSET; j++)
219 {
220 for (n = j+1; n < FIXEDTESTSET; n++)
221 {
222 doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], UCOL_LESS);
223 }
224 }
225
226 ucol_close(myCollation);
227 }
228
TestDemo2()229 static void TestDemo2()
230 {
231 UCollator *myCollation;
232 int32_t j, n;
233 static const char rules[] = "& C < ch , cH, Ch, CH";
234 int32_t len=(int32_t)strlen(rules);
235 UChar temp[sizeof(rules)];
236 UErrorCode status = U_ZERO_ERROR;
237 u_uastrcpy(temp, rules);
238
239 log_verbose("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
240
241 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
242
243 if (U_FAILURE(status))
244 {
245 log_err_status(status, "Demo Test 2 Rule collation object creation failed.: %s\n", myErrorName(status));
246 return;
247 }
248 for (j = 0; j < TOTALTESTSET; j++)
249 {
250 for (n = j+1; n < TOTALTESTSET; n++)
251 {
252 doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], UCOL_LESS);
253 }
254 }
255 ucol_close(myCollation);
256 }
257
TestDemo3()258 static void TestDemo3()
259 {
260 UCollator *myCollation;
261 int32_t j, n;
262 static const char rules[] = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
263 int32_t len=(int32_t)strlen(rules);
264 UChar temp[sizeof(rules)];
265 UErrorCode status = U_ZERO_ERROR;
266 u_uastrcpy(temp, rules);
267
268 log_verbose("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\" \n");
269
270 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
271
272 if (U_FAILURE(status))
273 {
274 log_err_status(status, "Demo Test 3 Rule collation object creation failed.: %s\n", myErrorName(status));
275 return;
276 }
277
278 for (j = 0; j < TOTALTESTSET; j++)
279 {
280 for (n = j+1; n < TOTALTESTSET; n++)
281 {
282 doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], UCOL_LESS);
283 }
284 }
285 ucol_close(myCollation);
286 }
287
TestDemo4()288 static void TestDemo4()
289 {
290 UCollator *myCollation;
291 int32_t j, n;
292 static const char rules[] = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
293 int32_t len=(int32_t)strlen(rules);
294 UChar temp[sizeof(rules)];
295 UErrorCode status = U_ZERO_ERROR;
296 u_uastrcpy(temp, rules);
297
298 log_verbose("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"\n");
299
300 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
301
302 if (U_FAILURE(status))
303 {
304 log_err_status(status, "Demo Test 4 Rule collation object creation failed.: %s\n", myErrorName(status));
305 return;
306 }
307 for (j = 0; j < TOTALTESTSET; j++)
308 {
309 for (n = j+1; n < TOTALTESTSET; n++)
310 {
311 doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], UCOL_LESS);
312 }
313 }
314 ucol_close(myCollation);
315 }
316
317 #endif /* #if !UCONFIG_NO_COLLATION */
318