1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2002-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
/**
 * UCAConformanceTest performs the conformance tests defined in the data
 * files. ICU ships with stub data files only, because the full test files
 * are too large. To run the full test, download the complete test files.
 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_COLLATION
16
17 #include "ucaconf.h"
18 #include "unicode/ustring.h"
19 #include "cmemory.h"
20 #include "cstring.h"
21 #include "uparse.h"
22
UCAConformanceTest()23 UCAConformanceTest::UCAConformanceTest() :
24 rbUCA(NULL),
25 testFile(NULL),
26 status(U_ZERO_ERROR)
27 {
28 UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status);
29 if(U_FAILURE(status)) {
30 dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status));
31 }
32
33 const char *srcDir = IntlTest::getSourceTestData(status);
34 if (U_FAILURE(status)) {
35 dataerrln("Could not open test data %s", u_errorName(status));
36 return;
37 }
38 uprv_strcpy(testDataPath, srcDir);
39 uprv_strcat(testDataPath, "CollationTest_");
40
41 UVersionInfo uniVersion;
42 static const UVersionInfo v62 = { 6, 2, 0, 0 };
43 u_getUnicodeVersion(uniVersion);
44 isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0;
45 }
46
~UCAConformanceTest()47 UCAConformanceTest::~UCAConformanceTest()
48 {
49 delete UCA;
50 delete rbUCA;
51 if (testFile) {
52 fclose(testFile);
53 }
54 }
55
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)56 void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
57 {
58 if(exec) {
59 logln("TestSuite UCAConformanceTest: ");
60 }
61 TESTCASE_AUTO_BEGIN;
62 TESTCASE_AUTO(TestTableNonIgnorable);
63 TESTCASE_AUTO(TestTableShifted);
64 TESTCASE_AUTO(TestRulesNonIgnorable);
65 TESTCASE_AUTO(TestRulesShifted);
66 TESTCASE_AUTO_END;
67 }
68
initRbUCA()69 void UCAConformanceTest::initRbUCA()
70 {
71 if(!rbUCA) {
72 UnicodeString ucarules;
73 if (UCA) {
74 UCA->getRules(UCOL_FULL_RULES, ucarules);
75 rbUCA = new RuleBasedCollator(ucarules, status);
76 if (U_FAILURE(status)) {
77 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status));
78 return;
79 }
80 } else {
81 dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status));
82 return;
83 }
84 }
85 }
86
setCollNonIgnorable(Collator * coll)87 void UCAConformanceTest::setCollNonIgnorable(Collator *coll)
88 {
89 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
90 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
91 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
92 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status);
93 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status);
94 }
95
setCollShifted(Collator * coll)96 void UCAConformanceTest::setCollShifted(Collator *coll)
97 {
98 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
99 coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
100 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
101 coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status);
102 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
103 }
104
openTestFile(const char * type)105 void UCAConformanceTest::openTestFile(const char *type)
106 {
107 const char *ext = ".txt";
108 if(testFile) {
109 fclose(testFile);
110 }
111 char buffer[1024];
112 uprv_strcpy(buffer, testDataPath);
113 uprv_strcat(buffer, type);
114 int32_t bufLen = (int32_t)uprv_strlen(buffer);
115
116 // we try to open 3 files:
117 // path/CollationTest_type.txt
118 // path/CollationTest_type_SHORT.txt
119 // path/CollationTest_type_STUB.txt
120 // we are going to test with the first one that we manage to open.
121
122 uprv_strcpy(buffer+bufLen, ext);
123
124 testFile = fopen(buffer, "rb");
125
126 if(testFile == 0) {
127 uprv_strcpy(buffer+bufLen, "_SHORT");
128 uprv_strcat(buffer, ext);
129 testFile = fopen(buffer, "rb");
130
131 if(testFile == 0) {
132 uprv_strcpy(buffer+bufLen, "_STUB");
133 uprv_strcat(buffer, ext);
134 testFile = fopen(buffer, "rb");
135
136 if (testFile == 0) {
137 *(buffer+bufLen) = 0;
138 dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer);
139 return;
140 } else {
141 infoln(
142 "INFO: Working with the stub file.\n"
143 "If you need the full conformance test, please\n"
144 "download the appropriate data files from:\n"
145 "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
146 }
147 }
148 }
149 }
150
151 static const uint32_t IS_SHIFTED = 1;
152 static const uint32_t FROM_RULES = 2;
153
154 static UBool
skipLineBecauseOfBug(const UChar * s,int32_t length,uint32_t flags)155 skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) {
156 // TODO: Fix ICU ticket #8052
157 if(length >= 3 &&
158 (s[0] == 0xfb2 || s[0] == 0xfb3) &&
159 s[1] == 0x334 &&
160 (s[2] == 0xf73 || s[2] == 0xf75 || s[2] == 0xf81)) {
161 return TRUE;
162 }
163 // TODO: Fix ICU ticket #9361
164 if((flags & IS_SHIFTED) != 0 && length >= 2 && s[0] == 0xfffe) {
165 return TRUE;
166 }
167 // TODO: Fix tailoring builder, ICU ticket #9593.
168 UChar c;
169 if((flags & FROM_RULES) != 0 && length >= 2 && ((c = s[1]) == 0xedc || c == 0xedd)) {
170 return TRUE;
171 }
172 return FALSE;
173 }
174
175 static UCollationResult
normalizeResult(int32_t result)176 normalizeResult(int32_t result) {
177 return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER;
178 }
179
testConformance(const Collator * coll)180 void UCAConformanceTest::testConformance(const Collator *coll)
181 {
182 if(testFile == 0) {
183 return;
184 }
185 uint32_t skipFlags = 0;
186 if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) {
187 skipFlags |= IS_SHIFTED;
188 }
189 if(coll == rbUCA) {
190 skipFlags |= FROM_RULES;
191 }
192
193 int32_t line = 0;
194
195 UChar b1[1024], b2[1024];
196 UChar *buffer = b1, *oldB = NULL;
197
198 char lineB1[1024], lineB2[1024];
199 char *lineB = lineB1, *oldLineB = lineB2;
200
201 uint8_t sk1[1024], sk2[1024];
202 uint8_t *oldSk = NULL, *newSk = sk1;
203
204 int32_t oldLen = 0;
205 int32_t oldBlen = 0;
206 uint32_t first = 0;
207
208 while (fgets(lineB, 1024, testFile) != NULL) {
209 // remove trailing whitespace
210 u_rtrim(lineB);
211
212 line++;
213 if(*lineB == 0 || lineB[0] == '#') {
214 continue;
215 }
216 int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status);
217 if(U_FAILURE(status)) {
218 errln("Error parsing line %ld (%s): %s\n",
219 (long)line, u_errorName(status), lineB);
220 status = U_ZERO_ERROR;
221 }
222 buffer[buflen] = 0;
223
224 if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) {
225 logln("Skipping line %i because of a known bug", line);
226 continue;
227 }
228
229 int32_t resLen = coll->getSortKey(buffer, buflen, newSk, 1024);
230
231 if(oldSk != NULL) {
232 int32_t skres = strcmp((char *)oldSk, (char *)newSk);
233 int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status);
234 int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status);
235
236 if(cmpres != -cmpres2) {
237 errln("Compare result not symmetrical on line %i", line);
238 }
239
240 if(cmpres != normalizeResult(skres)) {
241 errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i",
242 cmpres, skres, line);
243 errln(" Previous data line %s", oldLineB);
244 errln(" Current data line %s", lineB);
245 }
246
247 int32_t res = cmpres;
248 if(res == 0 && !isAtLeastUCA62) {
249 // Up to UCA 6.1, the collation test files use a custom tie-breaker,
250 // comparing the raw input strings.
251 res = u_strcmpCodePointOrder(oldB, buffer);
252 // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
253 // comparing the NFD versions of the input strings,
254 // which we do via setting strength=identical.
255 }
256 if(res > 0) {
257 errln("Line %i is not greater or equal than previous line", line);
258 errln(" Previous data line %s", oldLineB);
259 errln(" Current data line %s", lineB);
260 UnicodeString oldS, newS;
261 prettify(CollationKey(oldSk, oldLen), oldS);
262 prettify(CollationKey(newSk, resLen), newS);
263 errln(" Previous key: "+oldS);
264 errln(" Current key: "+newS);
265 }
266 }
267
268 // swap buffers
269 oldLineB = lineB;
270 oldB = buffer;
271 oldSk = newSk;
272 if(lineB == lineB1) {
273 lineB = lineB2;
274 buffer = b2;
275 newSk = sk2;
276 } else {
277 lineB = lineB1;
278 buffer = b1;
279 newSk = sk1;
280 }
281 oldLen = resLen;
282 oldBlen = buflen;
283 }
284 }
285
TestTableNonIgnorable()286 void UCAConformanceTest::TestTableNonIgnorable(/* par */) {
287 if (U_FAILURE(status)) {
288 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status));
289 return;
290 }
291 setCollNonIgnorable(UCA);
292 openTestFile("NON_IGNORABLE");
293 testConformance(UCA);
294 }
295
TestTableShifted()296 void UCAConformanceTest::TestTableShifted(/* par */) {
297 if (U_FAILURE(status)) {
298 dataerrln("Error running UCA Conformance Test: %s", u_errorName(status));
299 return;
300 }
301 setCollShifted(UCA);
302 openTestFile("SHIFTED");
303 testConformance(UCA);
304 }
305
TestRulesNonIgnorable()306 void UCAConformanceTest::TestRulesNonIgnorable(/* par */) {
307 initRbUCA();
308
309 if(U_SUCCESS(status)) {
310 setCollNonIgnorable(rbUCA);
311 openTestFile("NON_IGNORABLE");
312 testConformance(rbUCA);
313 }
314 }
315
TestRulesShifted()316 void UCAConformanceTest::TestRulesShifted(/* par */) {
317 logln("This test is currently disabled, as it is impossible to "
318 "wholly represent fractional UCA using tailoring rules.");
319 return;
320
321 initRbUCA();
322
323 if(U_SUCCESS(status)) {
324 setCollShifted(rbUCA);
325 openTestFile("SHIFTED");
326 testConformance(rbUCA);
327 }
328 }
329
330 #endif /* #if !UCONFIG_NO_COLLATION */
331