• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 2002-2014, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************/
7 
8 /**
9  * UCAConformanceTest performs conformance tests defined in the data
10  * files. ICU ships with stub data files, as the full test files are too
11  * large. To run the whole test, download the full test files.
12  */
13 
14 package com.ibm.icu.dev.test.collator;
15 
16 import java.io.BufferedReader;
17 import java.io.IOException;
18 
19 import org.junit.Before;
20 import org.junit.Test;
21 import org.junit.runner.RunWith;
22 import org.junit.runners.JUnit4;
23 
24 import com.ibm.icu.dev.test.TestFmwk;
25 import com.ibm.icu.dev.test.TestUtil;
26 import com.ibm.icu.lang.UCharacter;
27 import com.ibm.icu.text.Collator;
28 import com.ibm.icu.text.RawCollationKey;
29 import com.ibm.icu.text.RuleBasedCollator;
30 import com.ibm.icu.text.UTF16;
31 import com.ibm.icu.util.ULocale;
32 import com.ibm.icu.util.VersionInfo;
33 
34 @RunWith(JUnit4.class)
35 public class UCAConformanceTest extends TestFmwk {
36 
UCAConformanceTest()37     public UCAConformanceTest() {
38     }
39 
40     @Before
init()41     public void init() throws Exception {
42         UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
43         comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
44     }
45 
46     private RuleBasedCollator UCA;
47     private RuleBasedCollator rbUCA;
48     private UTF16.StringComparator comparer;
49     private boolean isAtLeastUCA62 = UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;
50 
51     @Test
TestTableNonIgnorable()52     public void TestTableNonIgnorable() {
53         setCollNonIgnorable(UCA);
54         openTestFile("NON_IGNORABLE");
55         conformanceTest(UCA);
56     }
57 
58     @Test
TestTableShifted()59     public void TestTableShifted() {
60         setCollShifted(UCA);
61         openTestFile("SHIFTED");
62         conformanceTest(UCA);
63     }
64 
65     @Test
TestRulesNonIgnorable()66     public void TestRulesNonIgnorable() {
67         if (logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) {
68             return;
69         }
70         initRbUCA();
71         if (rbUCA == null) {
72             return;
73         }
74 
75         setCollNonIgnorable(rbUCA);
76         openTestFile("NON_IGNORABLE");
77         conformanceTest(rbUCA);
78     }
79 
80     @Test
TestRulesShifted()81     public void TestRulesShifted() {
82         logln("This test is currently disabled, as it is impossible to "
83                 + "wholly represent fractional UCA using tailoring rules.");
84         return;
85         /*
86          * initRbUCA(); if(rbUCA == null) { return; }
87          *
88          * setCollShifted(rbUCA); openTestFile("SHIFTED"); testConformance(rbUCA);
89          */
90     }
91 
92     BufferedReader in;
93 
openTestFile(String type)94     private void openTestFile(String type) {
95         String collationTest = "CollationTest_";
96         String ext = ".txt";
97         try {
98             in = TestUtil.getDataReader(collationTest + type + "_SHORT" + ext);
99         } catch (Exception e) {
100             try {
101                 in = TestUtil.getDataReader(collationTest + type + ext);
102             } catch (Exception e1) {
103                 try {
104                     in = TestUtil.getDataReader(collationTest + type + "_STUB" + ext);
105                     logln("INFO: Working with the stub file.\n" + "If you need the full conformance test, please\n"
106                             + "download the appropriate data files from:\n"
107                             + "http://unicode.org/cldr/trac/browser/trunk/common/uca");
108                 } catch (Exception e11) {
109                     errln("ERROR: Could not find any of the test files");
110                 }
111             }
112         }
113     }
114 
setCollNonIgnorable(RuleBasedCollator coll)115     private void setCollNonIgnorable(RuleBasedCollator coll) {
116         if (coll != null) {
117             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
118             coll.setLowerCaseFirst(false);
119             coll.setCaseLevel(false);
120             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
121             coll.setAlternateHandlingShifted(false);
122         }
123     }
124 
setCollShifted(RuleBasedCollator coll)125     private void setCollShifted(RuleBasedCollator coll) {
126         if (coll != null) {
127             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
128             coll.setLowerCaseFirst(false);
129             coll.setCaseLevel(false);
130             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
131             coll.setAlternateHandlingShifted(true);
132         }
133     }
134 
initRbUCA()135     private void initRbUCA() {
136         if (rbUCA == null) {
137             String ucarules = UCA.getRules(true);
138             try {
139                 rbUCA = new RuleBasedCollator(ucarules);
140             } catch (Exception e) {
141                 errln("Failure creating UCA rule-based collator: " + e);
142             }
143         }
144     }
145 
parseString(String line)146     private String parseString(String line) {
147         int i = 0, value;
148         StringBuilder result = new StringBuilder(), buffer = new StringBuilder();
149 
150         for (;;) {
151             while (i < line.length() && Character.isWhitespace(line.charAt(i))) {
152                 i++;
153             }
154             while (i < line.length() && Character.isLetterOrDigit(line.charAt(i))) {
155                 buffer.append(line.charAt(i));
156                 i++;
157             }
158             if (buffer.length() == 0) {
159                 // We hit something that was not whitespace/letter/digit.
160                 // Should be ';' or end of string.
161                 return result.toString();
162             }
163             /* read one code point */
164             value = Integer.parseInt(buffer.toString(), 16);
165             buffer.setLength(0);
166             result.appendCodePoint(value);
167         }
168 
169     }
170 
171     private static final int IS_SHIFTED = 1;
172     private static final int FROM_RULES = 2;
173 
skipLineBecauseOfBug(String s, int flags)174     private static boolean skipLineBecauseOfBug(String s, int flags) {
175         // Add temporary exceptions here if there are ICU bugs, until we can fix them.
176         // For examples see the ICU 52 version of this file.
177         return false;
178     }
179 
normalizeResult(int result)180     private static int normalizeResult(int result) {
181         return result < 0 ? -1 : result == 0 ? 0 : 1;
182     }
183 
conformanceTest(RuleBasedCollator coll)184     private void conformanceTest(RuleBasedCollator coll) {
185         if (in == null || coll == null) {
186             return;
187         }
188         int skipFlags = 0;
189         if (coll.isAlternateHandlingShifted()) {
190             skipFlags |= IS_SHIFTED;
191         }
192         if (coll == rbUCA) {
193             skipFlags |= FROM_RULES;
194         }
195 
196         logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
197         boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;
198 
199         int lineNo = 0;
200 
201         String line = null, oldLine = null, buffer = null, oldB = null;
202         RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
203         RawCollationKey oldSk = null, newSk = sk1;
204 
205         try {
206             while ((line = in.readLine()) != null) {
207                 lineNo++;
208                 if (line.length() == 0 || line.charAt(0) == '#') {
209                     continue;
210                 }
211                 buffer = parseString(line);
212 
213                 if (skipLineBecauseOfBug(buffer, skipFlags)) {
214                     logln("Skipping line " + lineNo + " because of a known bug");
215                     continue;
216                 }
217 
218                 if (withSortKeys) {
219                     coll.getRawCollationKey(buffer, newSk);
220                 }
221                 if (oldSk != null) {
222                     boolean ok = true;
223                     int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
224                     int cmpres = coll.compare(oldB, buffer);
225                     int cmpres2 = coll.compare(buffer, oldB);
226 
227                     if (cmpres != -cmpres2) {
228                         errln(String.format(
229                                 "Compare result not symmetrical on line %d: "
230                                         + "previous vs. current (%d) / current vs. previous (%d)",
231                                 lineNo, cmpres, cmpres2));
232                         ok = false;
233                     }
234 
235                     // TODO: Compare with normalization turned off if the input passes the FCD test.
236 
237                     if (withSortKeys && cmpres != normalizeResult(skres)) {
238                         errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres
239                                 + ") on line " + lineNo);
240                         ok = false;
241                     }
242 
243                     int res = cmpres;
244                     if (res == 0 && !isAtLeastUCA62) {
245                         // Up to UCA 6.1, the collation test files use a custom tie-breaker,
246                         // comparing the raw input strings.
247                         res = comparer.compare(oldB, buffer);
248                         // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
249                         // comparing the NFD versions of the input strings,
250                         // which we do via setting strength=identical.
251                     }
252                     if (res > 0) {
253                         errln("Line " + lineNo + " is not greater or equal than previous line");
254                         ok = false;
255                     }
256 
257                     if (!ok) {
258                         errln("  Previous data line " + oldLine);
259                         errln("  Current data line  " + line);
260                         if (withSortKeys) {
261                             errln("  Previous key: " + CollationTest.prettify(oldSk));
262                             errln("  Current key:  " + CollationTest.prettify(newSk));
263                         }
264                     }
265                 }
266 
267                 oldSk = newSk;
268                 oldB = buffer;
269                 oldLine = line;
270                 if (oldSk == sk1) {
271                     newSk = sk2;
272                 } else {
273                     newSk = sk1;
274                 }
275             }
276         } catch (Exception e) {
277             errln("Unexpected exception " + e);
278         } finally {
279             try {
280                 in.close();
281             } catch (IOException ignored) {
282             }
283             in = null;
284         }
285     }
286 }
287