• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.impl.Row;
6 import com.ibm.icu.impl.Utility;
7 import com.ibm.icu.lang.UCharacter;
8 import com.ibm.icu.lang.UProperty;
9 import com.ibm.icu.lang.UScript;
10 import com.ibm.icu.text.UTF16;
11 import com.ibm.icu.text.UnicodeSet;
12 import com.ibm.icu.util.VersionInfo;
13 import java.util.Arrays;
14 import java.util.BitSet;
15 import java.util.Collections;
16 import java.util.EnumMap;
17 import java.util.HashSet;
18 import java.util.Iterator;
19 import java.util.LinkedHashSet;
20 import java.util.Map.Entry;
21 import java.util.Set;
22 import java.util.TreeSet;
23 import org.unicode.cldr.draft.EnumLookup;
24 import org.unicode.cldr.draft.ScriptMetadata;
25 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
26 import org.unicode.cldr.draft.ScriptMetadata.Info;
27 import org.unicode.cldr.draft.ScriptMetadata.Shaping;
28 import org.unicode.cldr.draft.ScriptMetadata.Trinary;
29 import org.unicode.cldr.util.CLDRConfig;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.Containment;
32 import org.unicode.cldr.util.StandardCodes;
33 import org.unicode.cldr.util.With;
34 import org.unicode.cldr.util.XPathParts;
35 
36 public class TestScriptMetadata extends TestFmwkPlus {
37     private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion();
38     static CLDRConfig testInfo = CLDRConfig.getInstance();
39 
main(String[] args)40     public static void main(String[] args) {
41         new TestScriptMetadata().run(args);
42     }
43 
TestLookup()44     public void TestLookup() {
45         EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class);
46         assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use"));
47     }
48 
TestScriptOfSample()49     public void TestScriptOfSample() {
50         BitSet bitset = new BitSet();
51         for (String script : new TreeSet<>(ScriptMetadata.getScripts())) {
52             Info info0 = ScriptMetadata.getInfo(script);
53             int codePointCount = UTF16.countCodePoint(info0.sampleChar);
54             assertEquals("Sample must be single character", 1, codePointCount);
55             if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) {
56                 int scriptCode =
57                         UScript.getScriptExtensions(info0.sampleChar.codePointAt(0), bitset);
58                 assertTrue(
59                         script
60                                 + ", "
61                                 + Utility.hex(info0.sampleChar)
62                                 + ": The sample character must have a "
63                                 + "single, valid script, no ScriptExtensions: "
64                                 + scriptCode,
65                         scriptCode >= 0);
66             }
67         }
68     }
69 
TestBasic()70     public void TestBasic() {
71         Info info0 = ScriptMetadata.getInfo(UScript.LATIN);
72         if (ScriptMetadata.errors.size() != 0) {
73             if (ScriptMetadata.errors.size() == 1) {
74                 logln(
75                         "ScriptMetadata initialization errors\t"
76                                 + ScriptMetadata.errors.size()
77                                 + "\t"
78                                 + Joiner.on("\n").join(ScriptMetadata.errors));
79             } else {
80                 errln(
81                         "ScriptMetadata initialization errors\t"
82                                 + ScriptMetadata.errors.size()
83                                 + "\t"
84                                 + Joiner.on("\n").join(ScriptMetadata.errors));
85             }
86         }
87 
88         // Latin Latn 2 L European Recommended no no no no
89         assertEquals("Latin-rank", 2, info0.rank);
90         assertEquals("Latin-country", "IT", info0.originCountry);
91         assertEquals("Latin-sample", "L", info0.sampleChar);
92         assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED, info0.idUsage);
93         assertEquals("Latin-ime?", Trinary.NO, info0.ime);
94         assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters);
95         assertEquals("Latin-rtl?", Trinary.NO, info0.rtl);
96         assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq);
97         assertEquals("Latin-density", 1, info0.density);
98         assertEquals("Latin-Case", Trinary.YES, info0.hasCase);
99 
100         info0 = ScriptMetadata.getInfo(UScript.HEBREW);
101         assertEquals("Arabic-rtl", Trinary.YES, info0.rtl);
102         assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq);
103         assertEquals("Arabic-Case", Trinary.NO, info0.hasCase);
104     }
105 
106     @SuppressWarnings("deprecation")
TestScripts()107     public void TestScripts() {
108         UnicodeSet temp = new UnicodeSet();
109         Set<String> missingScripts = new TreeSet<>();
110         Relation<IdUsage, String> map =
111                 Relation.of(new EnumMap<IdUsage, Set<String>>(IdUsage.class), LinkedHashSet.class);
112         for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) {
113             Info info = ScriptMetadata.getInfo(i);
114             if (info != null) {
115                 map.put(
116                         info.idUsage,
117                         UScript.getName(i) + "\t(" + UScript.getShortName(i) + ")\t" + info);
118             } else {
119                 // There are many script codes that are not "real"; there are no
120                 // Unicode characters for them.
121                 // separate those out.
122                 temp.applyIntPropertyValue(UProperty.SCRIPT, i);
123                 if (temp.size() != 0) { // is real
124                     errln(
125                             "Missing script metadata for "
126                                     + UScript.getName(i)
127                                     + "\t("
128                                     + UScript.getShortName(i));
129                 } else { // is not real
130                     missingScripts.add(UScript.getShortName(i));
131                 }
132             }
133         }
134         for (Entry<IdUsage, String> entry : map.keyValueSet()) {
135             logln("Script metadata found for script:" + entry.getValue());
136         }
137         if (!missingScripts.isEmpty()) {
138             logln(
139                     "No script metadata for the following scripts (no Unicode characters defined): "
140                             + missingScripts.toString());
141         }
142     }
143 
144     // lifted from ShowLanguages
getEnglishTypes( String type, int code, StandardCodes sc, CLDRFile english)145     private static Set<String> getEnglishTypes(
146             String type, int code, StandardCodes sc, CLDRFile english) {
147         Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type));
148         for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) {
149             XPathParts parts = XPathParts.getFrozenInstance(it.next());
150             String newType = parts.getAttributeValue(-1, "type");
151             if (!result.contains(newType)) {
152                 result.add(newType);
153             }
154         }
155         return result;
156     }
157 
158     // lifted from ShowLanguages
getScriptsToShow(StandardCodes sc, CLDRFile english)159     private static Set<String> getScriptsToShow(StandardCodes sc, CLDRFile english) {
160         return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english);
161     }
162 
TestShowLanguages()163     public void TestShowLanguages() {
164         // lifted from ShowLanguages - this is what ShowLanguages tried to do.
165         StandardCodes sc = StandardCodes.make();
166         CLDRFile english = testInfo.getEnglish();
167         Set<String> bads = new TreeSet<>();
168         UnicodeSet temp = new UnicodeSet();
169         for (String s : getScriptsToShow(sc, english)) {
170             if (ScriptMetadata.getInfo(s) == null) {
171                 // There are many script codes that are not "real"; there are no
172                 // Unicode characters for them.
173                 // separate those out.
174                 temp.applyIntPropertyValue(UProperty.SCRIPT, UScript.getCodeFromName(s));
175                 if (temp.size() != 0) { // is real
176                     bads.add(s);
177                 }
178             }
179         }
180         if (!bads.isEmpty()) {
181             errln("No metadata for scripts: " + bads.toString());
182         }
183     }
184 
TestGeographicGrouping()185     public void TestGeographicGrouping() {
186         CLDRFile english = testInfo.getEnglish();
187         Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<>();
188         Set<String> extras = ScriptMetadata.getExtras();
189         for (Entry<String, Info> sc : ScriptMetadata.iterable()) {
190             String scriptCode = sc.getKey();
191             if (extras.contains(scriptCode)) {
192                 continue;
193             }
194             Info info = sc.getValue();
195             String continent = Containment.getContinent(info.originCountry);
196             String container =
197                     !continent.equals("142")
198                             ? continent
199                             : Containment.getSubcontinent(info.originCountry);
200 
201             lines.add(
202                     Row.of(
203                             info.idUsage,
204                             english.getName(CLDRFile.TERRITORY_NAME, continent),
205                             info.idUsage
206                                     + "\t"
207                                     + english.getName(CLDRFile.TERRITORY_NAME, container)
208                                     + "\t"
209                                     + scriptCode
210                                     + "\t"
211                                     + english.getName(CLDRFile.SCRIPT_NAME, scriptCode)));
212         }
213         for (Row.R3<IdUsage, String, String> s : lines) {
214             logln(s.get2());
215         }
216     }
217 
TestScriptCategories()218     public void TestScriptCategories() {
219 
220         // test completeness
221         Set<String> scripts = new TreeSet<>(ScriptMetadata.getScripts());
222         scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz"));
223         logln("All: " + scripts);
224         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
225             logln(x + ": " + x.scripts.toString());
226             scripts.removeAll(x.scripts);
227         }
228         assertEquals("Completeness", Collections.EMPTY_SET, scripts);
229 
230         // test no overlap
231         assertEquals("Overlap", Collections.EMPTY_SET, scripts);
232         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
233             for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) {
234                 if (y == x) continue;
235                 assertTrue("overlap", Collections.disjoint(x.scripts, y.scripts));
236             }
237         }
238 
239         // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN);
240         // assertEqualsX(Groupings.MIDDLE_EASTERN,
241         // ScriptCategories.OLD_MIDDLE_EASTERN);
242         // assertEqualsX(Groupings.SOUTH_ASIAN,
243         // ScriptCategories.OLD_SOUTH_ASIAN);
244         // assertEqualsX(Groupings.SOUTHEAST_ASIAN,
245         // ScriptCategories.OLD_SOUTHEAST_ASIAN);
246         // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN);
247         // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN);
248         // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN);
249         //
250         // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS,
251         // ScriptCategories.OLD_HISTORIC_SCRIPTS);
252         //
253     }
254 
255     //    private void assertEqualsX(Groupings aRaw, Set<String> bRaw) {
256     //        assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw);
257     //    }
258 
assertEqualsX(String title, Set<String> a, Set<String> bRaw)259     public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) {
260         TreeSet<String> b =
261                 With.in(bRaw).toCollection(ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>());
262 
263         Set<String> a_b = new TreeSet<>(a);
264         a_b.removeAll(b);
265         Set<String> b_a = new TreeSet<>(b);
266         b_a.removeAll(a);
267         assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString());
268     }
269 }
270