• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.unittest;
2 
3 import java.util.Arrays;
4 import java.util.BitSet;
5 import java.util.Collections;
6 import java.util.EnumMap;
7 import java.util.HashSet;
8 import java.util.Iterator;
9 import java.util.LinkedHashSet;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeSet;
13 
14 import org.unicode.cldr.draft.EnumLookup;
15 import org.unicode.cldr.draft.ScriptMetadata;
16 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
17 import org.unicode.cldr.draft.ScriptMetadata.Info;
18 import org.unicode.cldr.draft.ScriptMetadata.Shaping;
19 import org.unicode.cldr.draft.ScriptMetadata.Trinary;
20 import org.unicode.cldr.util.CLDRConfig;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.Containment;
23 import org.unicode.cldr.util.StandardCodes;
24 import org.unicode.cldr.util.With;
25 import org.unicode.cldr.util.XPathParts;
26 
27 import com.ibm.icu.dev.util.CollectionUtilities;
28 import com.ibm.icu.impl.Relation;
29 import com.ibm.icu.impl.Row;
30 import com.ibm.icu.lang.UCharacter;
31 import com.ibm.icu.lang.UProperty;
32 import com.ibm.icu.lang.UScript;
33 import com.ibm.icu.text.UTF16;
34 import com.ibm.icu.text.UnicodeSet;
35 import com.ibm.icu.util.VersionInfo;
36 
37 public class TestScriptMetadata extends TestFmwkPlus {
38     private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion();
39     static CLDRConfig testInfo = CLDRConfig.getInstance();
40 
main(String[] args)41     public static void main(String[] args) {
42         new TestScriptMetadata().run(args);
43     }
44 
TestLookup()45     public void TestLookup() {
46         EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class);
47         assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use"));
48     }
49 
TestScriptOfSample()50     public void TestScriptOfSample() {
51         BitSet bitset = new BitSet();
52         for (String script : new TreeSet<String>(ScriptMetadata.getScripts())) {
53             Info info0 = ScriptMetadata.getInfo(script);
54             int codePointCount = UTF16.countCodePoint(info0.sampleChar);
55             assertEquals("Sample must be single character", 1, codePointCount);
56             if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) {
57                 int scriptCode = UScript.getScriptExtensions(
58                     info0.sampleChar.codePointAt(0), bitset);
59                 assertTrue(script + ": The sample character must have a " +
60                     "single, valid script, no ScriptExtensions: " + scriptCode,
61                     scriptCode >= 0);
62             }
63         }
64     }
65 
TestBasic()66     public void TestBasic() {
67         Info info0 = ScriptMetadata.getInfo(UScript.LATIN);
68         if (ScriptMetadata.errors.size() != 0) {
69             if (ScriptMetadata.errors.size() == 1) {
70                 logln("ScriptMetadata initialization errors\t"
71                     + ScriptMetadata.errors.size() + "\t"
72                     + CollectionUtilities.join(ScriptMetadata.errors, "\n"));
73             } else {
74                 errln("ScriptMetadata initialization errors\t"
75                     + ScriptMetadata.errors.size() + "\t"
76                     + CollectionUtilities.join(ScriptMetadata.errors, "\n"));
77             }
78         }
79 
80         // Latin Latn 2 L European Recommended no no no no
81         assertEquals("Latin-rank", 2, info0.rank);
82         assertEquals("Latin-country", "IT", info0.originCountry);
83         assertEquals("Latin-sample", "L", info0.sampleChar);
84         assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED,
85             info0.idUsage);
86         assertEquals("Latin-ime?", Trinary.NO, info0.ime);
87         assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters);
88         assertEquals("Latin-rtl?", Trinary.NO, info0.rtl);
89         assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq);
90         assertEquals("Latin-density", 1, info0.density);
91         assertEquals("Latin-Case", Trinary.YES, info0.hasCase);
92 
93         info0 = ScriptMetadata.getInfo(UScript.HEBREW);
94         assertEquals("Arabic-rtl", Trinary.YES, info0.rtl);
95         assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq);
96         assertEquals("Arabic-Case", Trinary.NO, info0.hasCase);
97     }
98 
99     @SuppressWarnings("deprecation")
TestScripts()100     public void TestScripts() {
101         UnicodeSet temp = new UnicodeSet();
102         Set<String> missingScripts = new TreeSet<String>();
103         Relation<IdUsage, String> map = Relation.of(
104             new EnumMap<IdUsage, Set<String>>(IdUsage.class),
105             LinkedHashSet.class);
106         for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) {
107             Info info = ScriptMetadata.getInfo(i);
108             if (info != null) {
109                 map.put(info.idUsage,
110                     UScript.getName(i) + "\t(" + UScript.getShortName(i)
111                         + ")\t" + info);
112             } else {
113                 // There are many script codes that are not "real"; there are no
114                 // Unicode characters for them.
115                 // separate those out.
116                 temp.applyIntPropertyValue(UProperty.SCRIPT, i);
117                 if (temp.size() != 0) { // is real
118                     errln("Missing script metadata for " + UScript.getName(i)
119                         + "\t(" + UScript.getShortName(i));
120                 } else { // is not real
121                     missingScripts.add(UScript.getShortName(i));
122                 }
123             }
124         }
125         for (Entry<IdUsage, String> entry : map.keyValueSet()) {
126             logln("Script metadata found for script:" + entry.getValue());
127         }
128         if (!missingScripts.isEmpty()) {
129             logln("No script metadata for the following scripts (no Unicode characters defined): "
130                 + missingScripts.toString());
131         }
132     }
133 
134     // lifted from ShowLanguages
getEnglishTypes(String type, int code, StandardCodes sc, CLDRFile english)135     private static Set<String> getEnglishTypes(String type, int code, StandardCodes sc, CLDRFile english) {
136         Set<String> result = new HashSet<String>(sc.getSurveyToolDisplayCodes(type));
137         for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) {
138             XPathParts parts = XPathParts.getFrozenInstance(it.next());
139             String newType = parts.getAttributeValue(-1, "type");
140             if (!result.contains(newType)) {
141                 result.add(newType);
142             }
143         }
144         return result;
145     }
146 
147     // lifted from ShowLanguages
getScriptsToShow(StandardCodes sc, CLDRFile english)148     private static Set<String> getScriptsToShow(StandardCodes sc,
149         CLDRFile english) {
150         return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english);
151     }
152 
TestShowLanguages()153     public void TestShowLanguages() {
154         // lifted from ShowLanguages - this is what ShowLanguages tried to do.
155         StandardCodes sc = testInfo.getStandardCodes();
156         CLDRFile english = testInfo.getEnglish();
157         Set<String> bads = new TreeSet<String>();
158         UnicodeSet temp = new UnicodeSet();
159         for (String s : getScriptsToShow(sc, english)) {
160             if (ScriptMetadata.getInfo(s) == null) {
161                 // There are many script codes that are not "real"; there are no
162                 // Unicode characters for them.
163                 // separate those out.
164                 temp.applyIntPropertyValue(UProperty.SCRIPT,
165                     UScript.getCodeFromName(s));
166                 if (temp.size() != 0) { // is real
167                     bads.add(s);
168                 }
169             }
170         }
171         if (!bads.isEmpty()) {
172             errln("No metadata for scripts: " + bads.toString());
173         }
174     }
175 
TestGeographicGrouping()176     public void TestGeographicGrouping() {
177         CLDRFile english = testInfo.getEnglish();
178         Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<Row.R3<IdUsage, String, String>>();
179         Set<String> extras = ScriptMetadata.getExtras();
180         for (Entry<String, Info> sc : ScriptMetadata.iterable()) {
181             String scriptCode = sc.getKey();
182             if (extras.contains(scriptCode)) {
183                 continue;
184             }
185             Info info = sc.getValue();
186             String continent = Containment.getContinent(info.originCountry);
187             String container = !continent.equals("142") ? continent
188                 : Containment.getSubcontinent(info.originCountry);
189 
190             lines.add(Row.of(
191                 info.idUsage,
192                 english.getName(CLDRFile.TERRITORY_NAME, continent),
193                 info.idUsage
194                     + "\t"
195                     + english.getName(CLDRFile.TERRITORY_NAME,
196                         container)
197                     + "\t" + scriptCode + "\t"
198                     + english.getName(CLDRFile.SCRIPT_NAME, scriptCode)));
199         }
200         for (Row.R3<IdUsage, String, String> s : lines) {
201             logln(s.get2());
202         }
203     }
204 
TestScriptCategories()205     public void TestScriptCategories() {
206 
207         // test completeness
208         Set<String> scripts = new TreeSet<String>(ScriptMetadata.getScripts());
209         scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz"));
210         logln("All: " + scripts);
211         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
212             logln(x + ": " + x.scripts.toString());
213             scripts.removeAll(x.scripts);
214         }
215         assertEquals("Completeness", Collections.EMPTY_SET, scripts);
216 
217         // test no overlap
218         assertEquals("Overlap", Collections.EMPTY_SET, scripts);
219         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
220             for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) {
221                 if (y == x)
222                     continue;
223                 assertTrue("overlap",
224                     Collections.disjoint(x.scripts, y.scripts));
225             }
226         }
227 
228         // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN);
229         // assertEqualsX(Groupings.MIDDLE_EASTERN,
230         // ScriptCategories.OLD_MIDDLE_EASTERN);
231         // assertEqualsX(Groupings.SOUTH_ASIAN,
232         // ScriptCategories.OLD_SOUTH_ASIAN);
233         // assertEqualsX(Groupings.SOUTHEAST_ASIAN,
234         // ScriptCategories.OLD_SOUTHEAST_ASIAN);
235         // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN);
236         // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN);
237         // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN);
238         //
239         // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS,
240         // ScriptCategories.OLD_HISTORIC_SCRIPTS);
241         //
242     }
243 
244 //    private void assertEqualsX(Groupings aRaw, Set<String> bRaw) {
245 //        assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw);
246 //    }
247 
assertEqualsX(String title, Set<String> a, Set<String> bRaw)248     public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) {
249         TreeSet<String> b = With.in(bRaw).toCollection(
250             ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>());
251 
252         Set<String> a_b = new TreeSet<String>(a);
253         a_b.removeAll(b);
254         Set<String> b_a = new TreeSet<String>(b);
255         b_a.removeAll(a);
256         assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString());
257     }
258 
259 }
260