1 package org.unicode.cldr.unittest; 2 3 import java.util.Arrays; 4 import java.util.BitSet; 5 import java.util.Collections; 6 import java.util.EnumMap; 7 import java.util.HashSet; 8 import java.util.Iterator; 9 import java.util.LinkedHashSet; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeSet; 13 14 import org.unicode.cldr.draft.EnumLookup; 15 import org.unicode.cldr.draft.ScriptMetadata; 16 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 17 import org.unicode.cldr.draft.ScriptMetadata.Info; 18 import org.unicode.cldr.draft.ScriptMetadata.Shaping; 19 import org.unicode.cldr.draft.ScriptMetadata.Trinary; 20 import org.unicode.cldr.util.CLDRConfig; 21 import org.unicode.cldr.util.CLDRFile; 22 import org.unicode.cldr.util.Containment; 23 import org.unicode.cldr.util.StandardCodes; 24 import org.unicode.cldr.util.With; 25 import org.unicode.cldr.util.XPathParts; 26 27 import com.google.common.base.Joiner; 28 import com.ibm.icu.impl.Relation; 29 import com.ibm.icu.impl.Row; 30 import com.ibm.icu.lang.UCharacter; 31 import com.ibm.icu.lang.UProperty; 32 import com.ibm.icu.lang.UScript; 33 import com.ibm.icu.text.UTF16; 34 import com.ibm.icu.text.UnicodeSet; 35 import com.ibm.icu.util.VersionInfo; 36 37 public class TestScriptMetadata extends TestFmwkPlus { 38 private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion(); 39 static CLDRConfig testInfo = CLDRConfig.getInstance(); 40 main(String[] args)41 public static void main(String[] args) { 42 new TestScriptMetadata().run(args); 43 } 44 TestLookup()45 public void TestLookup() { 46 EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class); 47 assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use")); 48 } 49 TestScriptOfSample()50 public void TestScriptOfSample() { 51 BitSet bitset = new BitSet(); 52 for (String script : new TreeSet<String>(ScriptMetadata.getScripts())) { 53 Info info0 = ScriptMetadata.getInfo(script); 54 int codePointCount = UTF16.countCodePoint(info0.sampleChar); 55 assertEquals("Sample must be single character", 1, codePointCount); 56 if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) { 57 int scriptCode = UScript.getScriptExtensions( 58 info0.sampleChar.codePointAt(0), bitset); 59 assertTrue(script + ": The sample character must have a " + 60 "single, valid script, no ScriptExtensions: " + scriptCode, 61 scriptCode >= 0); 62 } 63 } 64 } 65 TestBasic()66 public void TestBasic() { 67 Info info0 = ScriptMetadata.getInfo(UScript.LATIN); 68 if (ScriptMetadata.errors.size() != 0) { 69 if (ScriptMetadata.errors.size() == 1) { 70 logln("ScriptMetadata initialization errors\t" 71 + ScriptMetadata.errors.size() + "\t" 72 + Joiner.on("\n").join(ScriptMetadata.errors)); 73 } else { 74 errln("ScriptMetadata initialization errors\t" 75 + ScriptMetadata.errors.size() + "\t" 76 + Joiner.on("\n").join(ScriptMetadata.errors)); 77 } 78 } 79 80 // Latin Latn 2 L European Recommended no no no no 81 assertEquals("Latin-rank", 2, info0.rank); 82 assertEquals("Latin-country", "IT", info0.originCountry); 83 assertEquals("Latin-sample", "L", info0.sampleChar); 84 assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED, 85 info0.idUsage); 86 assertEquals("Latin-ime?", Trinary.NO, info0.ime); 87 assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters); 88 assertEquals("Latin-rtl?", Trinary.NO, info0.rtl); 89 assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq); 90 assertEquals("Latin-density", 1, info0.density); 91 assertEquals("Latin-Case", Trinary.YES, info0.hasCase); 92 93 info0 = ScriptMetadata.getInfo(UScript.HEBREW); 94 assertEquals("Arabic-rtl", Trinary.YES, info0.rtl); 95 assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq); 96 assertEquals("Arabic-Case", Trinary.NO, info0.hasCase); 97 } 98 99 @SuppressWarnings("deprecation") TestScripts()100 public void TestScripts() { 101 UnicodeSet temp = new UnicodeSet(); 102 Set<String> missingScripts = new TreeSet<String>(); 103 Relation<IdUsage, String> map = Relation.of( 104 new EnumMap<IdUsage, Set<String>>(IdUsage.class), 105 LinkedHashSet.class); 106 for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) { 107 Info info = ScriptMetadata.getInfo(i); 108 if (info != null) { 109 map.put(info.idUsage, 110 UScript.getName(i) + "\t(" + UScript.getShortName(i) 111 + ")\t" + info); 112 } else { 113 // There are many script codes that are not "real"; there are no 114 // Unicode characters for them. 115 // separate those out. 116 temp.applyIntPropertyValue(UProperty.SCRIPT, i); 117 if (temp.size() != 0) { // is real 118 errln("Missing script metadata for " + UScript.getName(i) 119 + "\t(" + UScript.getShortName(i)); 120 } else { // is not real 121 missingScripts.add(UScript.getShortName(i)); 122 } 123 } 124 } 125 for (Entry<IdUsage, String> entry : map.keyValueSet()) { 126 logln("Script metadata found for script:" + entry.getValue()); 127 } 128 if (!missingScripts.isEmpty()) { 129 logln("No script metadata for the following scripts (no Unicode characters defined): " 130 + missingScripts.toString()); 131 } 132 } 133 134 // lifted from ShowLanguages getEnglishTypes(String type, int code, StandardCodes sc, CLDRFile english)135 private static Set<String> getEnglishTypes(String type, int code, StandardCodes sc, CLDRFile english) { 136 Set<String> result = new HashSet<String>(sc.getSurveyToolDisplayCodes(type)); 137 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) { 138 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 139 String newType = parts.getAttributeValue(-1, "type"); 140 if (!result.contains(newType)) { 141 result.add(newType); 142 } 143 } 144 return result; 145 } 146 147 // lifted from ShowLanguages getScriptsToShow(StandardCodes sc, CLDRFile english)148 private static Set<String> getScriptsToShow(StandardCodes sc, 149 CLDRFile english) { 150 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english); 151 } 152 TestShowLanguages()153 public void TestShowLanguages() { 154 // lifted from ShowLanguages - this is what ShowLanguages tried to do. 155 StandardCodes sc = testInfo.getStandardCodes(); 156 CLDRFile english = testInfo.getEnglish(); 157 Set<String> bads = new TreeSet<String>(); 158 UnicodeSet temp = new UnicodeSet(); 159 for (String s : getScriptsToShow(sc, english)) { 160 if (ScriptMetadata.getInfo(s) == null) { 161 // There are many script codes that are not "real"; there are no 162 // Unicode characters for them. 163 // separate those out. 164 temp.applyIntPropertyValue(UProperty.SCRIPT, 165 UScript.getCodeFromName(s)); 166 if (temp.size() != 0) { // is real 167 bads.add(s); 168 } 169 } 170 } 171 if (!bads.isEmpty()) { 172 errln("No metadata for scripts: " + bads.toString()); 173 } 174 } 175 TestGeographicGrouping()176 public void TestGeographicGrouping() { 177 CLDRFile english = testInfo.getEnglish(); 178 Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<Row.R3<IdUsage, String, String>>(); 179 Set<String> extras = ScriptMetadata.getExtras(); 180 for (Entry<String, Info> sc : ScriptMetadata.iterable()) { 181 String scriptCode = sc.getKey(); 182 if (extras.contains(scriptCode)) { 183 continue; 184 } 185 Info info = sc.getValue(); 186 String continent = Containment.getContinent(info.originCountry); 187 String container = !continent.equals("142") ? continent 188 : Containment.getSubcontinent(info.originCountry); 189 190 lines.add(Row.of( 191 info.idUsage, 192 english.getName(CLDRFile.TERRITORY_NAME, continent), 193 info.idUsage 194 + "\t" 195 + english.getName(CLDRFile.TERRITORY_NAME, 196 container) 197 + "\t" + scriptCode + "\t" 198 + english.getName(CLDRFile.SCRIPT_NAME, scriptCode))); 199 } 200 for (Row.R3<IdUsage, String, String> s : lines) { 201 logln(s.get2()); 202 } 203 } 204 TestScriptCategories()205 public void TestScriptCategories() { 206 207 // test completeness 208 Set<String> scripts = new TreeSet<String>(ScriptMetadata.getScripts()); 209 scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz")); 210 logln("All: " + scripts); 211 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 212 logln(x + ": " + x.scripts.toString()); 213 scripts.removeAll(x.scripts); 214 } 215 assertEquals("Completeness", Collections.EMPTY_SET, scripts); 216 217 // test no overlap 218 assertEquals("Overlap", Collections.EMPTY_SET, scripts); 219 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 220 for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) { 221 if (y == x) 222 continue; 223 assertTrue("overlap", 224 Collections.disjoint(x.scripts, y.scripts)); 225 } 226 } 227 228 // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN); 229 // assertEqualsX(Groupings.MIDDLE_EASTERN, 230 // ScriptCategories.OLD_MIDDLE_EASTERN); 231 // assertEqualsX(Groupings.SOUTH_ASIAN, 232 // ScriptCategories.OLD_SOUTH_ASIAN); 233 // assertEqualsX(Groupings.SOUTHEAST_ASIAN, 234 // ScriptCategories.OLD_SOUTHEAST_ASIAN); 235 // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN); 236 // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN); 237 // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN); 238 // 239 // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS, 240 // ScriptCategories.OLD_HISTORIC_SCRIPTS); 241 // 242 } 243 244 // private void assertEqualsX(Groupings aRaw, Set<String> bRaw) { 245 // assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw); 246 // } 247 assertEqualsX(String title, Set<String> a, Set<String> bRaw)248 public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) { 249 TreeSet<String> b = With.in(bRaw).toCollection( 250 ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>()); 251 252 Set<String> a_b = new TreeSet<String>(a); 253 a_b.removeAll(b); 254 Set<String> b_a = new TreeSet<String>(b); 255 b_a.removeAll(a); 256 assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString()); 257 } 258 259 } 260