1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.impl.Row; 6 import com.ibm.icu.impl.Utility; 7 import com.ibm.icu.lang.UCharacter; 8 import com.ibm.icu.lang.UProperty; 9 import com.ibm.icu.lang.UScript; 10 import com.ibm.icu.text.UTF16; 11 import com.ibm.icu.text.UnicodeSet; 12 import com.ibm.icu.util.VersionInfo; 13 import java.util.Arrays; 14 import java.util.BitSet; 15 import java.util.Collections; 16 import java.util.EnumMap; 17 import java.util.HashSet; 18 import java.util.Iterator; 19 import java.util.LinkedHashSet; 20 import java.util.Map.Entry; 21 import java.util.Set; 22 import java.util.TreeSet; 23 import org.unicode.cldr.draft.EnumLookup; 24 import org.unicode.cldr.draft.ScriptMetadata; 25 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 26 import org.unicode.cldr.draft.ScriptMetadata.Info; 27 import org.unicode.cldr.draft.ScriptMetadata.Shaping; 28 import org.unicode.cldr.draft.ScriptMetadata.Trinary; 29 import org.unicode.cldr.util.CLDRConfig; 30 import org.unicode.cldr.util.CLDRFile; 31 import org.unicode.cldr.util.Containment; 32 import org.unicode.cldr.util.StandardCodes; 33 import org.unicode.cldr.util.With; 34 import org.unicode.cldr.util.XPathParts; 35 36 public class TestScriptMetadata extends TestFmwkPlus { 37 private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion(); 38 static CLDRConfig testInfo = CLDRConfig.getInstance(); 39 main(String[] args)40 public static void main(String[] args) { 41 new TestScriptMetadata().run(args); 42 } 43 TestLookup()44 public void TestLookup() { 45 EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class); 46 assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use")); 47 } 48 TestScriptOfSample()49 public void TestScriptOfSample() { 50 BitSet bitset = new BitSet(); 51 for (String script : new TreeSet<>(ScriptMetadata.getScripts())) { 52 Info info0 = ScriptMetadata.getInfo(script); 53 int codePointCount = UTF16.countCodePoint(info0.sampleChar); 54 assertEquals("Sample must be single character", 1, codePointCount); 55 if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) { 56 int scriptCode = 57 UScript.getScriptExtensions(info0.sampleChar.codePointAt(0), bitset); 58 assertTrue( 59 script 60 + ", " 61 + Utility.hex(info0.sampleChar) 62 + ": The sample character must have a " 63 + "single, valid script, no ScriptExtensions: " 64 + scriptCode, 65 scriptCode >= 0); 66 } 67 } 68 } 69 TestBasic()70 public void TestBasic() { 71 Info info0 = ScriptMetadata.getInfo(UScript.LATIN); 72 if (ScriptMetadata.errors.size() != 0) { 73 if (ScriptMetadata.errors.size() == 1) { 74 logln( 75 "ScriptMetadata initialization errors\t" 76 + ScriptMetadata.errors.size() 77 + "\t" 78 + Joiner.on("\n").join(ScriptMetadata.errors)); 79 } else { 80 errln( 81 "ScriptMetadata initialization errors\t" 82 + ScriptMetadata.errors.size() 83 + "\t" 84 + Joiner.on("\n").join(ScriptMetadata.errors)); 85 } 86 } 87 88 // Latin Latn 2 L European Recommended no no no no 89 assertEquals("Latin-rank", 2, info0.rank); 90 assertEquals("Latin-country", "IT", info0.originCountry); 91 assertEquals("Latin-sample", "L", info0.sampleChar); 92 assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED, info0.idUsage); 93 assertEquals("Latin-ime?", Trinary.NO, info0.ime); 94 assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters); 95 assertEquals("Latin-rtl?", Trinary.NO, info0.rtl); 96 assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq); 97 assertEquals("Latin-density", 1, info0.density); 98 assertEquals("Latin-Case", Trinary.YES, info0.hasCase); 99 100 info0 = ScriptMetadata.getInfo(UScript.HEBREW); 101 assertEquals("Arabic-rtl", Trinary.YES, info0.rtl); 102 assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq); 103 assertEquals("Arabic-Case", Trinary.NO, info0.hasCase); 104 } 105 106 @SuppressWarnings("deprecation") TestScripts()107 public void TestScripts() { 108 UnicodeSet temp = new UnicodeSet(); 109 Set<String> missingScripts = new TreeSet<>(); 110 Relation<IdUsage, String> map = 111 Relation.of(new EnumMap<IdUsage, Set<String>>(IdUsage.class), LinkedHashSet.class); 112 for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) { 113 Info info = ScriptMetadata.getInfo(i); 114 if (info != null) { 115 map.put( 116 info.idUsage, 117 UScript.getName(i) + "\t(" + UScript.getShortName(i) + ")\t" + info); 118 } else { 119 // There are many script codes that are not "real"; there are no 120 // Unicode characters for them. 121 // separate those out. 122 temp.applyIntPropertyValue(UProperty.SCRIPT, i); 123 if (temp.size() != 0) { // is real 124 errln( 125 "Missing script metadata for " 126 + UScript.getName(i) 127 + "\t(" 128 + UScript.getShortName(i)); 129 } else { // is not real 130 missingScripts.add(UScript.getShortName(i)); 131 } 132 } 133 } 134 for (Entry<IdUsage, String> entry : map.keyValueSet()) { 135 logln("Script metadata found for script:" + entry.getValue()); 136 } 137 if (!missingScripts.isEmpty()) { 138 logln( 139 "No script metadata for the following scripts (no Unicode characters defined): " 140 + missingScripts.toString()); 141 } 142 } 143 144 // lifted from ShowLanguages getEnglishTypes( String type, int code, StandardCodes sc, CLDRFile english)145 private static Set<String> getEnglishTypes( 146 String type, int code, StandardCodes sc, CLDRFile english) { 147 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 148 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) { 149 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 150 String newType = parts.getAttributeValue(-1, "type"); 151 if (!result.contains(newType)) { 152 result.add(newType); 153 } 154 } 155 return result; 156 } 157 158 // lifted from ShowLanguages getScriptsToShow(StandardCodes sc, CLDRFile english)159 private static Set<String> getScriptsToShow(StandardCodes sc, CLDRFile english) { 160 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english); 161 } 162 TestShowLanguages()163 public void TestShowLanguages() { 164 // lifted from ShowLanguages - this is what ShowLanguages tried to do. 165 StandardCodes sc = StandardCodes.make(); 166 CLDRFile english = testInfo.getEnglish(); 167 Set<String> bads = new TreeSet<>(); 168 UnicodeSet temp = new UnicodeSet(); 169 for (String s : getScriptsToShow(sc, english)) { 170 if (ScriptMetadata.getInfo(s) == null) { 171 // There are many script codes that are not "real"; there are no 172 // Unicode characters for them. 173 // separate those out. 174 temp.applyIntPropertyValue(UProperty.SCRIPT, UScript.getCodeFromName(s)); 175 if (temp.size() != 0) { // is real 176 bads.add(s); 177 } 178 } 179 } 180 if (!bads.isEmpty()) { 181 errln("No metadata for scripts: " + bads.toString()); 182 } 183 } 184 TestGeographicGrouping()185 public void TestGeographicGrouping() { 186 CLDRFile english = testInfo.getEnglish(); 187 Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<>(); 188 Set<String> extras = ScriptMetadata.getExtras(); 189 for (Entry<String, Info> sc : ScriptMetadata.iterable()) { 190 String scriptCode = sc.getKey(); 191 if (extras.contains(scriptCode)) { 192 continue; 193 } 194 Info info = sc.getValue(); 195 String continent = Containment.getContinent(info.originCountry); 196 String container = 197 !continent.equals("142") 198 ? continent 199 : Containment.getSubcontinent(info.originCountry); 200 201 lines.add( 202 Row.of( 203 info.idUsage, 204 english.getName(CLDRFile.TERRITORY_NAME, continent), 205 info.idUsage 206 + "\t" 207 + english.getName(CLDRFile.TERRITORY_NAME, container) 208 + "\t" 209 + scriptCode 210 + "\t" 211 + english.getName(CLDRFile.SCRIPT_NAME, scriptCode))); 212 } 213 for (Row.R3<IdUsage, String, String> s : lines) { 214 logln(s.get2()); 215 } 216 } 217 TestScriptCategories()218 public void TestScriptCategories() { 219 220 // test completeness 221 Set<String> scripts = new TreeSet<>(ScriptMetadata.getScripts()); 222 scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz")); 223 logln("All: " + scripts); 224 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 225 logln(x + ": " + x.scripts.toString()); 226 scripts.removeAll(x.scripts); 227 } 228 assertEquals("Completeness", Collections.EMPTY_SET, scripts); 229 230 // test no overlap 231 assertEquals("Overlap", Collections.EMPTY_SET, scripts); 232 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 233 for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) { 234 if (y == x) continue; 235 assertTrue("overlap", Collections.disjoint(x.scripts, y.scripts)); 236 } 237 } 238 239 // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN); 240 // assertEqualsX(Groupings.MIDDLE_EASTERN, 241 // ScriptCategories.OLD_MIDDLE_EASTERN); 242 // assertEqualsX(Groupings.SOUTH_ASIAN, 243 // ScriptCategories.OLD_SOUTH_ASIAN); 244 // assertEqualsX(Groupings.SOUTHEAST_ASIAN, 245 // ScriptCategories.OLD_SOUTHEAST_ASIAN); 246 // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN); 247 // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN); 248 // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN); 249 // 250 // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS, 251 // ScriptCategories.OLD_HISTORIC_SCRIPTS); 252 // 253 } 254 255 // private void assertEqualsX(Groupings aRaw, Set<String> bRaw) { 256 // assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw); 257 // } 258 assertEqualsX(String title, Set<String> a, Set<String> bRaw)259 public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) { 260 TreeSet<String> b = 261 With.in(bRaw).toCollection(ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>()); 262 263 Set<String> a_b = new TreeSet<>(a); 264 a_b.removeAll(b); 265 Set<String> b_a = new TreeSet<>(b); 266 b_a.removeAll(a); 267 assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString()); 268 } 269 } 270