package org.unicode.cldr.unittest;

import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;

import org.unicode.cldr.draft.EnumLookup;
import org.unicode.cldr.draft.ScriptMetadata;
import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
import org.unicode.cldr.draft.ScriptMetadata.Info;
import org.unicode.cldr.draft.ScriptMetadata.Shaping;
import org.unicode.cldr.draft.ScriptMetadata.Trinary;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.Containment;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.With;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.VersionInfo;

/**
 * Unit tests for {@link ScriptMetadata}: sample characters, a few hand-checked
 * entries, coverage of the scripts known to ICU and to the English CLDR data,
 * and the completeness/disjointness of {@link ScriptMetadata.Groupings}.
 */
public class TestScriptMetadata extends TestFmwkPlus {
    /** Unicode version of the ICU data in use; scripts newer than this cannot be checked against ICU. */
    private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion();

    /** Territory code that {@link #TestGeographicGrouping()} splits into subcontinents. */
    private static final String CONTINENT_SPLIT_INTO_SUBCONTINENTS = "142";

    static CLDRConfig testInfo = CLDRConfig.getInstance();

    /**
     * Runs this test class from the command line.
     *
     * @param args arguments passed through to the test framework's {@code run}
     */
    public static void main(String[] args) {
        new TestScriptMetadata().run(args);
    }

    /**
     * Checks that an {@link EnumLookup} over {@link IdUsage} resolves the string
     * {@code "limited Use"} to {@link IdUsage#LIMITED_USE}.
     */
    public void TestLookup() {
        EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class);
        assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use"));
    }

    /**
     * Checks that every script's sample is exactly one code point and, when the
     * ICU Unicode version is at least the script's {@code age}, that the sample
     * has a single script and no ScriptExtensions.
     */
    public void TestScriptOfSample() {
        BitSet bitset = new BitSet();
        for (String script : new TreeSet<String>(ScriptMetadata.getScripts())) {
            Info info0 = ScriptMetadata.getInfo(script);
            int codePointCount = UTF16.countCodePoint(info0.sampleChar);
            assertEquals("Sample must be single character", 1, codePointCount);
            if (codePointCount != 1) {
                // The failure is already recorded above. Skip the script check:
                // codePointAt(0) would throw on an empty sample and abort the
                // remaining scripts.
                continue;
            }
            if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) {
                // getScriptExtensions returns a negative value when the code
                // point has more than one script in its Script_Extensions.
                int scriptCode = UScript.getScriptExtensions(
                    info0.sampleChar.codePointAt(0), bitset);
                assertTrue(script + ": The sample character must have a "
                    + "single, valid script, no ScriptExtensions: " + scriptCode,
                    scriptCode >= 0);
            }
        }
    }

    /**
     * Spot-checks the metadata for Latin and Hebrew and reports any errors
     * collected while {@link ScriptMetadata} was initialized.
     */
    public void TestBasic() {
        Info info0 = ScriptMetadata.getInfo(UScript.LATIN);
        int errorCount = ScriptMetadata.errors.size();
        if (errorCount != 0) {
            String message = "ScriptMetadata initialization errors\t"
                + errorCount + "\t"
                + CollectionUtilities.join(ScriptMetadata.errors, "\n");
            // NOTE(review): exactly one initialization error is only logged;
            // two or more fail the test. Confirm this tolerance is intentional.
            if (errorCount == 1) {
                logln(message);
            } else {
                errln(message);
            }
        }

        // Latin Latn 2 L European Recommended no no no no
        assertEquals("Latin-rank", 2, info0.rank);
        assertEquals("Latin-country", "IT", info0.originCountry);
        assertEquals("Latin-sample", "L", info0.sampleChar);
        assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED,
            info0.idUsage);
        assertEquals("Latin-ime?", Trinary.NO, info0.ime);
        assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters);
        assertEquals("Latin-rtl?", Trinary.NO, info0.rtl);
        assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq);
        assertEquals("Latin-density", 1, info0.density);
        assertEquals("Latin-Case", Trinary.YES, info0.hasCase);

        // The script under test here is Hebrew, so the labels say Hebrew.
        info0 = ScriptMetadata.getInfo(UScript.HEBREW);
        assertEquals("Hebrew-rtl", Trinary.YES, info0.rtl);
        assertEquals("Hebrew-shaping", Shaping.NO, info0.shapingReq);
        assertEquals("Hebrew-Case", Trinary.NO, info0.hasCase);
    }

    /**
     * Walks every ICU script code from {@code UScript.COMMON} up to
     * {@code UScript.CODE_LIMIT}. A script without metadata is an error if
     * Unicode characters are assigned to it; otherwise it is only logged.
     */
    @SuppressWarnings("deprecation")
    public void TestScripts() {
        UnicodeSet temp = new UnicodeSet();
        Set<String> missingScripts = new TreeSet<String>();
        Relation<IdUsage, String> map = Relation.of(
            new EnumMap<IdUsage, Set<String>>(IdUsage.class),
            LinkedHashSet.class);
        for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) {
            Info info = ScriptMetadata.getInfo(i);
            if (info != null) {
                map.put(info.idUsage,
                    UScript.getName(i) + "\t(" + UScript.getShortName(i)
                        + ")\t" + info);
                continue;
            }
            // There are many script codes that are not "real"; there are no
            // Unicode characters for them. Separate those out.
            temp.applyIntPropertyValue(UProperty.SCRIPT, i);
            if (temp.size() != 0) { // is real
                errln("Missing script metadata for " + UScript.getName(i)
                    + "\t(" + UScript.getShortName(i) + ")");
            } else { // is not real
                missingScripts.add(UScript.getShortName(i));
            }
        }
        for (Entry<IdUsage, String> entry : map.keyValueSet()) {
            logln("Script metadata found for script:" + entry.getValue());
        }
        if (!missingScripts.isEmpty()) {
            logln("No script metadata for the following scripts (no Unicode characters defined): "
                + missingScripts.toString());
        }
    }

    /**
     * Collects the survey tool display codes for {@code type} plus the
     * {@code type} attribute of every path that {@code english} returns for
     * {@code code}. Lifted from ShowLanguages.
     *
     * @param type code type passed to {@code getSurveyToolDisplayCodes}
     * @param code name-type constant passed to {@code getAvailableIterator}
     * @param sc source of the survey tool display codes
     * @param english file whose available paths are scanned
     * @return a new mutable set with the union of both sources
     */
    private static Set<String> getEnglishTypes(String type, int code,
        StandardCodes sc, CLDRFile english) {
        Set<String> result = new HashSet<String>(
            sc.getSurveyToolDisplayCodes(type));
        XPathParts parts = new XPathParts();
        for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) {
            parts.set(it.next());
            // Set.add already ignores duplicates; no contains() check needed.
            result.add(parts.getAttributeValue(-1, "type"));
        }
        return result;
    }

    /**
     * Returns the script codes gathered by
     * {@link #getEnglishTypes(String, int, StandardCodes, CLDRFile)} for
     * {@code "script"} and {@code CLDRFile.SCRIPT_NAME}. Lifted from ShowLanguages.
     *
     * @param sc source of the survey tool display codes
     * @param english file whose script-name paths are scanned
     * @return the set of script codes
     */
    private static Set<String> getScriptsToShow(StandardCodes sc,
        CLDRFile english) {
        return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english);
    }

    /**
     * Checks that every script from {@code getScriptsToShow} that has Unicode
     * characters assigned to it also has script metadata.
     */
    public void TestShowLanguages() {
        // lifted from ShowLanguages - this is what ShowLanguages tried to do.
        StandardCodes sc = testInfo.getStandardCodes();
        CLDRFile english = testInfo.getEnglish();
        Set<String> bads = new TreeSet<String>();
        UnicodeSet temp = new UnicodeSet();
        for (String s : getScriptsToShow(sc, english)) {
            if (ScriptMetadata.getInfo(s) != null) {
                continue;
            }
            // There are many script codes that are not "real"; there are no
            // Unicode characters for them. Separate those out.
            temp.applyIntPropertyValue(UProperty.SCRIPT,
                UScript.getCodeFromName(s));
            if (temp.size() != 0) { // is real
                bads.add(s);
            }
        }
        if (!bads.isEmpty()) {
            errln("No metadata for scripts: " + bads.toString());
        }
    }

    /**
     * Logs one line per script (skipping those in {@code ScriptMetadata.getExtras()})
     * with its id usage, the English name of its containing region, its code and
     * its English name. This test only logs; it makes no assertions.
     */
    public void TestGeographicGrouping() {
        CLDRFile english = testInfo.getEnglish();
        // The TreeSet orders the rows; only the third field is logged below.
        Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<Row.R3<IdUsage, String, String>>();
        Set<String> extras = ScriptMetadata.getExtras();
        for (Entry<String, Info> sc : ScriptMetadata.iterable()) {
            String scriptCode = sc.getKey();
            if (extras.contains(scriptCode)) {
                continue;
            }
            Info info = sc.getValue();
            String continent = Containment.getContinent(info.originCountry);
            // For continent 142 the subcontinent is used as the container;
            // every other continent is its own container.
            String container = continent.equals(CONTINENT_SPLIT_INTO_SUBCONTINENTS)
                ? Containment.getSubcontinent(info.originCountry)
                : continent;

            String line = info.idUsage
                + "\t"
                + english.getName(CLDRFile.TERRITORY_NAME, container)
                + "\t" + scriptCode + "\t"
                + english.getName(CLDRFile.SCRIPT_NAME, scriptCode);
            lines.add(Row.of(
                info.idUsage,
                english.getName(CLDRFile.TERRITORY_NAME, continent),
                line));
        }
        for (Row.R3<IdUsage, String, String> s : lines) {
            logln(s.get2());
        }
    }

    /**
     * Checks that the {@link ScriptMetadata.Groupings} together cover every
     * script except Zinh, Zyyy and Zzzz, and that no two groupings share a script.
     */
    public void TestScriptCategories() {

        // test completeness
        Set<String> scripts = new TreeSet<String>(ScriptMetadata.getScripts());
        scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz"));
        logln("All: " + scripts);
        ScriptMetadata.Groupings[] groupings = ScriptMetadata.Groupings.values();
        for (ScriptMetadata.Groupings x : groupings) {
            logln(x + ": " + x.scripts.toString());
            scripts.removeAll(x.scripts);
        }
        assertEquals("Completeness", Collections.<String>emptySet(), scripts);

        // test no overlap
        // Disjointness is symmetric, so each unordered pair is checked once.
        for (int i = 0; i < groupings.length; ++i) {
            for (int j = i + 1; j < groupings.length; ++j) {
                assertTrue("overlap",
                    Collections.disjoint(groupings[i].scripts, groupings[j].scripts));
            }
        }

        // Disabled comparisons against the ScriptCategories.OLD_* sets, kept for reference:
        // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN);
        // assertEqualsX(Groupings.MIDDLE_EASTERN,
        // ScriptCategories.OLD_MIDDLE_EASTERN);
        // assertEqualsX(Groupings.SOUTH_ASIAN,
        // ScriptCategories.OLD_SOUTH_ASIAN);
        // assertEqualsX(Groupings.SOUTHEAST_ASIAN,
        // ScriptCategories.OLD_SOUTHEAST_ASIAN);
        // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN);
        // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN);
        // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN);
        //
        // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS,
        // ScriptCategories.OLD_HISTORIC_SCRIPTS);
        //
    }

    // private void assertEqualsX(Groupings aRaw, Set<String> bRaw) {
    // assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw);
    // }

    /**
     * Asserts that {@code a} and {@code bRaw} (after mapping each element through
     * {@code ScriptMetadata.TO_SHORT_SCRIPT}) contain the same scripts.
     *
     * <p>The assertion compares the string forms of the two set differences.
     * Those differences are disjoint, so they can only match when both are empty.
     *
     * @param title prefix for the assertion message
     * @param a the set of script codes to compare
     * @param bRaw the other set, converted with {@code TO_SHORT_SCRIPT} before comparing
     */
    public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) {
        TreeSet<String> b = With.in(bRaw).toCollection(
            ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>());

        Set<String> onlyInA = new TreeSet<String>(a);
        onlyInA.removeAll(b);
        Set<String> onlyInB = new TreeSet<String>(b);
        onlyInB.removeAll(a);
        assertEquals(title + " New vs Old, ", onlyInA.toString(), onlyInB.toString());
    }

}