1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.util.Collection; 5 import java.util.HashSet; 6 import java.util.Map; 7 import java.util.Map.Entry; 8 import java.util.Set; 9 import java.util.TreeMap; 10 11 import org.unicode.cldr.util.CLDRConfig; 12 import org.unicode.cldr.util.CLDRFile; 13 import org.unicode.cldr.util.CLDRFile.WinningChoice; 14 import org.unicode.cldr.util.CLDRPaths; 15 import org.unicode.cldr.util.Factory; 16 import org.unicode.cldr.util.Iso639Data; 17 import org.unicode.cldr.util.LanguageTagCanonicalizer; 18 import org.unicode.cldr.util.LanguageTagParser; 19 import org.unicode.cldr.util.SimpleFactory; 20 import org.unicode.cldr.util.StandardCodes; 21 import org.unicode.cldr.util.StandardCodes.LstrField; 22 import org.unicode.cldr.util.StandardCodes.LstrType; 23 import org.unicode.cldr.util.SupplementalDataInfo; 24 25 import com.google.common.collect.ImmutableMap; 26 import com.google.common.collect.ImmutableMultimap; 27 import com.google.common.collect.Multimap; 28 import com.google.common.collect.TreeMultimap; 29 import com.ibm.icu.lang.UScript; 30 import com.ibm.icu.text.UnicodeSet; 31 32 public class DeriveScripts { 33 private static final boolean SHOW = false; 34 35 static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 36 static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo(); 37 static final Multimap<String, String> LANG_TO_SCRIPT; 38 static final Map<String, String> SUPPRESS; 39 40 static { 41 File[] paths = { 42 // new File(CLDRPaths.MAIN_DIRECTORY), 43 // new File(CLDRPaths.SEED_DIRECTORY), 44 new File(CLDRPaths.EXEMPLARS_DIRECTORY) }; 45 final Factory fullCldrFactory = SimpleFactory.make(paths, ".*"); 46 LikelySubtags ls = new LikelySubtags(); 47 LanguageTagParser ltp = new LanguageTagParser(); 48 Set<String> seen = new HashSet<>(); 49 50 Multimap<String, String> langToScript = TreeMultimap.create(); 51 52 Map<String, String> suppress = new TreeMap<>(); 53 final Map<String, Map<LstrField, String>> langToInfo = StandardCodes.getLstregEnumRaw().get(LstrType.language); 54 for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) { 55 final String suppressValue = entry.getValue().get(LstrField.Suppress_Script); 56 if (suppressValue != null) { 57 final String langCode = entry.getKey(); 58 String likelyScript = ls.getLikelyScript(langCode); 59 if (!likelyScript.equals("Zzzz")) { 60 // if (!suppressValue.equals(likelyScript)) { 61 // System.out.println("#" + langCode + "\tWarning: likely=" + likelyScript + ", suppress=" + suppressValue); 62 // } else { 63 // System.out.println("#" + langCode + "\tSuppress=Likely: " + suppressValue); 64 // } 65 continue; 66 } suppress.put(langCode, suppressValue)67 suppress.put(langCode, suppressValue); 68 } 69 } 70 SUPPRESS = ImmutableMap.copyOf(suppress); 71 72 LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer(); 73 74 for (String file : fullCldrFactory.getAvailable()) { 75 String langScript = ltp.set(file).getLanguage(); 76 if (!file.equals(langScript)) { // skip other variants 77 continue; 78 } 79 // System.out.println(file); 80 // if (!seen.add(lang)) { // add if not present 81 // continue; 82 // } 83 String lang = canon.transform(ltp.getLanguage()); 84 if (lang.equals("root")) { 85 continue; 86 } 87 88 // String likelyScript = ls.getLikelyScript(lang); 89 // if (!likelyScript.equals("Zzzz")) { 90 // continue; 91 // } 92 93 String script = ""; 94 // script = ltp.getScript(); 95 // if (!script.isEmpty()) { 96 // add(langToScript, lang, script); 97 // continue; 98 // } 99 100 CLDRFile cldrFile = fullCldrFactory.make(lang, false); 101 UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING); 102 for (String s : exemplars) { 103 int scriptNum = UScript.getScript(s.codePointAt(0)); 104 if (scriptNum != UScript.COMMON && scriptNum != UScript.INHERITED && scriptNum != UScript.UNKNOWN) { 105 script = UScript.getShortName(scriptNum); 106 break; 107 } 108 } 109 if (!script.isEmpty()) { add(langToScript, lang, script)110 add(langToScript, lang, script); 111 } 112 } 113 LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript); 114 } 115 add(Multimap<String, String> langToScript, String lang, String script)116 private static void add(Multimap<String, String> langToScript, String lang, String script) { 117 if (script != null) { 118 if (langToScript.put(lang, script)) { 119 if (SHOW) System.out.println("# Adding from actual exemplars: " + lang + ", " + script); 120 } 121 } 122 } 123 getLanguageToScript()124 public static Multimap<String, String> getLanguageToScript() { 125 return LANG_TO_SCRIPT; 126 } 127 showLine(String language, String scriptField, String status)128 public static void showLine(String language, String scriptField, String status) { 129 CLDRFile english = CONFIG.getEnglish(); 130 System.out.println(language + ";\t" + scriptField + "\t# " + english.getName(CLDRFile.LANGUAGE_NAME, language) 131 + ";\t" + status 132 + ";\t" + Iso639Data.getScope(language) 133 + ";\t" + Iso639Data.getType(language)); 134 } 135 main(String[] args)136 public static void main(String[] args) { 137 LikelySubtags ls = new LikelySubtags(); 138 CLDRFile english = CONFIG.getEnglish(); 139 int count = 0; 140 141 int i = 0; 142 System.out.println("#From Suppress Script"); 143 for (Entry<String, String> entry : SUPPRESS.entrySet()) { 144 showLine(entry.getKey(), entry.getValue(), "Suppress"); 145 ++i; 146 } 147 System.out.println("#total:\t" + i); 148 i = 0; 149 boolean haveMore = true; 150 151 System.out.println("\n#From Exemplars"); 152 for (int scriptCount = 1; haveMore; ++scriptCount) { 153 haveMore = false; 154 if (scriptCount != 1) { 155 System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts"); 156 } 157 for (Entry<String, Collection<String>> entry : getLanguageToScript().asMap().entrySet()) { 158 Collection<String> scripts = entry.getValue(); 159 final int scriptsSize = scripts.size(); 160 if (scriptsSize != scriptCount) { 161 if (scriptsSize > scriptCount) { 162 haveMore = true; 163 } 164 continue; 165 } 166 167 String lang = entry.getKey(); 168 showLine(lang, scripts.size() == 1 ? scripts.iterator().next() : scripts.toString(), "Exemplars" + (scripts.size() == 1 ? "" : "*")); 169 ++i; 170 String likelyScript = scriptsSize == 1 ? "" : ls.getLikelyScript(lang); 171 System.out.println(++count + "\t" + scriptsSize + "\t" + lang + "\t" + english.getName(lang) 172 + "\t" + scripts + "\t" + likelyScript 173 // + "\t" + script + "\t" + english.getName(CLDRFile.SCRIPT_NAME, script) 174 ); 175 } 176 System.out.println("#total:\t" + i); 177 i = 0; 178 } 179 } 180 getSuppress()181 public static Map<String, String> getSuppress() { 182 return SUPPRESS; 183 } 184 } 185