package org.unicode.cldr.tool;

import java.io.File;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;

import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.WinningChoice;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UnicodeSet;

/**
 * Derives language-to-script mappings from two sources and exposes them as immutable
 * collections:
 * <ul>
 * <li>{@link #getSuppress()}: the {@code Suppress_Script} field of the language subtag
 * registry data, restricted to languages whose likely script is reported as {@code "Zzzz"};</li>
 * <li>{@link #getLanguageToScript()}: the script of the first exemplar character that is not
 * COMMON, INHERITED or UNKNOWN, for each language-only locale file in the exemplars
 * directory.</li>
 * </ul>
 * Both tables are computed once, in the static initializer. {@link #main(String[])} prints them.
 */
public class DeriveScripts {
    /** When true, {@link #add} logs every mapping that is newly added from exemplar data. */
    private static final boolean SHOW = false;

    static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo();
    static final Multimap<String, String> LANG_TO_SCRIPT;
    static final Map<String, String> SUPPRESS;

    static {
        // Only the exemplars directory is scanned; CLDRPaths.MAIN_DIRECTORY and
        // CLDRPaths.SEED_DIRECTORY were listed here at one point but are disabled.
        File[] paths = {
            new File(CLDRPaths.EXEMPLARS_DIRECTORY) };
        final Factory fullCldrFactory = SimpleFactory.make(paths, ".*");
        LikelySubtags ls = new LikelySubtags();
        LanguageTagParser ltp = new LanguageTagParser();

        Multimap<String, String> langToScript = TreeMultimap.create();

        // Pass 1: collect Suppress_Script values for languages without a known likely script.
        Map<String, String> suppress = new TreeMap<>();
        final Map<String, Map<LstrField, String>> langToInfo = StandardCodes.getLstregEnumRaw().get(LstrType.language);
        for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) {
            final String suppressValue = entry.getValue().get(LstrField.Suppress_Script);
            if (suppressValue == null) {
                continue;
            }
            final String langCode = entry.getKey();
            String likelyScript = ls.getLikelyScript(langCode);
            if (!likelyScript.equals("Zzzz")) {
                // A likely script is already known for this language, so it is not recorded here.
                continue;
            }
            suppress.put(langCode, suppressValue);
        }
        SUPPRESS = ImmutableMap.copyOf(suppress);

        // Pass 2: derive a script from the exemplar characters of each language-only locale.
        LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();

        for (String file : fullCldrFactory.getAvailable()) {
            final String fileLanguage = ltp.set(file).getLanguage();
            if (!file.equals(fileLanguage)) { // skip other variants
                continue;
            }
            String lang = canon.transform(fileLanguage);
            if (lang.equals("root")) {
                continue;
            }

            CLDRFile cldrFile;
            try {
                cldrFile = fullCldrFactory.make(lang, false);
            } catch (final SimpleFactory.NoSourceDirectoryException nsde) {
                throw new RuntimeException("Cannot load locale "+ lang+" for " + file
                    + " (canonicalized from " + fileLanguage+")", nsde);
            }
            UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING);
            String script = firstExplicitScript(exemplars);
            if (!script.isEmpty()) {
                add(langToScript, lang, script);
            }
        }
        LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript);
    }

    /**
     * Returns the short name of the script of the first exemplar whose leading code point is not
     * COMMON, INHERITED or UNKNOWN.
     *
     * @param exemplars the exemplar set to scan, in its iteration order
     * @return the script short name as given by {@link UScript#getShortName(int)}, or the empty
     *         string if no exemplar has an explicit script
     */
    private static String firstExplicitScript(UnicodeSet exemplars) {
        for (String s : exemplars) {
            if (s.isEmpty()) {
                // Guard: codePointAt(0) would throw on an empty string element.
                continue;
            }
            int scriptNum = UScript.getScript(s.codePointAt(0));
            if (scriptNum != UScript.COMMON && scriptNum != UScript.INHERITED && scriptNum != UScript.UNKNOWN) {
                return UScript.getShortName(scriptNum);
            }
        }
        return "";
    }

    /**
     * Records {@code lang -> script} in the given multimap; a {@code null} script is ignored.
     * When {@link #SHOW} is set, a line is printed for every mapping the multimap reports as
     * newly added.
     *
     * @param langToScript the multimap to update
     * @param lang the language code
     * @param script the script code, may be {@code null}
     */
    private static void add(Multimap<String, String> langToScript, String lang, String script) {
        if (script != null && langToScript.put(lang, script) && SHOW) {
            System.out.println("# Adding from actual exemplars: " + lang + ", " + script);
        }
    }

    /**
     * @return the immutable language-to-script multimap derived from exemplar characters
     */
    public static Multimap<String, String> getLanguageToScript() {
        return LANG_TO_SCRIPT;
    }

    /**
     * Prints one semicolon/tab separated line for a language: the code, the script field, and a
     * trailing comment holding the English language name, the status, and the values of
     * {@link Iso639Data#getScope} and {@link Iso639Data#getType} for the language.
     *
     * @param language the language code
     * @param scriptField the script (or script list) to show for the language
     * @param status a label describing where the script information came from
     */
    public static void showLine(String language, String scriptField, String status) {
        CLDRFile english = CONFIG.getEnglish();
        System.out.println(language + ";\t" + scriptField + "\t# " + english.getName(CLDRFile.LANGUAGE_NAME, language)
            + ";\t" + status
            + ";\t" + Iso639Data.getScope(language)
            + ";\t" + Iso639Data.getType(language));
    }

    /**
     * Prints the suppress-script table followed by the exemplar-derived table. The exemplar
     * table is grouped by the number of scripts per language, in increasing order; groups with
     * more than one script are introduced by a "NEEDS RESOLUTION" header.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LikelySubtags ls = new LikelySubtags();
        CLDRFile english = CONFIG.getEnglish();
        int count = 0; // running number across all exemplar lines

        int i = 0; // per-section total, reset after each "#total" line
        System.out.println("#From Suppress Script");
        for (Entry<String, String> entry : SUPPRESS.entrySet()) {
            showLine(entry.getKey(), entry.getValue(), "Suppress");
            ++i;
        }
        System.out.println("#total:\t" + i);
        i = 0;
        boolean haveMore = true;

        System.out.println("\n#From Exemplars");
        // One pass per script count; stop once no language has more scripts than the current pass.
        for (int scriptCount = 1; haveMore; ++scriptCount) {
            haveMore = false;
            if (scriptCount != 1) {
                System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts");
            }
            for (Entry<String, Collection<String>> entry : getLanguageToScript().asMap().entrySet()) {
                Collection<String> scripts = entry.getValue();
                final int scriptsSize = scripts.size();
                if (scriptsSize != scriptCount) {
                    if (scriptsSize > scriptCount) {
                        haveMore = true;
                    }
                    continue;
                }

                String lang = entry.getKey();
                final boolean single = scriptsSize == 1;
                showLine(lang, single ? scripts.iterator().next() : scripts.toString(), "Exemplars" + (single ? "" : "*"));
                ++i;
                // The likely script is only shown when there is more than one candidate.
                String likelyScript = single ? "" : ls.getLikelyScript(lang);
                System.out.println(++count + "\t" + scriptsSize + "\t" + lang + "\t" + english.getName(lang)
                    + "\t" + scripts + "\t" + likelyScript);
            }
            System.out.println("#total:\t" + i);
            i = 0;
        }
    }

    /**
     * @return the immutable map from language code to its {@code Suppress_Script} value, for
     *         languages whose likely script was reported as {@code "Zzzz"}
     */
    public static Map<String, String> getSuppress() {
        return SUPPRESS;
    }
}