1 package org.unicode.cldr.tool; 2 3 import java.util.Collections; 4 import java.util.HashMap; 5 import java.util.HashSet; 6 import java.util.LinkedHashMap; 7 import java.util.LinkedHashSet; 8 import java.util.List; 9 import java.util.Locale; 10 import java.util.Map; 11 import java.util.Map.Entry; 12 import java.util.Set; 13 import java.util.TreeMap; 14 import java.util.TreeSet; 15 16 import org.unicode.cldr.util.Builder; 17 import org.unicode.cldr.util.CLDRConfig; 18 import org.unicode.cldr.util.CldrUtility; 19 import org.unicode.cldr.util.LanguageTagParser; 20 import org.unicode.cldr.util.StandardCodes; 21 import org.unicode.cldr.util.StringIterables; 22 23 import com.ibm.icu.impl.Row.R2; 24 import com.ibm.icu.util.ULocale; 25 26 public class LanguageCodeConverter { 27 private static Map<String, String> languageNameToCode = new TreeMap<>(); 28 private static Set<String> exceptionCodes = new TreeSet<>(); 29 private static Set<String> parseErrors = new LinkedHashSet<>(); 30 31 private static Map<String, R2<List<String>, String>> languageAliases = CLDRConfig.getInstance().getSupplementalDataInfo().getLocaleAliasInfo() 32 .get("language"); 33 34 /** 35 * Public only for testing. 36 * 37 * @internal 38 */ 39 public static final Map<String, String> GOOGLE_CLDR = Builder.with(new LinkedHashMap<String, String>()) // preserve order 40 .put("iw", "he") 41 .put("jw", "jv") 42 .put("no", "nb") 43 .put("tl", "fil") 44 .put("pt-BR", "pt") 45 .put("xx-bork", "x_bork") 46 .put("xx-elmer", "x_elmer") 47 .put("xx-hacker", "x_hacker") 48 .put("xx-pirate", "x_pirate") 49 .put("xx-klingon", "tlh") 50 .put("zh-CN", "zh") 51 .put("zh-TW", "zh_Hant") 52 .put("zh-HK", "zh_Hant_HK") 53 .put("sit-NP", "lif") 54 .put("ut", "und") 55 .put("un", "und") 56 .put("xx", "und") 57 58 // .put("sh", "fil") 59 .freeze(); 60 61 /** 62 * Public only for testing. 63 * 64 * @internal 65 */ 66 public static final Map<String, String> CLDR_GOOGLE = Builder.with(new HashMap<String, String>()) 67 .putAllTransposed(GOOGLE_CLDR) 68 .freeze(); 69 70 /** 71 * Public only for testing. 72 * 73 * @internal 74 */ 75 public static final Map<String, String> EXTRA_SCRIPTS = Builder.with(new HashMap<String, String>()) 76 .on("crs", "pcm", "tlh").put("Latn") 77 .freeze(); 78 79 static { 80 // Reads the CLDR copy of 81 // http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry 82 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes.getLStreg(); 83 Map<String, Map<String, String>> languages = lstreg.get("language"); 84 Set<String> validCodes = new HashSet<>(); 85 86 for (Entry<String, Map<String, String>> codeInfo : languages.entrySet()) { 87 String code = codeInfo.getKey(); 88 R2<List<String>, String> replacement = languageAliases.get(code); 89 // Returns "sh" -> <{"sr_Latn"}, reason> 90 if (replacement != null) { 91 List<String> replacements = replacement.get0(); 92 if (replacements.size() != 1) { 93 continue; 94 } 95 code = replacements.get(0); 96 if (code.contains("_")) { 97 continue; 98 } 99 } 100 // if (languageAliases.containsKey(code)) { 101 // continue; 102 // } 103 final Map<String, String> info = codeInfo.getValue(); 104 String deprecated = info.get("Deprecated"); 105 if (deprecated != null) { 106 continue; 107 } 108 String name = info.get("Description"); 109 if (name.equals("Private use")) { 110 continue; 111 } 112 validCodes.add(code); 113 if (name.contains(StandardCodes.DESCRIPTION_SEPARATOR)) { 114 for (String namePart : name.split(StandardCodes.DESCRIPTION_SEPARATOR)) { 115 addNameToCode("lstr", code, namePart); 116 } 117 } else { 118 addNameToCode("lstr", code, name); 119 } 120 } 121 122 // CLDRFile english; // = testInfo.getEnglish(); 123 for (String code : validCodes) { 124 String icuName = ULocale.getDisplayName(code, "en"); 125 addNameToCode("cldr", code, icuName); 126 // if (languageAliases.containsKey(code)) { 127 // continue; 128 // } 129 // String cldrName = english.getName("language", code); 130 // if (cldrName != null && !cldrName.equals("private-use")) { 131 // addNameToCode("cldr", code, cldrName); 132 // } 133 } 134 // add exceptions 135 LanguageTagParser ltp = new LanguageTagParser(); 136 for (String line : StringIterables.in(CldrUtility.getUTF8Data("external/alternate_language_names.txt"))) { 137 String[] parts = CldrUtility.cleanSemiFields(line); 138 if (parts == null || parts.length == 0) continue; 139 String code = parts[0]; 140 if (!validCodes.contains(code)) { 141 if (code.equals("*OMIT")) { 142 parseErrors.add("Skipping " + line); 143 continue; 144 } 145 String base = ltp.set(code).getLanguage(); 146 if (!validCodes.contains(base)) { 147 R2<List<String>, String> alias = languageAliases.get(base); 148 if (alias != null) { 149 code = alias.get0().get(0); 150 } else { 151 parseErrors.add("Skipping " + line); 152 continue; 153 } 154 } 155 } toUnderbarLocale(code)156 exceptionCodes.add(toUnderbarLocale(code)); 157 if (parts.length < 2) { 158 continue; 159 } 160 String name = parts[1]; 161 if (parts.length > 2) { 162 name += ";" + parts[2]; // HACK 163 } 164 addNameToCode("exception", code, name); 165 } 166 for (String cldr : GOOGLE_CLDR.values()) { 167 String goodCode = toUnderbarLocale(cldr); 168 exceptionCodes.add(goodCode); 169 } 170 languageNameToCode = Collections.unmodifiableMap(languageNameToCode); 171 exceptionCodes = Collections.unmodifiableSet(exceptionCodes); 172 parseErrors = Collections.unmodifiableSet(parseErrors); 173 } 174 addNameToCode(final String type, final String code, String name)175 private static void addNameToCode(final String type, final String code, String name) { 176 if (code.equals("mru") && name.equals("mru")) { 177 // mru=Mono (Cameroon) 178 // mro=Mru 179 // Ignore the CLDR mapping of the code to itself, 180 // to avoid clobbering the mapping of the real name Mru to the real code mro. 181 return; 182 } 183 name = name.toLowerCase(Locale.ENGLISH); 184 String oldCode = languageNameToCode.get(name); 185 if (oldCode != null) { 186 if (!oldCode.equals(code)) { 187 parseErrors.add("Name Collision! " + type + ": " + name + " <" + oldCode + ", " + code + ">"); 188 } else { 189 return; 190 } 191 } 192 languageNameToCode.put(name, code); 193 } 194 toGoogleLocaleId(String localeId)195 public static String toGoogleLocaleId(String localeId) { 196 // TODO fix to do languages, etc. field by field 197 localeId = localeId.replace("-", "_"); 198 String result = CLDR_GOOGLE.get(localeId); 199 result = result == null ? localeId : result; 200 return result.replace("_", "-"); 201 } 202 fromGoogleLocaleId(String localeId)203 public static String fromGoogleLocaleId(String localeId) { 204 localeId = localeId.replace("_", "-"); 205 // TODO fix to do languages, etc. field by field 206 String result = GOOGLE_CLDR.get(localeId); 207 result = result == null ? localeId : result; 208 return result.replace("-", "_"); 209 } 210 toUnderbarLocale(String localeId)211 public static String toUnderbarLocale(String localeId) { 212 return localeId.replace("-", "_"); 213 } 214 toHyphenLocale(String localeId)215 public static String toHyphenLocale(String localeId) { 216 return localeId.replace("_", "-"); 217 } 218 getCodeForName(String languageName)219 public static String getCodeForName(String languageName) { 220 return languageNameToCode.get(languageName.toLowerCase(Locale.ENGLISH)); 221 } 222 getExceptionCodes()223 public static Set<String> getExceptionCodes() { 224 return exceptionCodes; 225 } 226 getParseErrors()227 public static Set<String> getParseErrors() { 228 return parseErrors; 229 } 230 getLanguageNameToCode()231 public static Map<String, String> getLanguageNameToCode() { 232 return languageNameToCode; 233 } 234 235 } 236