package org.unicode.cldr.tool;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row.R4;
import com.ibm.icu.text.Transform;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;

/**
 * Command-line tool that prints {@code <languageMatch>} elements to standard output, one for each
 * language in the language subtag registry data that names a macrolanguage available in CLDR.
 *
 * <p>Each printed element maps the encompassed language (desired) to its macrolanguage
 * (supported) with {@code distance="10"} and {@code oneway="true"}. Pairs that are already
 * present in the "written_new" language matcher data are not printed again.
 */
public class GenerateLanguageMatches {
    private static final CLDRFile ENGLISH = CLDRConfig.getInstance().getEnglish();
    private static final SupplementalDataInfo SDI =
            CLDRConfig.getInstance().getSupplementalDataInfo();

    /**
     * Macrolanguage that is left out of the output. These entries are problematic, since they
     * shift scripts; also, it might be better to fall back to ar/fa.
     */
    private static final String SUPPRESSED_MACROLANGUAGE = "ku";

    /** Transform that ignores its input and always answers {@code "menu"}. */
    static final Transform<String, String> MENU = source -> "menu";

    /**
     * Entry point: gathers the data, filters it, and prints the generated XML fragment.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Map<String, Map<LstrField, String>> languageSubtags =
                StandardCodes.getLstregEnumRaw().get(LstrType.language);

        // Only macrolanguages that CLDR has locales for are of interest.
        Set<String> cldrLanguages =
                CLDRConfig.getInstance().getFullCldrFactory().getAvailableLanguages();

        Multimap<String, String> alreadyMatched = loadExistingMatches();

        // Language codes that have an alias can be suppressed.
        Set<String> aliasedLanguages = SDI.getLocaleAliasInfo().get("language").keySet();

        Multimap<String, String> macroToEncompassed =
                collectEncompassed(
                        languageSubtags, cldrLanguages, alreadyMatched, aliasedLanguages);

        printMatches(macroToEncompassed);
    }

    /**
     * Reads the current "written_new" languageMatch data into a desired-to-supported multimap.
     * Rows that are not flagged as one-way are recorded in both directions.
     */
    private static Multimap<String, String> loadExistingMatches() {
        List<R4<String, String, Integer, Boolean>> rows =
                SDI.getLanguageMatcherData("written_new");
        Multimap<String, String> desiredToSupported = HashMultimap.create();
        for (R4<String, String, Integer, Boolean> row : rows) {
            String desired = row.get0();
            String supported = row.get1();
            desiredToSupported.put(desired, supported);
            boolean oneway = row.get3();
            if (oneway) {
                continue;
            }
            desiredToSupported.put(supported, desired);
        }
        return desiredToSupported;
    }

    /**
     * Builds the sorted macrolanguage-to-encompassed-language multimap.
     *
     * <p>A code is kept only if it contains no underscore, is not a language alias key, names a
     * macrolanguage that is contained in {@code cldrLanguages}, and is not already paired with
     * that macrolanguage in {@code alreadyMatched}.
     */
    private static Multimap<String, String> collectEncompassed(
            Map<String, Map<LstrField, String>> languageSubtags,
            Set<String> cldrLanguages,
            Multimap<String, String> alreadyMatched,
            Set<String> aliasedLanguages) {
        Multimap<String, String> macroToEncompassed = TreeMultimap.create();
        for (Entry<String, Map<LstrField, String>> subtag : languageSubtags.entrySet()) {
            String code = subtag.getKey();
            if (code.contains("_") || aliasedLanguages.contains(code)) {
                continue;
            }

            // Only encompassed languages (those that name a macrolanguage) are wanted.
            String macro = subtag.getValue().get(LstrField.Macrolanguage);
            boolean wanted =
                    macro != null
                            && cldrLanguages.contains(macro)
                            // skip what the existing languageMatch data already has
                            && !alreadyMatched.containsEntry(code, macro);
            if (wanted) {
                macroToEncompassed.put(macro, code);
            }
        }
        return macroToEncompassed;
    }

    /**
     * Prints the generated fragment: a START marker, the languageMatch elements in the multimap's
     * iteration order with a heading comment whenever the macrolanguage changes, and an END
     * marker.
     */
    private static void printMatches(Multimap<String, String> macroToEncompassed) {
        System.out.println(
                "<!-- START generated by GenerateLanguageMatches.java: don't manually change -->");
        String previousMacro = "";
        for (Entry<String, String> pair : macroToEncompassed.entries()) {
            String macro = pair.getKey();
            if (SUPPRESSED_MACROLANGUAGE.equals(macro)) {
                continue;
            }
            if (!previousMacro.equals(macro)) {
                System.out.println("<!-- Encompassed by " + getName(macro) + " -->");
            }
            System.out.println(formatMatch(pair.getValue(), macro));
            previousMacro = macro;
        }
        System.out.println("<!-- END generated by GenerateLanguageMatches.java -->");
    }

    /** Formats one languageMatch element followed by a comment naming the encompassed language. */
    private static String formatMatch(String encompassed, String macro) {
        StringBuilder line = new StringBuilder();
        line.append("\t\t\t<languageMatch desired=\"").append(encompassed);
        line.append("\" supported=\"").append(macro);
        line.append("\" distance=\"10\" oneway=\"true\"/>\t");
        line.append("<!-- ").append(getName(encompassed)).append(" -->");
        return line.toString();
    }

    /** Looks up the English language name for {@code lang}, passing {@link #MENU} as the transform. */
    private static String getName(String lang) {
        return ENGLISH.getName(CLDRFile.LANGUAGE_NAME, lang, MENU);
    }
}