package org.unicode.cldr.util;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.unicode.cldr.util.ChainedMap.M3;

import com.google.common.base.Joiner;
import com.ibm.icu.util.ULocale;

/**
 * Named groups of base languages, used to classify locales and to give them a stable
 * ordering (see {@link #COMPARATOR}).
 *
 * <p>Each constant carries an identifier string ({@link #iso}) that {@link #getName(CLDRFile)}
 * parses as a language tag. The membership of each group is registered once, in the static
 * initializer, and the declaration order of the constants is significant because the
 * comparator sorts by {@code ordinal()}.
 */
public enum LanguageGroup {
    root("und"), germanic("gem"), celtic("cel"), romance("roa"), slavic("sla"), baltic("bat"),
    indic("inc"), iranian("ira"), other_indo("ine_001"), caucasian("cau"), dravidian("dra"),
    uralic("urj"), cjk("und_Hani"), sino_tibetan("sit"), tai("tai"), austronesian("map"),
    turkic("trk"), afroasiatic("afa"), austroasiatic("aav"), niger_congo("nic"),
    east_sudanic("sdv"), songhay("son"), american("und_019"), art("art"), other("und_001");

    /** Identifier for the group; emitted as the {@code id} attribute by {@link #main}. */
    public final String iso;

    LanguageGroup(String iso) {
        this.iso = iso;
    }

    /** Base-language locale to its group; an unmodifiable, insertion-ordered view. */
    static final Map<ULocale, LanguageGroup> LANGUAGE_GROUP;

    /** Group to (base-language locale to zero-based position of that locale in the group). */
    static final M3<LanguageGroup, ULocale, Integer> GROUP_LANGUAGE = ChainedMap.of(
        new TreeMap<LanguageGroup, Object>(), new LinkedHashMap<ULocale, Object>(),
        Integer.class);

    /**
     * Registers {@code baseLanguages} as members of {@code group}, in the order given.
     * Positions continue from the number of locales already registered for the group.
     *
     * @param map the mutable locale-to-group map being populated
     * @param group the group the languages belong to
     * @param baseLanguages language codes to register
     * @throws IllegalArgumentException if a language was already registered in {@code map}
     */
    private static void add(Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages) {
        Map<ULocale, Integer> soFar = GROUP_LANGUAGE.get(group);
        int count = soFar == null ? 0 : soFar.size();
        for (String s : baseLanguages) {
            ULocale loc = new ULocale(s);
            if (map.put(loc, group) != null) {
                throw new IllegalArgumentException("duplicate: " + s + ", " + group);
            }
            GROUP_LANGUAGE.put(group, loc, count);
            ++count;
        }
    }

    static {
        LinkedHashMap<ULocale, LanguageGroup> temp = new LinkedHashMap<>();
        // The unmodifiable wrapper is a live view, so entries added to temp below are visible
        // through LANGUAGE_GROUP.
        LANGUAGE_GROUP = Collections.unmodifiableMap(temp);
        add(temp, root, "root");
        add(temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da",
            "nb", "nn", "fo", "is", "yi", "nds");
        add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br");
        add(temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro", "fur", "an",
            "co", "oc", "sc", "scn", "wa");
        add(temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk",
            "dsb", "hsb", "cu", "szl");
        add(temp, baltic, "lt", "lv", "prg");
        add(temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si",
            "kok", "ks", "mai", "doi", "dv", "sa", "trw");
        add(temp, iranian, "fa", "ps", "ku", "os", "ckb", "lrc", "mzn", "tg", "bgn", "sdh");
        add(temp, other_indo, "el", "hy", "sq");
        add(temp, dravidian, "ta", "te", "ml", "kn");
        add(temp, cjk, "zh", "yue", "ja", "ko");
        add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug", "sah", "tt", "ba", "cv");
        add(temp, uralic, "hu", "fi", "et", "se", "smn", "myv", "sma", "smj", "sms");
        add(temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh", "aa", "kab",
            "shi", "ssy", "ti", "byn", "gez", "sid", "syr", "tig", "wal");
        add(temp, tai, "th", "lo", "blt");
        add(temp, austronesian, "id", "ms", "jv", "fil", "haw", "mg", "to", "ceb", "mi", "su",
            "trv");
        add(temp, austroasiatic, "vi", "km", "sat");
        add(temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu", "wo", "xh", "agq", "ak",
            "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz",
            "jgo", "kam", "ki", "kkj", "ksb", "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd",
            "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve",
            "dav", "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog",
            "yav", "bss", "cch", "gaa", "kaj", "kcg", "ken", "kpe", "nqo", "ny", "st");
        add(temp, american, "chr", "kl", "lkt", "qu", "arn", "cad", "cic", "gn", "iu", "moh",
            "mus", "nv", "osa", "quc", "nci");
        add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln");
        add(temp, sino_tibetan, "my", "bo", "brx", "dz", "ii", "mni");
        add(temp, songhay, "dje", "khq", "ses", "twq");
        add(temp, caucasian, "ka", "ce");
        add(temp, other, "eu", "mn", "naq", "pcm", "kea", "mfe", "wbp");
        add(temp, art, "eo", "vo", "ia", "io", "jbo");
        //GROUP_LANGUAGE.freeze();
    }

    /**
     * Returns the group for a locale. Only the language subtag of {@code locale} is consulted.
     *
     * @param locale the locale to classify
     * @return the registered group of the locale's base language, with {@link #other} passed
     *         to {@code CldrUtility.ifNull} as the fallback for unregistered languages
     */
    public static LanguageGroup get(ULocale locale) {
        return CldrUtility.ifNull(LANGUAGE_GROUP.get(new ULocale(locale.getLanguage())), LanguageGroup.other);
    }

    /**
     * Returns every base-language locale that has been explicitly registered in some group.
     *
     * @return an unmodifiable view of the registered locales, in registration order
     */
    public static Set<ULocale> getExplicit() {
        return Collections.unmodifiableSet(LANGUAGE_GROUP.keySet());
    }

    /**
     * Returns the base-language locales registered for {@code group}.
     *
     * @param group the group to look up
     * @return an unmodifiable set of the group's locales; empty if none are registered
     */
    public static Set<ULocale> getLocales(LanguageGroup group) {
        Map<ULocale, Integer> members = GROUP_LANGUAGE.get(group);
        if (members == null) {
            // add() shows that a group with no registrations yields null here; report it as
            // an empty set instead of failing with a NullPointerException.
            return Collections.<ULocale>emptySet();
        }
        return Collections.unmodifiableSet(members.keySet());
    }

    /**
     * Returns the zero-based position of the locale's base language within its group.
     *
     * @param locale the locale to rank; only its language subtag is consulted
     * @return the position in the group, or {@link Integer#MAX_VALUE} if the base language is
     *         not registered in any group (so such locales sort after all registered ones)
     */
    public static int rankInGroup(ULocale locale) {
        locale = new ULocale(locale.getLanguage());
        LanguageGroup group = LANGUAGE_GROUP.get(locale);
        if (group == null) {
            return Integer.MAX_VALUE;
        }
        return GROUP_LANGUAGE.get(group).get(locale);
    }

    /**
     * Orders locales by the declaration order of their group, then by {@link #rankInGroup},
     * then by the natural order of {@link ULocale}.
     */
    public static final Comparator<ULocale> COMPARATOR = new Comparator<ULocale>() {
        @Override
        public int compare(ULocale o1, ULocale o2) {
            int diff = Integer.compare(get(o1).ordinal(), get(o2).ordinal());
            if (diff != 0) {
                return diff;
            }
            diff = Integer.compare(rankInGroup(o1), rankInGroup(o2));
            return diff != 0 ? diff : o1.compareTo(o2);
        }
    };

    /**
     * Prints a {@code <supplementalData>} XML document to standard output, with one
     * {@code <languageGroup>} element per group listing its registered locales.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CLDRFile english = CLDRConfig.getInstance().getEnglish();
        System.out.print("<supplementalData>\n"
            + "\t<version number=\"$Revision:$\"/>\n"
            + "\t<languageGroups>\n");
        for (LanguageGroup languageGroup : LanguageGroup.values()) {
            Set<ULocale> locales = LanguageGroup.getLocales(languageGroup);
            String englishName = languageGroup.getName(english);
            System.out.print("\t\t<languageGroup id=\"" + languageGroup.iso
                + "\" code=\"" + Joiner.on(", ").join(locales)
                + "\"/>\t<!-- " + englishName + " -->\n");
        }
        // Close the root element; the tag previously lacked its slash, leaving the XML malformed.
        System.out.print("\t</languageGroups>"
            + "\n</supplementalData>\n");
    }

    /**
     * Builds a display name for this group from its {@link #iso} identifier.
     *
     * <ul>
     * <li>Region {@code 001}: returns "Other" when the language is {@code und}; otherwise the
     *     name is prefixed with "Other ".</li>
     * <li>Any other non-empty region: returns that territory's name from {@code cldrFile}.</li>
     * <li>Script {@code Hani}: returns "CJK".</li>
     * <li>Otherwise: the name of the language from {@code cldrFile}, with " [Other]" and
     *     " languages" removed.</li>
     * </ul>
     *
     * @param cldrFile the file to read language and territory names from
     * @return the display name of this group
     * @throws IllegalArgumentException if the identifier has a script other than {@code Hani}
     */
    public String getName(CLDRFile cldrFile) {
        String prefix = "";
        LanguageTagParser ltp = new LanguageTagParser().set(iso);
        switch (ltp.getRegion()) {
        case "001":
            if (ltp.getLanguage().equals("und")) {
                return "Other";
            }
            prefix = "Other ";
            break;
        case "":
            break;
        default:
            return cldrFile.getName(CLDRFile.TERRITORY_NAME, ltp.getRegion());
        }
        switch (ltp.getScript()) {
        case "Hani":
            return "CJK";
        case "":
            break;
        default:
            throw new IllegalArgumentException("Need to fix code: " + ltp.getScript());
        }
        return prefix + cldrFile.getName(ltp.getLanguage()).replace(" [Other]", "").replace(" languages", "");
    }

    /** Returns {@link #getName(CLDRFile)} evaluated against the English file from {@code CLDRConfig}. */
    @Override
    public String toString() {
        return getName(CLDRConfig.getInstance().getEnglish());
    }
}