1 package org.unicode.cldr.util; 2 3 import java.util.Collections; 4 import java.util.Comparator; 5 import java.util.LinkedHashMap; 6 import java.util.Map; 7 import java.util.Set; 8 import java.util.TreeMap; 9 10 import org.unicode.cldr.util.ChainedMap.M3; 11 12 import com.ibm.icu.dev.util.CollectionUtilities; 13 import com.ibm.icu.util.ULocale; 14 15 public enum LanguageGroup { 16 root("und"), germanic("gem"), celtic("cel"), romance("roa"), slavic("sla"), baltic("bat"), indic("inc"), other_indo("ine_001"), dravidian("dra"), uralic( 17 "urj"), cjk("und_Hani"), sino_tibetan("sit"), tai("tai"), austronesian("map"), turkic("trk"), afroasiatic( 18 "afa"), austroasiatic("aav"), niger_congo("nic"), east_sudanic("sdv"), songhay("son"), american("und_019"), art("art"), other("und_001"); 19 20 public final String iso; 21 LanguageGroup(String iso)22 LanguageGroup(String iso) { 23 this.iso = iso; 24 } 25 26 static final Map<ULocale, LanguageGroup> LANGUAGE_GROUP; 27 static final M3<LanguageGroup, ULocale, Integer> GROUP_LANGUAGE = ChainedMap.of(new TreeMap<LanguageGroup, Object>(), new LinkedHashMap<ULocale, Object>(), 28 Integer.class); 29 add(Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages)30 private static void add(Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages) { 31 Map<ULocale, Integer> soFar = GROUP_LANGUAGE.get(group); 32 int count = soFar == null ? 0 : soFar.size(); 33 for (String s : baseLanguages) { 34 ULocale loc = new ULocale(s); 35 if (map.put(loc, group) != null) { 36 throw new IllegalArgumentException("duplicate: " + s + ", " + group); 37 } 38 ; 39 GROUP_LANGUAGE.put(group, loc, count); 40 ++count; 41 } 42 } 43 44 static { 45 LinkedHashMap<ULocale, LanguageGroup> temp = new LinkedHashMap<>(); 46 LANGUAGE_GROUP = Collections.unmodifiableMap(temp); add(temp, root, "root")47 add(temp, root, "root"); add(temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", "nb", "nn", "fo", "is", "yi")48 add(temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", "nb", "nn", "fo", "is", "yi"); add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br")49 add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br"); add(temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro")50 add(temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro"); add(temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk")51 add(temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk"); add(temp, baltic, "lt", "lv")52 add(temp, baltic, "lt", "lv"); add(temp, other_indo, "el", "hy", "sq", "fa", "ps", "os")53 add(temp, other_indo, "el", "hy", "sq", "fa", "ps", "os"); add(temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si")54 add(temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si"); add(temp, dravidian, "ta", "te", "ml", "kn")55 add(temp, dravidian, "ta", "te", "ml", "kn"); add(temp, cjk, "zh", "yue", "ja", "ko")56 add(temp, cjk, "zh", "yue", "ja", "ko"); add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug")57 add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug"); add(temp, uralic, "hu", "fi", "et", "se", "smn")58 add(temp, uralic, "hu", "fi", "et", "se", "smn"); add(temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh")59 add(temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh"); add(temp, tai, "th", "lo")60 add(temp, tai, "th", "lo"); add(temp, austronesian, "id", "ms", "jv", "fil", "haw")61 add(temp, austronesian, "id", "ms", "jv", "fil", "haw"); add(temp, austroasiatic, "vi", "km")62 add(temp, austroasiatic, "vi", "km"); add(temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu")63 add(temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu"); add(temp, other, "ka", "eu", "mn", "naq")64 add(temp, other, "ka", "eu", "mn", "naq"); add(temp, sino_tibetan, "my")65 add(temp, sino_tibetan, "my"); add(temp, afroasiatic, "aa", "kab", "shi", "ssy", "ti")66 add(temp, afroasiatic, "aa", "kab", "shi", "ssy", "ti"); add(temp, american, "chr", "kl", "lkt", "qu")67 add(temp, american, "chr", "kl", "lkt", "qu"); add(temp, art, "eo", "vo", "ia")68 add(temp, art, "eo", "vo", "ia"); add(temp, austronesian, "mg", "to")69 add(temp, austronesian, "mg", "to"); add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln")70 add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln"); add(temp, indic, "kok", "ks")71 add(temp, indic, "kok", "ks"); add(temp, niger_congo, "agq", "ak", "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz", "jgo", "kam", "ki", "kkj", "ksb", "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd", "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve", "dav", "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog", "yav")72 add(temp, niger_congo, "agq", "ak", "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz", "jgo", "kam", "ki", "kkj", "ksb", 73 "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd", "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve", "dav", 74 "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog", "yav"); add(temp, romance, "fur", "kea", "mfe")75 add(temp, romance, "fur", "kea", "mfe"); add(temp, sino_tibetan, "bo", "brx", "dz", "ii")76 add(temp, sino_tibetan, "bo", "brx", "dz", "ii"); add(temp, slavic, "dsb", "hsb")77 add(temp, slavic, "dsb", "hsb"); add(temp, songhay, "dje", "khq", "ses", "twq")78 add(temp, songhay, "dje", "khq", "ses", "twq"); add(temp, turkic, "sah")79 add(temp, turkic, "sah"); 80 //GROUP_LANGUAGE.freeze(); 81 } 82 get(ULocale locale)83 public static LanguageGroup get(ULocale locale) { 84 return CldrUtility.ifNull(LANGUAGE_GROUP.get(new ULocale(locale.getLanguage())), LanguageGroup.other); 85 } 86 getExplicit()87 public static Set<ULocale> getExplicit() { 88 return Collections.unmodifiableSet(LANGUAGE_GROUP.keySet()); 89 } 90 getLocales(LanguageGroup group)91 public static Set<ULocale> getLocales(LanguageGroup group) { 92 return Collections.unmodifiableSet(GROUP_LANGUAGE.get(group).keySet()); 93 } 94 95 /** 96 * return position in group, or -1 if in no group 97 * @param locale 98 * @return 99 */ rankInGroup(ULocale locale)100 public static int rankInGroup(ULocale locale) { 101 locale = new ULocale(locale.getLanguage()); 102 LanguageGroup group = LANGUAGE_GROUP.get(locale); 103 if (group == null) { 104 return Integer.MAX_VALUE; 105 } 106 return GROUP_LANGUAGE.get(group).get(locale); 107 } 108 109 public static Comparator<ULocale> COMPARATOR = new Comparator<ULocale>() { 110 @Override 111 public int compare(ULocale o1, ULocale o2) { 112 LanguageGroup group1 = get(o1); 113 LanguageGroup group2 = get(o2); 114 int diff = group1.ordinal() - group2.ordinal(); 115 if (diff != 0) return diff; 116 int r1 = rankInGroup(o1); 117 int r2 = rankInGroup(o2); 118 diff = r1 - r2; 119 return diff != 0 ? diff : o1.compareTo(o2); 120 } 121 }; 122 main(String[] args)123 public static void main(String[] args) { 124 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 125 System.out.print("<supplementalData>\n" 126 + "\t<version number=\"$Revision:$\"/>\n" 127 + "\t<languageGroups>\n"); 128 for (LanguageGroup languageGroup : LanguageGroup.values()) { 129 Set<ULocale> locales = LanguageGroup.getLocales(languageGroup); 130 String englishName = languageGroup.getName(english); 131 System.out.print("\t\t<languageGroup id=\"" + languageGroup.iso 132 + "\" code=\"" + CollectionUtilities.join(locales, ", ") 133 + "\"/>\t<!-- " + englishName + " -->\n"); 134 } 135 System.out.print("\t</languageGroups>" 136 + "\n<supplementalData>\n"); 137 } 138 getName(CLDRFile cldrFile)139 public String getName(CLDRFile cldrFile) { 140 String prefix = ""; 141 LanguageTagParser ltp = new LanguageTagParser().set(iso); 142 switch (ltp.getRegion()) { 143 case "001": 144 if (ltp.getLanguage().equals("und")) { 145 return "Other"; 146 } 147 prefix = "Other "; 148 break; 149 case "": 150 break; 151 default: 152 return cldrFile.getName(CLDRFile.TERRITORY_NAME, ltp.getRegion()); 153 } 154 switch (ltp.getScript()) { 155 case "Hani": 156 return "CJK"; 157 case "": 158 break; 159 default: 160 throw new IllegalArgumentException("Need to fix code: " + ltp.getScript()); 161 } 162 return prefix + cldrFile.getName(ltp.getLanguage()).replace(" [Other]", "").replace(" languages", ""); 163 } 164 165 @Override toString()166 public String toString() { 167 return getName(CLDRConfig.getInstance().getEnglish()); 168 } 169 }