• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.HashMultimap;
4 import com.google.common.collect.Multimap;
5 import com.google.common.collect.TreeMultimap;
6 import com.ibm.icu.impl.Row.R4;
7 import com.ibm.icu.text.Transform;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import org.unicode.cldr.util.CLDRConfig;
13 import org.unicode.cldr.util.CLDRFile;
14 import org.unicode.cldr.util.StandardCodes;
15 import org.unicode.cldr.util.StandardCodes.LstrField;
16 import org.unicode.cldr.util.StandardCodes.LstrType;
17 import org.unicode.cldr.util.SupplementalDataInfo;
18 
19 public class GenerateLanguageMatches {
20     private static final CLDRFile ENGLISH = CLDRConfig.getInstance().getEnglish();
21     private static final SupplementalDataInfo SDI =
22             CLDRConfig.getInstance().getSupplementalDataInfo();
23 
main(String[] args)24     public static void main(String[] args) {
25 
26         Map<String, Map<LstrField, String>> lstr =
27                 StandardCodes.getLstregEnumRaw().get(LstrType.language);
28 
29         // we will limit to locales that are in CLDR.
30 
31         Set<String> locales = CLDRConfig.getInstance().getFullCldrFactory().getAvailableLanguages();
32 
33         // Get the current languageMatch data
34 
35         List<R4<String, String, Integer, Boolean>> matchData =
36                 SDI.getLanguageMatcherData("written_new");
37         Multimap<String, String> desiredToSupported = HashMultimap.create();
38         for (R4<String, String, Integer, Boolean> item : matchData) {
39             desiredToSupported.put(item.get0(), item.get1());
40             if (!item.get3()) { // if not oneway
41                 desiredToSupported.put(item.get1(), item.get0());
42             }
43         }
44 
45         // get the language aliases, since we can suppress those.
46         Set<String> languageAliases = SDI.getLocaleAliasInfo().get("language").keySet();
47 
48         // filter all of the encompassed languages (only use macro languages that are in CLDR).
49 
50         Multimap<String, String> macroToEncompassed = TreeMultimap.create();
51         for (Entry<String, Map<LstrField, String>> localeInfo : lstr.entrySet()) {
52             String locale = localeInfo.getKey();
53             if (locale.contains("_")) {
54                 continue;
55             }
56             // filter out ones with aliases
57             if (languageAliases.contains(locale)) {
58                 continue;
59             }
60 
61             // we filter to only encompassed languages (those with macro languages)
62 
63             Map<LstrField, String> data = localeInfo.getValue();
64             String macroLanguage = data.get(LstrField.Macrolanguage);
65             if (macroLanguage == null || !locales.contains(macroLanguage)) {
66                 continue;
67             }
68 
69             // Filter out what is in LanguageInfo already
70 
71             if (desiredToSupported.containsEntry(locale, macroLanguage)) {
72                 continue;
73             }
74 
75             macroToEncompassed.put(macroLanguage, locale);
76         }
77 
78         // now print sorted items
79 
80         String last = "";
81         System.out.println(
82                 "<!-- START generated by GenerateLanguageMatches.java: don't manually change -->");
83         for (Entry<String, String> entry : macroToEncompassed.entries()) {
84             String macroLanguage = entry.getKey();
85             if (macroLanguage.equals("ku")) {
86                 continue; // these are problematic, since they shift scripts; also, might be better
87                 // to fall back to ar/fa
88             }
89             if (!last.contentEquals(macroLanguage)) {
90                 System.out.println("<!-- Encompassed by " + getName(macroLanguage) + " -->");
91             }
92             String encompassed = entry.getValue();
93             System.out.println(
94                     "\t\t\t<languageMatch desired=\""
95                             + encompassed
96                             + "\" supported=\""
97                             + macroLanguage
98                             + "\" distance=\"10\" oneway=\"true\"/>\t"
99                             + "<!-- "
100                             + getName(encompassed)
101                             + " -->");
102             last = macroLanguage;
103         }
104         System.out.println("<!-- END generated by GenerateLanguageMatches.java -->");
105     }
106 
107     static final Transform<String, String> MENU =
108             new Transform<String, String>() {
109                 @Override
110                 public String transform(@SuppressWarnings("unused") String source) {
111                     return "menu";
112                 }
113             };
114 
getName(String lang)115     private static String getName(String lang) {
116         return ENGLISH.getName(CLDRFile.LANGUAGE_NAME, lang, MENU);
117     }
118 }
119