// Code-browser navigation links (Home, Line#, Scopes#, Navigate#, Raw, Download) removed; the Java source follows.
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.text.BreakIterator;
4 import com.ibm.icu.text.Collator;
5 import com.ibm.icu.text.NumberFormat;
6 import com.ibm.icu.text.RuleBasedCollator;
7 import com.ibm.icu.text.UTF16;
8 import com.ibm.icu.text.UnicodeSet;
9 import com.ibm.icu.util.ULocale;
10 import java.io.PrintWriter;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.Comparator;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.List;
17 import java.util.Map;
18 import java.util.Random;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 import org.unicode.cldr.draft.FileUtilities;
23 import org.unicode.cldr.util.ArrayComparator;
24 import org.unicode.cldr.util.CLDRFile;
25 import org.unicode.cldr.util.CLDRPaths;
26 import org.unicode.cldr.util.Factory;
27 import org.unicode.cldr.util.Level;
28 import org.unicode.cldr.util.Organization;
29 import org.unicode.cldr.util.StandardCodes;
30 import org.unicode.cldr.util.SupplementalDataInfo;
31 import org.unicode.cldr.util.XPathParts;
32 
33 public class GenerateG2xG2 {
34     static CLDRFile english;
35     static CLDRFile root;
36 
main(String[] args)37     public static void main(String[] args) throws Exception {
38         if (showLocales(-1)) return;
39         // showCollator();
40 
41         String sourceLanguage = "G5";
42         String targetLanguage = "G5";
43         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
44         english = cldrFactory.make("en", true);
45         root = cldrFactory.make("root", true);
46         StandardCodes sc = StandardCodes.make();
47         Map<Organization, Map<String, Level>> type_code_value = sc.getLocaleTypes();
48         Set<String> sourceSet = new TreeSet<>();
49         Set<String> targetLanguageSet = new TreeSet<>();
50         targetLanguageSet.add("no");
51         addPriority("G2", "nn");
52         addPriority("G2", "no");
53         targetLanguageSet.add("nn");
54         Set<String> targetScriptSet = new TreeSet<>();
55         Set<String> targetRegionSet = new TreeSet<>();
56         Set<String> targetTZSet = new TreeSet<>();
57         Set<String> targetCurrencySet = new TreeSet<>();
58         for (Organization type : type_code_value.keySet()) {
59             Map<String, Level> code_value = type_code_value.get(type);
60             if (!type.equals(Organization.ibm)) continue;
61             for (String locale : code_value.keySet()) {
62                 if (locale.equals("no")) continue;
63                 String priority = code_value.get(locale).toString();
64                 ULocale ulocale = new ULocale(locale);
65                 String language = ulocale.getLanguage();
66                 String script = ulocale.getScript();
67                 String territory = ulocale.getCountry();
68                 if (sourceLanguage.compareTo(priority) >= 0) {
69                     if (language.equals("no")) language = "nn";
70                     locale = new ULocale(language, script).toString();
71                     sourceSet.add(locale);
72                     addPriority(priority, locale);
73                 }
74                 if (targetLanguage.compareTo(priority) >= 0) {
75                     targetLanguageSet.add(language);
76                     targetScriptSet.add(script);
77                     targetRegionSet.add(territory);
78                     addPriority(priority, language);
79                     addPriority(priority, script);
80                     addPriority("G4", territory); // will normally be overridden
81                 }
82             }
83         }
84         // set the priorities for territories
85         Map<String, List<String>> worldBankInfo = sc.getWorldBankInfo();
86         Set<String> euCodes =
87                 new HashSet<>(
88                         Arrays.asList(
89                                 new String[] {
90                                     "AT", "BE", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR",
91                                     "HU", "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "SI",
92                                     "ES", "SE", "GB"
93                                 }));
94         for (String countryCode : worldBankInfo.keySet()) {
95             if (priorityMap.get(countryCode) == null)
96                 continue; // only use ones we already have: defaults G4
97             List<String> values = worldBankInfo.get(countryCode);
98             double gdp = Double.parseDouble(values.get(1));
99             if (gdp >= 1E+13) addPriority("G0", countryCode);
100             else if (gdp >= 1E+12) addPriority("G1", countryCode);
101             else if (gdp >= 1E+11) addPriority("G2", countryCode);
102             else if (euCodes.contains(countryCode)) addPriority("G3", countryCode);
103             // else if (gdp >= 1E+10) addPriority("G4", countryCode);
104         }
105         // fill in the currencies, and TZs for the countries that have multiple zones
106         Map<String, Set<String>> c2z = sc.getCountryToZoneSet();
107         SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
108         Set<String> mainTimeZones = supplementalDataInfo.getCanonicalTimeZones();
109         for (Iterator<String> it = targetRegionSet.iterator(); it.hasNext(); ) {
110             String country = it.next();
111             String priority = priorityMap.get(country);
112             for (Iterator<String> it2 = getCurrency(country).iterator(); it2.hasNext(); ) {
113                 String currency = it2.next();
114                 targetCurrencySet.add(currency);
115                 addPriority(priority, currency);
116             }
117             Set<String> s = c2z.get(country);
118             if (s.size() == 1) continue;
119             for (Iterator<String> it2 = s.iterator(); it2.hasNext(); ) {
120                 String tzid = it2.next();
121                 if (!mainTimeZones.contains(tzid)) continue;
122                 targetTZSet.add(tzid);
123                 addPriority(priority, tzid);
124             }
125         }
126         // print out missing translations.
127         PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "G2xG2.txt");
128         // show priorities
129         Comparator<String> comp = new UTF16.StringComparator();
130         @SuppressWarnings("unchecked")
131         Set<String[]> priority_set =
132                 new TreeSet<String[]>(new ArrayComparator(new Comparator[] {comp, comp, comp}));
133         for (Iterator<String> it = priorityMap.keySet().iterator(); it.hasNext(); ) {
134             String code = it.next();
135             String priority = priorityMap.get(code);
136             if (priority == null) continue;
137             int type = getType(code);
138             // if (type != CLDRFile.TERRITORY_NAME) continue;
139             priority_set.add(new String[] {priority, type + "", code});
140         }
141         String lastPriority = "";
142         // String lastType = "";
143         for (Iterator<String[]> it = priority_set.iterator(); it.hasNext(); ) {
144             String[] items = it.next();
145             if (!lastPriority.equals(items[0])) {
146                 lastPriority = items[0];
147                 pw.println();
148                 // pw.println(lastPriority);
149             }
150             String typeName = getTypeName(items[2]);
151             pw.println(
152                     lastPriority
153                             + "\t"
154                             + typeName
155                             + "\t"
156                             + items[2]
157                             + "\t("
158                             + getItemName(english, items[2])
159                             + ")");
160         }
161         pw.flush();
162         // print out missing translations.
163         for (Iterator<String> it = sourceSet.iterator(); it.hasNext(); ) {
164             String sourceLocale = it.next();
165             System.out.print(sourceLocale + ", ");
166             CLDRFile sourceData = cldrFactory.make(sourceLocale, true);
167             pw.println();
168             String title = sourceLocale;
169             checkItems(pw, title, sourceData, CLDRFile.LANGUAGE_NAME, targetLanguageSet);
170             checkItems(pw, title, sourceData, CLDRFile.SCRIPT_NAME, targetScriptSet);
171             checkItems(pw, title, sourceData, CLDRFile.TERRITORY_NAME, targetRegionSet);
172             checkItems(pw, title, sourceData, CLDRFile.CURRENCY_NAME, targetCurrencySet);
173             // only check timezones if exemplar characters don't include a-z
174             String v = sourceData.getStringValue("//ldml/characters/exemplarCharacters");
175             UnicodeSet exemplars = new UnicodeSet(v);
176             if (exemplars.contains('a', 'z')) continue;
177             checkItems(pw, title, sourceData, CLDRFile.TZ_EXEMPLAR, targetTZSet);
178         }
179         pw.println();
180         pw.println("Sizes - incremental");
181         pw.println();
182         int runningTotalCount = 0;
183         int runningMissingCount = 0;
184         NumberFormat percent = NumberFormat.getPercentInstance();
185         percent.setMinimumFractionDigits(1);
186         NumberFormat nf = NumberFormat.getInstance();
187         nf.setGroupingUsed(true);
188         nf.setMinimumFractionDigits(0);
189         for (Iterator<String> it = totalMap.keySet().iterator(); it.hasNext(); ) {
190             String key = it.next();
191             Totals t = totalMap.get(key);
192             runningTotalCount = t.totalCount;
193             runningMissingCount = t.missingCount;
194             pw.println(
195                     key.substring(0, 2)
196                             + "\t"
197                             + key.substring(2)
198                             + "\t"
199                             + runningMissingCount
200                             + "\t"
201                             + runningTotalCount
202                             + "\t"
203                             + percent.format(runningMissingCount / (0.0 + runningTotalCount)));
204         }
205         pw.close();
206         System.out.println();
207         System.out.println("Done");
208     }
209 
showLocales(int choice)210     private static boolean showLocales(int choice) throws Exception {
211         ULocale desiredDisplayLocale = ULocale.ENGLISH;
212         Set<String> testSet = new TreeSet<>();
213         StandardCodes sc = StandardCodes.make();
214         {
215             Set<String> countries = sc.getGoodAvailableCodes("territory");
216             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
217             english = cldrFactory.make("en", true);
218             for (Iterator<String> it = countries.iterator(); it.hasNext(); ) {
219                 String territory = it.next();
220                 if (territory.charAt(0) < 'A') continue;
221                 String locale = "haw-" + territory;
222                 System.out.print(locale + ": " + english.getName(locale) + ", ");
223             }
224             if (true) return true;
225         }
226 
227         if (choice == -1) {
228 
229             testSet.addAll(sc.getGoodAvailableCodes("currency"));
230             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
231             english = cldrFactory.make("en", false);
232             for (Iterator it = testSet.iterator(); it.hasNext(); ) {
233                 String country = (String) it.next();
234                 System.out.println(
235                         country + "\t" + english.getName(CLDRFile.CURRENCY_NAME, country));
236             }
237             return true;
238         } else if (choice == 0) { // get available
239             ULocale[] list = BreakIterator.getAvailableULocales();
240             for (int i = 0; i < list.length; ++i) {
241                 testSet.add(list[i].toString());
242             }
243         } else {
244             boolean USE_3066bis = choice == 2;
245             // produce random list of RFC3066 language tags
246             Set<String> legacy = sc.getAvailableCodes("legacy");
247             List<String> language_subtags = new ArrayList<>(sc.getGoodAvailableCodes("language"));
248             List<String> script_subtags = new ArrayList<>(sc.getGoodAvailableCodes("script"));
249             List<String> region_subtags = new ArrayList<>(sc.getGoodAvailableCodes("territory"));
250             for (String possibility : legacy) {
251                 System.out.println(possibility);
252                 if (new ULocale(possibility).getScript().length() != 0) {
253                     System.out.println("\tAdding");
254                     testSet.add(possibility);
255                 }
256             }
257             if (!USE_3066bis)
258                 for (Iterator it = region_subtags.iterator(); it.hasNext(); ) {
259                     String possibility = (String) it.next();
260                     if (possibility.compareTo("A") < 0) it.remove();
261                 }
262             Random rand = new Random();
263             for (int i = 0; i < 200; ++i) {
264                 int r = rand.nextInt(language_subtags.size());
265                 String result = language_subtags.get(rand.nextInt(language_subtags.size()));
266                 if (USE_3066bis && rand.nextDouble() > 0.5) {
267                     result += "-" + script_subtags.get(rand.nextInt(script_subtags.size()));
268                 }
269                 if (rand.nextDouble() > 0.1) {
270                     result += "-" + region_subtags.get(rand.nextInt(region_subtags.size()));
271                 }
272                 testSet.add(result);
273             }
274         }
275         for (Iterator<String> it = testSet.iterator(); it.hasNext(); ) {
276             ULocale language = new ULocale(it.next());
277             System.out.println(language + " \t" + language.getDisplayName(desiredDisplayLocale));
278         }
279         return true;
280     }
281 
showCollator()282     private static void showCollator() throws Exception {
283         RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(new ULocale("zh"));
284         showExample(col);
285         String rules = col.getRules(false);
286         // System.out.println(com.ibm.icu.impl.Utility.escape(rules));
287         rules += "& \u93CA < A <<< a & \u7C3F < B <<< b";
288         RuleBasedCollator col2 = new RuleBasedCollator(rules);
289         showExample(col2);
290     }
291 
showExample(RuleBasedCollator col)292     private static void showExample(RuleBasedCollator col) {
293         String samples = "a A b B \u5416 \u93CA \u516b \u7C3F";
294         Set<String> s = new TreeSet<>(col);
295         s.addAll(Arrays.asList(samples.split(" ")));
296         System.out.println(com.ibm.icu.impl.Utility.escape(s.toString()));
297     }
298 
299     static Map<String, String> priorityMap = new TreeMap<>();
300 
addPriority(String priority, String code)301     static void addPriority(String priority, String code) {
302         if (code.length() == 0) return;
303         String oldPriority = priorityMap.get(code);
304         if (oldPriority == null || priority.compareTo(oldPriority) < 0)
305             priorityMap.put(code, priority);
306         System.out.println(code + ": " + priority);
307     }
308 
309     static class Totals {
310         int totalCount;
311         int missingCount;
312     }
313 
314     static Map<String, Totals> totalMap = new TreeMap<>();
315 
checkItems( PrintWriter pw, String sourceLocale, CLDRFile sourceData, int type, Set<String> targetItemSet)316     static void checkItems(
317             PrintWriter pw,
318             String sourceLocale,
319             CLDRFile sourceData,
320             int type,
321             Set<String> targetItemSet) {
322         for (Iterator<String> it2 = targetItemSet.iterator(); it2.hasNext(); ) {
323             String item = it2.next();
324             if (item.length() == 0) continue;
325             String key = priorityMap.get(sourceLocale) + "" + priorityMap.get(item);
326             Totals t = totalMap.get(key);
327             if (t == null) totalMap.put(key, t = new Totals());
328             t.totalCount++;
329             String translation = getItemName(sourceData, type, item);
330             String rootName = getItemName(root, type, item);
331             if (rootName.equals(translation)) {
332                 t.missingCount++;
333                 pw.println(
334                         priorityMap.get(sourceLocale)
335                                 + "\t"
336                                 + sourceLocale
337                                 + "\t("
338                                 + english.getName(sourceLocale)
339                                 + ": "
340                                 + sourceData.getName(sourceLocale)
341                                 + ")"
342                                 + "\t"
343                                 + priorityMap.get(item)
344                                 + "\t"
345                                 + item
346                                 + "\t("
347                                 + getItemName(english, type, item)
348                                 + ")");
349             }
350         }
351     }
352 
getItemName(CLDRFile data, String item)353     private static String getItemName(CLDRFile data, String item) {
354         return getItemName(data, getType(item), item);
355     }
356 
getType(String item)357     private static int getType(String item) {
358         int type = CLDRFile.LANGUAGE_NAME;
359         if (item.indexOf('/') >= 0) type = CLDRFile.TZ_EXEMPLAR; // America/Los_Angeles
360         else if (item.length() == 4) type = CLDRFile.SCRIPT_NAME; // Hant
361         else if (item.charAt(0) <= '9') type = CLDRFile.TERRITORY_NAME; // 001
362         else if (item.charAt(0) < 'a') {
363             if (item.length() == 3) type = CLDRFile.CURRENCY_NAME;
364             else type = CLDRFile.TERRITORY_NAME; // US or USD
365         }
366         return type;
367     }
368 
getTypeName(String item)369     private static String getTypeName(String item) {
370         switch (getType(item)) {
371             case CLDRFile.LANGUAGE_NAME:
372                 return "Lang";
373             case CLDRFile.TZ_EXEMPLAR:
374                 return "Zone";
375             case CLDRFile.SCRIPT_NAME:
376                 return "Script";
377             case CLDRFile.TERRITORY_NAME:
378                 return "Region";
379             case CLDRFile.CURRENCY_NAME:
380                 return "Curr.";
381         }
382         return "?";
383     }
384 
getItemName(CLDRFile data, int type, String item)385     private static String getItemName(CLDRFile data, int type, String item) {
386         String result;
387         if (type == CLDRFile.LANGUAGE_NAME) {
388             result = data.getName(item);
389         } else if (type != CLDRFile.TZ_EXEMPLAR) {
390             result = data.getName(type, item);
391         } else {
392             String prefix = "//ldml/dates/timeZoneNames/zone[@type=\"" + item + "\"]/exemplarCity";
393             result = data.getStringValue(prefix);
394         }
395         return result == null ? item : result;
396     }
397 
398     static Map<String, List<String>> territory_currency = null;
399 
getCurrency(String territory)400     private static List<String> getCurrency(String territory) {
401         if (territory_currency == null) {
402             territory_currency = new TreeMap<>();
403             Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
404             CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
405             for (String path : supp) {
406                 if (path.indexOf("/currencyData") >= 0) {
407                     // <region iso3166="AR">
408                     // <currency iso4217="ARS" from="1992-01-01"/>
409                     if (path.indexOf("/region") >= 0) {
410                         XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
411                         Map<String, String> attributes = parts.getAttributes(parts.size() - 2);
412                         String iso3166 = attributes.get("iso3166");
413                         attributes = parts.getAttributes(parts.size() - 1);
414                         String iso4217 = attributes.get("iso4217");
415                         String to = attributes.get("to");
416                         if (to != null) {
417                             continue;
418                         }
419                         List<String> info = territory_currency.get(iso3166);
420                         if (info == null) {
421                             territory_currency.put(iso3166, info = new ArrayList<>());
422                         }
423                         info.add(iso4217);
424                     }
425                 }
426             }
427         }
428         return territory_currency.get(territory);
429     }
430 }
431