• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableMap;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.impl.Utility;
6 import com.ibm.icu.text.Collator;
7 import com.ibm.icu.text.DateFormat;
8 import com.ibm.icu.text.DecimalFormat;
9 import com.ibm.icu.text.NumberFormat;
10 import com.ibm.icu.text.SimpleDateFormat;
11 import com.ibm.icu.text.Transliterator;
12 import com.ibm.icu.util.ULocale;
13 import java.io.BufferedReader;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.text.ParseException;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.Comparator;
20 import java.util.Date;
21 import java.util.HashMap;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.TreeMap;
28 import java.util.TreeSet;
29 import org.unicode.cldr.util.CLDRFile;
30 import org.unicode.cldr.util.CLDRPaths;
31 import org.unicode.cldr.util.CldrUtility;
32 import org.unicode.cldr.util.Factory;
33 import org.unicode.cldr.util.Iso639Data;
34 import org.unicode.cldr.util.Iso639Data.Scope;
35 import org.unicode.cldr.util.Iso639Data.Type;
36 import org.unicode.cldr.util.Log;
37 import org.unicode.cldr.util.StandardCodes;
38 import org.unicode.cldr.util.StandardCodes.LstrType;
39 import org.unicode.cldr.util.SupplementalDataInfo;
40 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
41 import org.unicode.cldr.util.Validity;
42 import org.unicode.cldr.util.Validity.Status;
43 import org.unicode.cldr.util.XPathParts;
44 
45 public class GenerateEnums {
46     private static final String CODE_INDENT = "  ";
47 
48     private static final String DATA_INDENT = "    ";
49 
50     private static final String LIST_INDENT = "              ";
51 
52     private StandardCodes sc = StandardCodes.make();
53 
54     private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
55 
56     //    private Factory supplementalFactory = Factory.make(
57     //        CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*");
58 
59     private Set<String> cldrCodes = new TreeSet<>();
60 
61     // private Map enum_canonical = new TreeMap();
62     private Map<String, String> enum_alpha3 = new TreeMap<>();
63 
64     private Map<String, String> enum_UN = new TreeMap<>();
65 
66     // private Map enum_FIPS10 = new TreeMap();
67 
68     // private Map enum_TLD = new TreeMap();
69 
70     private CLDRFile english = factory.make("en", false);
71 
72     private CLDRFile supplementalMetadata = factory.make("supplementalMetadata", false);
73 
74     private CLDRFile supplementalData = factory.make("supplementalData", false);
75 
76     private Relation<String, String> unlimitedCurrencyCodes;
77 
78     private Set<String> scripts = new TreeSet<>();
79 
80     private Set<String> languages = new TreeSet<>();
81 
82     private final Set<String> ignoreMissingAlpha3 =
83             new TreeSet<>(Arrays.asList("EA", "EZ", "IC", "UN"));
84 
main(String[] args)85     public static void main(String[] args) throws IOException {
86         GenerateEnums gen = new GenerateEnums();
87         gen.showLanguageInfo();
88         gen.loadCLDRData();
89         gen.showCounts();
90         gen.showCurrencies();
91         gen.showLanguages();
92         gen.showScripts();
93         gen.showRegionCodeInfo();
94         System.out.println("DONE");
95     }
96 
showCounts()97     private void showCounts() {
98         System.out.format(
99                 "Language Subtags: %s" + CldrUtility.LINE_SEPARATOR,
100                 sc.getGoodAvailableCodes("language").size());
101         System.out.format(
102                 "Script Subtags: %s" + CldrUtility.LINE_SEPARATOR,
103                 sc.getGoodAvailableCodes("script").size());
104         System.out.format(
105                 "Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR,
106                 sc.getGoodAvailableCodes("territory").size());
107     }
108 
showCurrencies()109     private void showCurrencies() throws IOException {
110         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
111         Log.println();
112         Log.println("Currency Data");
113         Log.println();
114         showGeneratedCommentStart(CODE_INDENT);
115         compareSets(
116                 "currencies from sup.data", currencyCodes, "valid currencies", validCurrencyCodes);
117         Set<String> unused = new TreeSet<>(validCurrencyCodes);
118         unused.removeAll(currencyCodes);
119         showCurrencies(currencyCodes);
120         Log.println();
121         showCurrencies(unused);
122         Map<String, String> sorted = new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
123         for (String code : validCurrencyCodes) {
124             if (unused.contains(code) && !code.equals("CLF"))
125                 continue; // we include CLF for compatibility
126             sorted.put(getName(code), code);
127         }
128         int lineLength =
129                 "  /** Belgian Franc */                                            BEF,".length();
130         for (String name : sorted.keySet()) {
131             printRow(Log.getLog(), sorted.get(name), name, "currency", null, lineLength);
132         }
133         showGeneratedCommentEnd(CODE_INDENT);
134         Log.close();
135     }
136 
getName(String code)137     private String getName(String code) {
138         String result = english.getName(CLDRFile.CURRENCY_NAME, code);
139         if (result == null) {
140             result = code;
141             System.out.println("Failed to find: " + code);
142         }
143         return result;
144     }
145 
showCurrencies(Set<String> both)146     private void showCurrencies(Set<String> both) {
147         // /** Afghani */ AFN,
148         for (Iterator<String> it = both.iterator(); it.hasNext(); ) {
149             String code = it.next();
150             String englishName = getName(code);
151             if (englishName == null) {}
152             Set<String> regions = unlimitedCurrencyCodes.getAll(code);
153             System.out.println(
154                     code
155                             + "\t"
156                             + englishName
157                             + "\t"
158                             + (validCurrencyCodes.contains(code)
159                                     ? currencyCodes.contains(code) ? "" : "valid-only"
160                                     : "supp-only")
161                             + "\t"
162                             + (regions != null ? regions : "unused"));
163         }
164     }
165 
showScripts()166     private void showScripts() throws IOException {
167         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
168         Log.println();
169         Log.println("Script Data");
170         Log.println();
171 
172         showGeneratedCommentStart(CODE_INDENT);
173         Map<String, String> code_replacements = new TreeMap<>();
174         int len = "  /** Arabic */                                        Arab,".length();
175         for (Iterator<String> it = scripts.iterator(); it.hasNext(); ) {
176             String code = it.next();
177             String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
178             if (englishName == null) continue;
179             printRow(Log.getLog(), code, null, "script", code_replacements, len);
180             // Log.println(" /**" + englishName + "*/ " + code + ",");
181         }
182         showGeneratedCommentEnd(CODE_INDENT);
183         Log.close();
184     }
185 
showLanguageInfo()186     private void showLanguageInfo() throws IOException {
187         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
188         System.out.println();
189         System.out.println("Language Converter");
190         System.out.println();
191         StringBuilder buffer = new StringBuilder();
192         // language information
193         for (String language : sc.getAvailableCodes("language")) {
194             Scope scope = Iso639Data.getScope(language);
195             if (scope == Scope.PrivateUse) {
196                 continue;
197             }
198             buffer.setLength(0);
199             String alpha3 = Iso639Data.toAlpha3(language);
200             if (alpha3 != null) {
201                 buffer.append(".add(\"" + alpha3 + "\")");
202             }
203             Type type = Iso639Data.getType(language);
204             if (type != Type.Living) {
205                 buffer.append(".add(Type." + type + ")");
206             }
207             if (scope != Scope.Individual) {
208                 buffer.append(".add(Scope." + scope + ")");
209             }
210             if (buffer.length() > 0) {
211                 Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
212             }
213         }
214         Log.close();
215     }
216 
showLanguages()217     private void showLanguages() throws IOException {
218         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
219         System.out.println();
220         System.out.println("Language Data");
221         System.out.println();
222 
223         for (Iterator<String> it = languages.iterator(); it.hasNext(); ) {
224             String code = it.next();
225             String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
226             if (englishName == null) continue;
227             System.out.println("     /**" + englishName + "*/    " + code + ",");
228         }
229 
230         showGeneratedCommentStart(LIST_INDENT);
231         /*
232          * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
233          * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
234          * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
235          */
236         StringBuffer buffer = new StringBuffer();
237         int lineLimit = 70 - LIST_INDENT.length();
238         char lastChar = 0;
239         for (Iterator<String> it = languages.iterator(); it.hasNext(); ) {
240             String code = it.next();
241             if (code.equals("root")) {
242                 continue;
243             }
244             if (code.charAt(0) != lastChar || buffer.length() + 1 + code.length() > lineLimit) {
245                 if (buffer.length() != 0) Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
246                 buffer.setLength(0);
247                 lastChar = code.charAt(0);
248             }
249             buffer.append(code).append(' ');
250         }
251         // remove the very last space
252         if (buffer.charAt(buffer.length() - 1) == ' ') {
253             buffer.setLength(buffer.length() - 1);
254         }
255         Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
256 
257         showGeneratedCommentEnd(LIST_INDENT);
258         Log.close();
259     }
260 
261     @SuppressWarnings("rawtypes")
join(Collection collection, String separator)262     private Object join(Collection collection, String separator) {
263         if (collection == null) return null;
264         StringBuffer result = new StringBuffer();
265         boolean first = true;
266         for (Iterator it = collection.iterator(); it.hasNext(); ) {
267             if (first) first = false;
268             else result.append(separator);
269             result.append(it.next());
270         }
271         return result.toString();
272     }
273 
274     static NumberFormat threeDigit = new DecimalFormat("000");
275 
loadCLDRData()276     public void loadCLDRData() throws IOException {
277         // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt");
278         // while (true) {
279         // String line = codes.readLine();
280         // if (line == null)
281         // break;
282         // line = line.split("#")[0].trim();
283         // if (line.length() == 0)
284         // continue;
285         // String[] sourceValues = line.split("\\s+");
286         // String[] values = new String[5];
287         // for (int i = 0; i < values.length; ++i) {
288         // if (i >= sourceValues.length || sourceValues[i].equals("-"))
289         // values[i] = null;
290         // else
291         // values[i] = sourceValues[i];
292         // }
293         // String alpha2 = values[0];
294         // cldrCodes.add(alpha2);
295         // if (isPrivateUseRegion(alpha2))
296         // continue;
297         // String numeric = values[1];
298         // String alpha3 = values[2];
299         // String internet = values[3];
300         // if (internet != null)
301         // internet = internet.toUpperCase();
302         // String fips10 = values[4];
303         // String enumValue = enumName(alpha2);
304         // enum_alpha3.put(enumValue, alpha3);
305         // enum_UN.put(enumValue, numeric);
306         // enum_FIPS10.put(enumValue, fips10);
307         // enum_TLD.put(enumValue, internet);
308         // }
309         // codes.close();
310         DecimalFormat threeDigits = new DecimalFormat("000");
311         for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
312             cldrCodes.add(value);
313             if (isPrivateUseRegion(value)) continue;
314             enum_UN.put(
315                     value,
316                     threeDigits.format(
317                             supplementalDataInfo
318                                     .getNumericTerritoryMapping()
319                                     .getAll(value)
320                                     .iterator()
321                                     .next()));
322         }
323         for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
324             cldrCodes.add(value);
325             if (isPrivateUseRegion(value)) continue;
326             enum_alpha3.put(
327                     value,
328                     supplementalDataInfo
329                             .getAlpha3TerritoryMapping()
330                             .getAll(value)
331                             .iterator()
332                             .next());
333         }
334 
335         BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
336         Map<String, String> macro_name = new TreeMap<>();
337         while (true) {
338             String line = codes.readLine();
339             if (line == null) break;
340             line = line.trim();
341             if (line.length() == 0) continue;
342             if (line.charAt(0) < '0' || line.charAt(0) > '9') {
343                 System.out.println("GenerateEnums: Skipping: " + line);
344                 continue;
345             }
346             String[] sourceValues = line.split("\\s+");
347             int code = Integer.parseInt(sourceValues[0]);
348             String codeName = threeDigit.format(code);
349             macro_name.put(codeName, line);
350         }
351         codes.close();
352         //        String values =
353         // supplementalDataInfo.getValidityInfo().get("$territory").get1().trim();
354         Map<Status, Set<String>> validRegions =
355                 Validity.getInstance().getStatusToCodes(LstrType.region);
356         Set<String> regions = new TreeSet<>();
357         regions.addAll(validRegions.get(Status.regular));
358         regions.addAll(validRegions.get(Status.macroregion));
359         //        String[] validTerritories = values.split("\\s+");
360         //        for (int i = 0; i < validTerritories.length; ++i) {
361         for (String region : regions) {
362             if (corrigendum.contains(region)) {
363                 System.out.println("Skipping " + region + "\t\t" + getEnglishName(region));
364                 continue; // exception, corrigendum
365             }
366             if (isPrivateUseRegion(region)) continue;
367             if (region.charAt(0) < 'A') { // numeric
368                 enum_UN.put(enumName(region), region);
369                 cldrCodes.add(region);
370             } else {
371                 if (enum_alpha3.get(region) == null && !ignoreMissingAlpha3.contains(region)) {
372                     System.out.println("Missing alpha3 for: " + region);
373                 }
374             }
375         }
376         checkDuplicates(enum_UN);
377         checkDuplicates(enum_alpha3);
378         Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory"));
379         compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
380         Set<String> missing = new TreeSet<>(availableCodes);
381         missing.removeAll(cldrCodes);
382         // don't care list: "003"
383         // missing.remove("003");
384         // missing.remove("172");
385         // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
386         missing.remove("EA");
387         missing.remove("EZ");
388         missing.remove("IC");
389         missing.remove("QU");
390         missing.remove("UN");
391         missing.remove("CQ");
392 
393         if (missing.size() != 0) {
394             throw new IllegalArgumentException("Codes in Registry but not in CLDR: " + missing);
395         }
396 
397         Set<String> UNValues = new TreeSet<>(enum_UN.values());
398 
399         for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext(); ) {
400             Object key = it.next();
401             Object value = macro_name.get(key);
402             if (!UNValues.contains(key)) {
403                 System.out.println("Macro " + key + "\t" + value);
404             }
405         }
406 
407         for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext(); ) {
408             String region = it.next();
409             String englishName = getEnglishName(region);
410             if (englishName == null) {
411                 englishName = "NULL"; // for debugging\
412             }
413             String rfcName = getRFC3066Name(region);
414             if (!englishName.equals(rfcName)) {
415                 System.out.println(
416                         "Different names: {\""
417                                 + region
418                                 + "\",\t\""
419                                 + englishName
420                                 + " ("
421                                 + rfcName
422                                 + ")\"},");
423             }
424         }
425 
426         getContainment();
427 
428         DateFormat[] simpleFormats = {
429             new SimpleDateFormat("yyyy-MM-dd"),
430             new SimpleDateFormat("yyyy-MM"),
431             new SimpleDateFormat("yyyy"),
432         };
433         Date today = new Date();
434         Date longAgo = new Date(1000 - 1900, 1, 1);
435         currencyCodes = new TreeSet<>();
436         unlimitedCurrencyCodes =
437                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
438         for (Iterator<String> it =
439                         supplementalData.iterator("//supplementalData/currencyData/region");
440                 it.hasNext(); ) {
441             String path = it.next();
442             XPathParts parts = XPathParts.getFrozenInstance(path);
443             String region = parts.findAttributeValue("region", "iso3166");
444             String code = parts.findAttributeValue("currency", "iso4217");
445             String to = parts.findAttributeValue("currency", "to");
446             main:
447             if (to == null) {
448                 unlimitedCurrencyCodes.put(code, region);
449             } else {
450                 for (int i = 0; i < simpleFormats.length; ++i) {
451                     try {
452                         Date foo = simpleFormats[i].parse(to);
453                         if (foo.compareTo(longAgo) < 0) {
454                             System.out.println("Date Error: can't parse " + to);
455                             break main;
456                         } else if (foo.compareTo(today) >= 0) {
457                             unlimitedCurrencyCodes.put(code, region);
458                         }
459                         break main;
460                     } catch (ParseException e) {
461                     }
462                 }
463                 System.out.println("Date Error: can't parse " + to);
464             }
465             currencyCodes.add(code);
466         }
467 
468         validCurrencyCodes = new TreeSet<>();
469         Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
470         for (String code : bcp47CurrencyCodes) {
471             validCurrencyCodes.add(code.toUpperCase());
472         }
473 
474         scripts = supplementalDataInfo.getCLDRScriptCodes();
475         languages = supplementalDataInfo.getCLDRLanguageCodes();
476 
477         // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory"));
478         // availableCodes.add("003");
479         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
480         // String code = (String) next())
481         // canonicalRegion_UN.put(alpha2, numeric);
482         // }
483 
484         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
485         // String code = (String)it.next();
486         // RegionCode region = map_id_canonical_RFC.get(code);
487         // if (region != null) continue; // skip others
488         // region = new RegionCode(code);
489         // map_id_canonical_RFC.put(code,region);
490         // map_canonical_id_RFC.put(region,code);
491         // if ("A".compareTo(code) > 0) {
492         // map_id_canonical_UN.put(code,region);
493         // map_canonical_id_UN.put(region,code);
494         // } else {
495         // map_id_canonical_A2.put(code,region);
496         // map_canonical_id_A2.put(region,code);
497         // }
498         // }
499         // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) {
500         // String code = (String)it.next();
501         // good.add(getInstance(code));
502         // }
503     }
504 
getContainment()505     public void getContainment() {
506         // <group type="001" contains="002 009 019 142 150"/> <!--World -->
507         for (Iterator<String> it =
508                         supplementalData.iterator("//supplementalData/territoryContainment/group");
509                 it.hasNext(); ) {
510             String path = it.next();
511             String fullPath = supplementalData.getFullXPath(path);
512             XPathParts parts = XPathParts.getFrozenInstance(fullPath);
513             String container = parts.getAttributeValue(parts.size() - 1, "type");
514             final String containedString = parts.getAttributeValue(-1, "contains");
515             List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
516             containment.put(container, contained);
517         }
518         // fix recursiveContainment.
519         // for (String region : (Collection<String>)containment.keySet()) {
520         // Set temp = new LinkedHashSet();
521         // addContains(region, temp);
522         // recursiveContainment.put(region, temp);
523         // }
524         Set<String> startingFromWorld = new TreeSet<>();
525         addContains("001", startingFromWorld);
526         compareSets("World", startingFromWorld, "CLDR", cldrCodes);
527         // generateContains();
528     }
529 
generateContains()530     private void generateContains() {
531 
532         for (String region : containment.keySet()) {
533             List<String> plain = containment.get(region);
534             // Collection recursive = (Collection)recursiveContainment.get(region);
535 
536             String setAsString = CldrUtility.join(plain, " ");
537             // String setAsString2 = recursive.equals(plain) ? "" : ", " +
538             // Utility.join(recursive," ");
539             Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
540         }
541     }
542 
543     Map<String, List<String>> containment = new TreeMap<>();
544 
545     // Map recursiveContainment = new TreeMap();
546 
addContains(String string, Set<String> startingFromWorld)547     private void addContains(String string, Set<String> startingFromWorld) {
548         startingFromWorld.add(string);
549         List<String> contained = containment.get(string);
550         if (contained == null) return;
551         for (Iterator<String> it = contained.iterator(); it.hasNext(); ) {
552             addContains(it.next(), startingFromWorld);
553         }
554     }
555 
556     @SuppressWarnings("rawtypes")
compareSets(String name, Set availableCodes, String name2, Set cldrCodes)557     private void compareSets(String name, Set availableCodes, String name2, Set cldrCodes) {
558         Set temp = new TreeSet();
559         temp.addAll(availableCodes);
560         temp.removeAll(cldrCodes);
561         System.out.println("In " + name + " but not in " + name2 + ": " + temp);
562         temp.clear();
563         temp.addAll(cldrCodes);
564         temp.removeAll(availableCodes);
565         System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
566     }
567 
568     @SuppressWarnings("rawtypes")
checkDuplicates(Map m)569     private void checkDuplicates(Map m) {
570         Map backMap = new HashMap();
571         for (Iterator it = m.keySet().iterator(); it.hasNext(); ) {
572             Object key = it.next();
573             Object o = m.get(key);
574             Object otherKey = backMap.get(o);
575             if (otherKey != null)
576                 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t" + o);
577             else backMap.put(o, key);
578         }
579     }
580 
581     Set<String> corrigendum =
582             new TreeSet<>(Arrays.asList(new String[] {"QE", "833", "830", "172"})); // 003, 419
583 
584     private ImmutableMap<String, String> extraNames =
585             ImmutableMap.<String, String>builder()
586                     .put("BU", "Burma")
587                     .put("TP", "East Timor")
588                     .put("YU", "Yugoslavia")
589                     .put("ZR", "Zaire")
590                     .put("CD", "Congo (Kinshasa, Democratic Republic)")
591                     .put("CI", "Ivory Coast (Cote d'Ivoire)")
592                     .put("FM", "Micronesia (Federated States)")
593                     .put("TL", "East Timor (Timor-Leste)")
594                     // .put("155", "Western Europe")
595                     .build();
596 
597     private Set<String> currencyCodes;
598 
599     private Set<String> validCurrencyCodes;
600 
601     static SupplementalDataInfo supplementalDataInfo =
602             SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
603 
604     /**
605      * Get the RegionCode Enum
606      *
607      * @throws IOException
608      */
showRegionCodeInfo()609     private void showRegionCodeInfo() throws IOException {
610         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
611         System.out.println();
612         System.out.println("Data for RegionCode");
613         System.out.println();
614         showGeneratedCommentStart(CODE_INDENT);
615 
616         Set<String> reordered = new TreeSet<>(new LengthFirstComparator());
617         reordered.addAll(enum_UN.keySet());
618         Map<String, String> code_replacements = new TreeMap<>();
619         int len = "  /** Polynesia */                                    UN061,".length();
620         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
621             String region = it.next();
622             printRow(Log.getLog(), region, null, "territory", code_replacements, len);
623         }
624         showGeneratedCommentEnd(CODE_INDENT);
625         Log.close();
626 
627         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
628         Log.println();
629         Log.println("Data for ISO Region Codes");
630         Log.println();
631         for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
632             if (territory.equals("ZZ")) {
633                 continue;
634             }
635             PopulationData popData = supplementalDataInfo.getPopulationDataForTerritory(territory);
636             // to("ak").add(Scope.Macrolanguage).add("aka");
637             Log.formatln(
638                     "    addRegion(RegionCode.%s, %s, %s, %s) // %s",
639                     territory,
640                     format(popData.getPopulation()),
641                     format(popData.getLiteratePopulation() / popData.getPopulation()),
642                     format(popData.getGdp()),
643                     english.getName("territory", territory));
644             // remove all the ISO 639-3 until they are part of BCP 47
645             // we need to remove in earlier pass so we have the count
646             Set<String> languages = new TreeSet<>();
647             for (String language :
648                     supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) {
649                 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
650                     continue;
651                 }
652                 popData =
653                         supplementalDataInfo.getLanguageAndTerritoryPopulationData(
654                                 language, territory);
655                 if (popData.getPopulation() == 0
656                         || Double.isNaN(
657                                 popData.getLiteratePopulation() / popData.getPopulation())) {
658                     continue;
659                 }
660                 languages.add(language);
661             }
662             int count = languages.size();
663             for (String language : languages) {
664                 --count; // we need to know the last one
665                 popData =
666                         supplementalDataInfo.getLanguageAndTerritoryPopulationData(
667                                 language, territory);
668                 Log.formatln(
669                         "    .addLanguage(\"%s\", %s, %s)%s // %s",
670                         language,
671                         format(popData.getPopulation()),
672                         format(popData.getLiteratePopulation() / popData.getPopulation()),
673                         (count == 0 ? ";" : ""),
674                         english.getName(language));
675             }
676         }
677         Log.close();
678 
679         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
680         Log.println();
681         Log.println("Data for ISO Region Codes");
682         Log.println();
683         showGeneratedCommentStart(DATA_INDENT);
684         // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
685         // containees
686         reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory"));
687         reordered.addAll(enum_UN.keySet());
688         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
689             String region = it.next();
690             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
691             // UN
692             // name
693             // int un = Integer.parseInt((String) enum_UN.get(region)); // get around
694             // dumb octal
695             // syntax
696             String isoCode = enum_alpha3.get(region);
697             if (isoCode == null) continue;
698             Log.println(
699                     DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode." + region + ");");
700         }
701         doAliases(code_replacements);
702         showGeneratedCommentEnd(DATA_INDENT);
703         Log.println();
704         Log.println("Data for M.49 Region Codes");
705         Log.println();
706         showGeneratedCommentStart(DATA_INDENT);
707 
708         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
709             String region = it.next();
710             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
711             // UN
712             // name
713             int un = Integer.parseInt(enum_UN.get(region), 10); // get
714             // around
715             // dumb
716             // octal
717             // syntax
718             Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region + ");");
719         }
720         doAliases(code_replacements);
721 
722         System.out.println("Plain list");
723         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
724             String region = it.next();
725             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
726             // UN
727             // name
728             String newCode = code_replacements.get(region);
729             if (newCode != null) continue;
730 
731             int un = Integer.parseInt(enum_UN.get(region), 10); // get
732             // around
733             // dumb
734             // octal
735             // syntax
736             System.out.println(un + "\t" + region + "\t" + english.getName("territory", region));
737         }
738 
739         showGeneratedCommentEnd(DATA_INDENT);
740 
741         getContainment();
742         Log.close();
743     }
744 
745     static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);
746 
747     static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
748 
749     static {
750         nf.setMaximumFractionDigits(3);
751         sf.setMaximumFractionDigits(3);
752         nf.setGroupingUsed(false);
753     }
754 
format(double value)755     private String format(double value) {
756         double newValue = CldrUtility.roundToDecimals(value, 3);
757         String option1 = nf.format(newValue);
758         String option2 = sf.format(value);
759         return option1.length() <= option2.length() ? option1 : option2;
760     }
761 
doAliases(Map<String, String> code_replacements)762     private void doAliases(Map<String, String> code_replacements) {
763         for (String code : code_replacements.keySet()) {
764             String newCode = code_replacements.get(code);
765             if (newCode.length() == 0) newCode = "ZZ";
766             Log.println(
767                     DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \"" + newCode + "\");");
768         }
769     }
770 
showGeneratedCommentEnd(String indent)771     private void showGeneratedCommentEnd(String indent) {
772         Log.println(indent + "/* End of generated code. */");
773     }
774 
showGeneratedCommentStart(String indent)775     private void showGeneratedCommentStart(String indent) {
776         Log.println(indent + "/*");
777         Log.println(indent + " * The following information is generated from a tool,");
778         Log.println(indent + " * as described on");
779         Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
780         Log.println(indent + " * Do not edit manually.");
781         Log.println(indent + " * Start of generated code.");
782         Log.println(indent + " */");
783     }
784 
785     public static final class LengthFirstComparator implements Comparator<Object> {
786         @Override
compare(Object a, Object b)787         public int compare(Object a, Object b) {
788             String as = a.toString();
789             String bs = b.toString();
790             if (as.length() < bs.length()) return -1;
791             if (as.length() > bs.length()) return 1;
792             return as.compareTo(bs);
793         }
794     }
795 
796     public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
797         String type;
798 
DeprecatedAndLengthFirstComparator(String type)799         DeprecatedAndLengthFirstComparator(String type) {
800             this.type = type;
801         }
802 
803         @Override
compare(Object a, Object b)804         public int compare(Object a, Object b) {
805             String as = a.toString();
806             String bs = b.toString();
807             String ar = getDeprecatedReplacement(type, as);
808             String br = getDeprecatedReplacement(type, bs);
809             // put the deprecated ones first, eg those that aren't null
810             if (ar != null) {
811                 if (br == null) return -1;
812             }
813             if (br != null) {
814                 if (ar == null) return 1;
815             }
816             // now check the length
817             if (as.length() < bs.length()) return -1;
818             if (as.length() > bs.length()) return 1;
819             return as.compareTo(bs);
820         }
821     }
822 
823     /**
824      * Returns null if not deprecated, otherwise "" if there is no replacement, otherwise the
825      * replacement.
826      *
827      * @return
828      */
getDeprecatedReplacement(String type, String cldrTypeValue)829     public String getDeprecatedReplacement(String type, String cldrTypeValue) {
830         if (type.equals("currency")) {
831             return null;
832         }
833         String path =
834                 supplementalMetadata.getFullXPath(
835                         "//supplementalData/metadata/alias/"
836                                 + type
837                                 + "Alias[@type=\""
838                                 + cldrTypeValue
839                                 + "\"]",
840                         true);
841         if (path == null) {
842             return null;
843         }
844         XPathParts parts = XPathParts.getFrozenInstance(path);
845         String replacement = parts.findAttributeValue("territoryAlias", "replacement");
846         if (replacement == null) {
847             return "";
848         }
849         return replacement;
850     }
851 
852     static Transliterator doFallbacks =
853             Transliterator.createFromRules("id", "[’ʻ] > ''; ", Transliterator.FORWARD);
854 
printRow( PrintWriter out, String codeName, String englishName, String type, Map<String, String> code_replacements, int lineLength)855     private void printRow(
856             PrintWriter out,
857             String codeName,
858             String englishName,
859             String type,
860             Map<String, String> code_replacements,
861             int lineLength) {
862         // int numeric = Integer.parseInt((String) enum_UN.get(codeName));
863         // String alpha3 = (String) enum_alpha3.get(codeName);
864         String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix
865         // UN
866         // name
867         String replacement = getDeprecatedReplacement(type, cldrName);
868 
869         String resolvedEnglishName =
870                 englishName != null
871                         ? englishName
872                         : type.equals("territory")
873                                 ? getEnglishName(codeName)
874                                 : type.equals("currency")
875                                         ? getName(codeName)
876                                         : english.getName(CLDRFile.SCRIPT_NAME, codeName);
877         resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);
878 
879         String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " +
880         // threeDigit.format(numeric);
881         String printedCodeName = codeName;
882         if (replacement != null) {
883             code_replacements.put(codeName, replacement);
884             out.println(prefix);
885             prefix =
886                     CODE_INDENT
887                             + " * @deprecated"
888                             + (replacement.length() == 0 ? "" : " see " + replacement);
889             printedCodeName = "@Deprecated " + printedCodeName;
890         }
891         prefix += " */";
892 
893         if (codeName.equals("UN001")) {
894             out.println();
895         }
896         if (prefix.length() > lineLength - (printedCodeName.length() + 1)) {
897             // break at last space
898             int lastFit = prefix.lastIndexOf(' ', lineLength - (printedCodeName.length() + 1) - 2);
899             out.println(prefix.substring(0, lastFit));
900             prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
901         }
902         out.print(prefix);
903         out.print(
904                 Utility.repeat(
905                         " ", (lineLength - (prefix.length() + printedCodeName.length() + 1))));
906         out.println(printedCodeName + ",");
907     }
908 
getEnglishName(String codeName)909     private String getEnglishName(String codeName) {
910         if (codeName.length() > 3) codeName = codeName.substring(2); // fix UN name
911         String name = extraNames.get(codeName);
912         if (name != null) return name;
913         name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
914         if (name != null) return name;
915         return codeName;
916     }
917 
getRFC3066Name(String codeName)918     private String getRFC3066Name(String codeName) {
919         if (codeName.length() > 2) codeName = codeName.substring(2); // fix UN name
920         List<String> list = sc.getFullData("territory", codeName);
921         if (list == null) return null;
922         return list.get(0);
923     }
924 
enumName(String codeName)925     private String enumName(String codeName) {
926         return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
927     }
928 
quote(Object input)929     static String quote(Object input) {
930         if (input != null) return '"' + input.toString().trim() + '"';
931         return null;
932     }
933 
isPrivateUseRegion(String codeName)934     static boolean isPrivateUseRegion(String codeName) {
935         // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
936         if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
937             return false;
938         } else if (codeName.equals("AA") || codeName.equals("ZZ")) {
939             return true;
940         } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
941             return true;
942         } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) {
943             return true;
944         }
945         return false;
946     }
947     /*
948      * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x>
949      * <x><context>ウ</context><i>ヽ</i></x>
950      *
951      * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x>
952      * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x>
953      * <x><context>ヴ</context><i>ヽ</i></x>
954      *
955      * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x>
956      * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x>
957      *
958      * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x>
959      * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x>
960      *
961      * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x>
962      */
963 }
964