1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.ImmutableMap; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.impl.Utility; 6 import com.ibm.icu.text.Collator; 7 import com.ibm.icu.text.DateFormat; 8 import com.ibm.icu.text.DecimalFormat; 9 import com.ibm.icu.text.NumberFormat; 10 import com.ibm.icu.text.SimpleDateFormat; 11 import com.ibm.icu.text.Transliterator; 12 import com.ibm.icu.util.ULocale; 13 import java.io.BufferedReader; 14 import java.io.IOException; 15 import java.io.PrintWriter; 16 import java.text.ParseException; 17 import java.util.Arrays; 18 import java.util.Collection; 19 import java.util.Comparator; 20 import java.util.Date; 21 import java.util.HashMap; 22 import java.util.Iterator; 23 import java.util.List; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Set; 27 import java.util.TreeMap; 28 import java.util.TreeSet; 29 import org.unicode.cldr.util.CLDRFile; 30 import org.unicode.cldr.util.CLDRPaths; 31 import org.unicode.cldr.util.CldrUtility; 32 import org.unicode.cldr.util.Factory; 33 import org.unicode.cldr.util.Iso639Data; 34 import org.unicode.cldr.util.Iso639Data.Scope; 35 import org.unicode.cldr.util.Iso639Data.Type; 36 import org.unicode.cldr.util.Log; 37 import org.unicode.cldr.util.StandardCodes; 38 import org.unicode.cldr.util.StandardCodes.LstrType; 39 import org.unicode.cldr.util.SupplementalDataInfo; 40 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 41 import org.unicode.cldr.util.Validity; 42 import org.unicode.cldr.util.Validity.Status; 43 import org.unicode.cldr.util.XPathParts; 44 45 public class GenerateEnums { 46 private static final String CODE_INDENT = " "; 47 48 private static final String DATA_INDENT = " "; 49 50 private static final String LIST_INDENT = " "; 51 52 private StandardCodes sc = StandardCodes.make(); 53 54 private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 55 56 // private Factory supplementalFactory = Factory.make( 57 // CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*"); 58 59 private Set<String> cldrCodes = new TreeSet<>(); 60 61 // private Map enum_canonical = new TreeMap(); 62 private Map<String, String> enum_alpha3 = new TreeMap<>(); 63 64 private Map<String, String> enum_UN = new TreeMap<>(); 65 66 // private Map enum_FIPS10 = new TreeMap(); 67 68 // private Map enum_TLD = new TreeMap(); 69 70 private CLDRFile english = factory.make("en", false); 71 72 private CLDRFile supplementalMetadata = factory.make("supplementalMetadata", false); 73 74 private CLDRFile supplementalData = factory.make("supplementalData", false); 75 76 private Relation<String, String> unlimitedCurrencyCodes; 77 78 private Set<String> scripts = new TreeSet<>(); 79 80 private Set<String> languages = new TreeSet<>(); 81 82 private final Set<String> ignoreMissingAlpha3 = 83 new TreeSet<>(Arrays.asList("EA", "EZ", "IC", "UN")); 84 main(String[] args)85 public static void main(String[] args) throws IOException { 86 GenerateEnums gen = new GenerateEnums(); 87 gen.showLanguageInfo(); 88 gen.loadCLDRData(); 89 gen.showCounts(); 90 gen.showCurrencies(); 91 gen.showLanguages(); 92 gen.showScripts(); 93 gen.showRegionCodeInfo(); 94 System.out.println("DONE"); 95 } 96 showCounts()97 private void showCounts() { 98 System.out.format( 99 "Language Subtags: %s" + CldrUtility.LINE_SEPARATOR, 100 sc.getGoodAvailableCodes("language").size()); 101 System.out.format( 102 "Script Subtags: %s" + CldrUtility.LINE_SEPARATOR, 103 sc.getGoodAvailableCodes("script").size()); 104 System.out.format( 105 "Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR, 106 sc.getGoodAvailableCodes("territory").size()); 107 } 108 showCurrencies()109 private void showCurrencies() throws IOException { 110 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt"); 111 Log.println(); 112 Log.println("Currency Data"); 113 Log.println(); 114 showGeneratedCommentStart(CODE_INDENT); 115 compareSets( 116 "currencies from sup.data", currencyCodes, "valid currencies", validCurrencyCodes); 117 Set<String> unused = new TreeSet<>(validCurrencyCodes); 118 unused.removeAll(currencyCodes); 119 showCurrencies(currencyCodes); 120 Log.println(); 121 showCurrencies(unused); 122 Map<String, String> sorted = new TreeMap<>(Collator.getInstance(ULocale.ENGLISH)); 123 for (String code : validCurrencyCodes) { 124 if (unused.contains(code) && !code.equals("CLF")) 125 continue; // we include CLF for compatibility 126 sorted.put(getName(code), code); 127 } 128 int lineLength = 129 " /** Belgian Franc */ BEF,".length(); 130 for (String name : sorted.keySet()) { 131 printRow(Log.getLog(), sorted.get(name), name, "currency", null, lineLength); 132 } 133 showGeneratedCommentEnd(CODE_INDENT); 134 Log.close(); 135 } 136 getName(String code)137 private String getName(String code) { 138 String result = english.getName(CLDRFile.CURRENCY_NAME, code); 139 if (result == null) { 140 result = code; 141 System.out.println("Failed to find: " + code); 142 } 143 return result; 144 } 145 showCurrencies(Set<String> both)146 private void showCurrencies(Set<String> both) { 147 // /** Afghani */ AFN, 148 for (Iterator<String> it = both.iterator(); it.hasNext(); ) { 149 String code = it.next(); 150 String englishName = getName(code); 151 if (englishName == null) {} 152 Set<String> regions = unlimitedCurrencyCodes.getAll(code); 153 System.out.println( 154 code 155 + "\t" 156 + englishName 157 + "\t" 158 + (validCurrencyCodes.contains(code) 159 ? currencyCodes.contains(code) ? "" : "valid-only" 160 : "supp-only") 161 + "\t" 162 + (regions != null ? regions : "unused")); 163 } 164 } 165 showScripts()166 private void showScripts() throws IOException { 167 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt"); 168 Log.println(); 169 Log.println("Script Data"); 170 Log.println(); 171 172 showGeneratedCommentStart(CODE_INDENT); 173 Map<String, String> code_replacements = new TreeMap<>(); 174 int len = " /** Arabic */ Arab,".length(); 175 for (Iterator<String> it = scripts.iterator(); it.hasNext(); ) { 176 String code = it.next(); 177 String englishName = english.getName(CLDRFile.SCRIPT_NAME, code); 178 if (englishName == null) continue; 179 printRow(Log.getLog(), code, null, "script", code_replacements, len); 180 // Log.println(" /**" + englishName + "*/ " + code + ","); 181 } 182 showGeneratedCommentEnd(CODE_INDENT); 183 Log.close(); 184 } 185 showLanguageInfo()186 private void showLanguageInfo() throws IOException { 187 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt"); 188 System.out.println(); 189 System.out.println("Language Converter"); 190 System.out.println(); 191 StringBuilder buffer = new StringBuilder(); 192 // language information 193 for (String language : sc.getAvailableCodes("language")) { 194 Scope scope = Iso639Data.getScope(language); 195 if (scope == Scope.PrivateUse) { 196 continue; 197 } 198 buffer.setLength(0); 199 String alpha3 = Iso639Data.toAlpha3(language); 200 if (alpha3 != null) { 201 buffer.append(".add(\"" + alpha3 + "\")"); 202 } 203 Type type = Iso639Data.getType(language); 204 if (type != Type.Living) { 205 buffer.append(".add(Type." + type + ")"); 206 } 207 if (scope != Scope.Individual) { 208 buffer.append(".add(Scope." + scope + ")"); 209 } 210 if (buffer.length() > 0) { 211 Log.println("\t\tto(\"" + language + "\")" + buffer + ";"); 212 } 213 } 214 Log.close(); 215 } 216 showLanguages()217 private void showLanguages() throws IOException { 218 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt"); 219 System.out.println(); 220 System.out.println("Language Data"); 221 System.out.println(); 222 223 for (Iterator<String> it = languages.iterator(); it.hasNext(); ) { 224 String code = it.next(); 225 String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code); 226 if (englishName == null) continue; 227 System.out.println(" /**" + englishName + "*/ " + code + ","); 228 } 229 230 showGeneratedCommentStart(LIST_INDENT); 231 /* 232 * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa 233 * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + " 234 * as ast ath aus av awa ay az ba bad bai bal ban bas bat be" 235 */ 236 StringBuffer buffer = new StringBuffer(); 237 int lineLimit = 70 - LIST_INDENT.length(); 238 char lastChar = 0; 239 for (Iterator<String> it = languages.iterator(); it.hasNext(); ) { 240 String code = it.next(); 241 if (code.equals("root")) { 242 continue; 243 } 244 if (code.charAt(0) != lastChar || buffer.length() + 1 + code.length() > lineLimit) { 245 if (buffer.length() != 0) Log.println(LIST_INDENT + "+ \"" + buffer + "\""); 246 buffer.setLength(0); 247 lastChar = code.charAt(0); 248 } 249 buffer.append(code).append(' '); 250 } 251 // remove the very last space 252 if (buffer.charAt(buffer.length() - 1) == ' ') { 253 buffer.setLength(buffer.length() - 1); 254 } 255 Log.println(LIST_INDENT + "+ \"" + buffer + "\""); 256 257 showGeneratedCommentEnd(LIST_INDENT); 258 Log.close(); 259 } 260 261 @SuppressWarnings("rawtypes") join(Collection collection, String separator)262 private Object join(Collection collection, String separator) { 263 if (collection == null) return null; 264 StringBuffer result = new StringBuffer(); 265 boolean first = true; 266 for (Iterator it = collection.iterator(); it.hasNext(); ) { 267 if (first) first = false; 268 else result.append(separator); 269 result.append(it.next()); 270 } 271 return result.toString(); 272 } 273 274 static NumberFormat threeDigit = new DecimalFormat("000"); 275 loadCLDRData()276 public void loadCLDRData() throws IOException { 277 // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt"); 278 // while (true) { 279 // String line = codes.readLine(); 280 // if (line == null) 281 // break; 282 // line = line.split("#")[0].trim(); 283 // if (line.length() == 0) 284 // continue; 285 // String[] sourceValues = line.split("\\s+"); 286 // String[] values = new String[5]; 287 // for (int i = 0; i < values.length; ++i) { 288 // if (i >= sourceValues.length || sourceValues[i].equals("-")) 289 // values[i] = null; 290 // else 291 // values[i] = sourceValues[i]; 292 // } 293 // String alpha2 = values[0]; 294 // cldrCodes.add(alpha2); 295 // if (isPrivateUseRegion(alpha2)) 296 // continue; 297 // String numeric = values[1]; 298 // String alpha3 = values[2]; 299 // String internet = values[3]; 300 // if (internet != null) 301 // internet = internet.toUpperCase(); 302 // String fips10 = values[4]; 303 // String enumValue = enumName(alpha2); 304 // enum_alpha3.put(enumValue, alpha3); 305 // enum_UN.put(enumValue, numeric); 306 // enum_FIPS10.put(enumValue, fips10); 307 // enum_TLD.put(enumValue, internet); 308 // } 309 // codes.close(); 310 DecimalFormat threeDigits = new DecimalFormat("000"); 311 for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) { 312 cldrCodes.add(value); 313 if (isPrivateUseRegion(value)) continue; 314 enum_UN.put( 315 value, 316 threeDigits.format( 317 supplementalDataInfo 318 .getNumericTerritoryMapping() 319 .getAll(value) 320 .iterator() 321 .next())); 322 } 323 for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) { 324 cldrCodes.add(value); 325 if (isPrivateUseRegion(value)) continue; 326 enum_alpha3.put( 327 value, 328 supplementalDataInfo 329 .getAlpha3TerritoryMapping() 330 .getAll(value) 331 .iterator() 332 .next()); 333 } 334 335 BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt"); 336 Map<String, String> macro_name = new TreeMap<>(); 337 while (true) { 338 String line = codes.readLine(); 339 if (line == null) break; 340 line = line.trim(); 341 if (line.length() == 0) continue; 342 if (line.charAt(0) < '0' || line.charAt(0) > '9') { 343 System.out.println("GenerateEnums: Skipping: " + line); 344 continue; 345 } 346 String[] sourceValues = line.split("\\s+"); 347 int code = Integer.parseInt(sourceValues[0]); 348 String codeName = threeDigit.format(code); 349 macro_name.put(codeName, line); 350 } 351 codes.close(); 352 // String values = 353 // supplementalDataInfo.getValidityInfo().get("$territory").get1().trim(); 354 Map<Status, Set<String>> validRegions = 355 Validity.getInstance().getStatusToCodes(LstrType.region); 356 Set<String> regions = new TreeSet<>(); 357 regions.addAll(validRegions.get(Status.regular)); 358 regions.addAll(validRegions.get(Status.macroregion)); 359 // String[] validTerritories = values.split("\\s+"); 360 // for (int i = 0; i < validTerritories.length; ++i) { 361 for (String region : regions) { 362 if (corrigendum.contains(region)) { 363 System.out.println("Skipping " + region + "\t\t" + getEnglishName(region)); 364 continue; // exception, corrigendum 365 } 366 if (isPrivateUseRegion(region)) continue; 367 if (region.charAt(0) < 'A') { // numeric 368 enum_UN.put(enumName(region), region); 369 cldrCodes.add(region); 370 } else { 371 if (enum_alpha3.get(region) == null && !ignoreMissingAlpha3.contains(region)) { 372 System.out.println("Missing alpha3 for: " + region); 373 } 374 } 375 } 376 checkDuplicates(enum_UN); 377 checkDuplicates(enum_alpha3); 378 Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory")); 379 compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes); 380 Set<String> missing = new TreeSet<>(availableCodes); 381 missing.removeAll(cldrCodes); 382 // don't care list: "003" 383 // missing.remove("003"); 384 // missing.remove("172"); 385 // Remove the following. They don't have numeric or alpha3 codes so they can't be found. 386 missing.remove("EA"); 387 missing.remove("EZ"); 388 missing.remove("IC"); 389 missing.remove("QU"); 390 missing.remove("UN"); 391 missing.remove("CQ"); 392 393 if (missing.size() != 0) { 394 throw new IllegalArgumentException("Codes in Registry but not in CLDR: " + missing); 395 } 396 397 Set<String> UNValues = new TreeSet<>(enum_UN.values()); 398 399 for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext(); ) { 400 Object key = it.next(); 401 Object value = macro_name.get(key); 402 if (!UNValues.contains(key)) { 403 System.out.println("Macro " + key + "\t" + value); 404 } 405 } 406 407 for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext(); ) { 408 String region = it.next(); 409 String englishName = getEnglishName(region); 410 if (englishName == null) { 411 englishName = "NULL"; // for debugging\ 412 } 413 String rfcName = getRFC3066Name(region); 414 if (!englishName.equals(rfcName)) { 415 System.out.println( 416 "Different names: {\"" 417 + region 418 + "\",\t\"" 419 + englishName 420 + " (" 421 + rfcName 422 + ")\"},"); 423 } 424 } 425 426 getContainment(); 427 428 DateFormat[] simpleFormats = { 429 new SimpleDateFormat("yyyy-MM-dd"), 430 new SimpleDateFormat("yyyy-MM"), 431 new SimpleDateFormat("yyyy"), 432 }; 433 Date today = new Date(); 434 Date longAgo = new Date(1000 - 1900, 1, 1); 435 currencyCodes = new TreeSet<>(); 436 unlimitedCurrencyCodes = 437 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null); 438 for (Iterator<String> it = 439 supplementalData.iterator("//supplementalData/currencyData/region"); 440 it.hasNext(); ) { 441 String path = it.next(); 442 XPathParts parts = XPathParts.getFrozenInstance(path); 443 String region = parts.findAttributeValue("region", "iso3166"); 444 String code = parts.findAttributeValue("currency", "iso4217"); 445 String to = parts.findAttributeValue("currency", "to"); 446 main: 447 if (to == null) { 448 unlimitedCurrencyCodes.put(code, region); 449 } else { 450 for (int i = 0; i < simpleFormats.length; ++i) { 451 try { 452 Date foo = simpleFormats[i].parse(to); 453 if (foo.compareTo(longAgo) < 0) { 454 System.out.println("Date Error: can't parse " + to); 455 break main; 456 } else if (foo.compareTo(today) >= 0) { 457 unlimitedCurrencyCodes.put(code, region); 458 } 459 break main; 460 } catch (ParseException e) { 461 } 462 } 463 System.out.println("Date Error: can't parse " + to); 464 } 465 currencyCodes.add(code); 466 } 467 468 validCurrencyCodes = new TreeSet<>(); 469 Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu"); 470 for (String code : bcp47CurrencyCodes) { 471 validCurrencyCodes.add(code.toUpperCase()); 472 } 473 474 scripts = supplementalDataInfo.getCLDRScriptCodes(); 475 languages = supplementalDataInfo.getCLDRLanguageCodes(); 476 477 // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory")); 478 // availableCodes.add("003"); 479 // for (Iterator it = availableCodes.iterator(); it.hasNext();) { 480 // String code = (String) next()) 481 // canonicalRegion_UN.put(alpha2, numeric); 482 // } 483 484 // for (Iterator it = availableCodes.iterator(); it.hasNext();) { 485 // String code = (String)it.next(); 486 // RegionCode region = map_id_canonical_RFC.get(code); 487 // if (region != null) continue; // skip others 488 // region = new RegionCode(code); 489 // map_id_canonical_RFC.put(code,region); 490 // map_canonical_id_RFC.put(region,code); 491 // if ("A".compareTo(code) > 0) { 492 // map_id_canonical_UN.put(code,region); 493 // map_canonical_id_UN.put(region,code); 494 // } else { 495 // map_id_canonical_A2.put(code,region); 496 // map_canonical_id_A2.put(region,code); 497 // } 498 // } 499 // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) { 500 // String code = (String)it.next(); 501 // good.add(getInstance(code)); 502 // } 503 } 504 getContainment()505 public void getContainment() { 506 // <group type="001" contains="002 009 019 142 150"/> <!--World --> 507 for (Iterator<String> it = 508 supplementalData.iterator("//supplementalData/territoryContainment/group"); 509 it.hasNext(); ) { 510 String path = it.next(); 511 String fullPath = supplementalData.getFullXPath(path); 512 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 513 String container = parts.getAttributeValue(parts.size() - 1, "type"); 514 final String containedString = parts.getAttributeValue(-1, "contains"); 515 List<String> contained = Arrays.asList(containedString.trim().split("\\s+")); 516 containment.put(container, contained); 517 } 518 // fix recursiveContainment. 519 // for (String region : (Collection<String>)containment.keySet()) { 520 // Set temp = new LinkedHashSet(); 521 // addContains(region, temp); 522 // recursiveContainment.put(region, temp); 523 // } 524 Set<String> startingFromWorld = new TreeSet<>(); 525 addContains("001", startingFromWorld); 526 compareSets("World", startingFromWorld, "CLDR", cldrCodes); 527 // generateContains(); 528 } 529 generateContains()530 private void generateContains() { 531 532 for (String region : containment.keySet()) { 533 List<String> plain = containment.get(region); 534 // Collection recursive = (Collection)recursiveContainment.get(region); 535 536 String setAsString = CldrUtility.join(plain, " "); 537 // String setAsString2 = recursive.equals(plain) ? "" : ", " + 538 // Utility.join(recursive," "); 539 Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");"); 540 } 541 } 542 543 Map<String, List<String>> containment = new TreeMap<>(); 544 545 // Map recursiveContainment = new TreeMap(); 546 addContains(String string, Set<String> startingFromWorld)547 private void addContains(String string, Set<String> startingFromWorld) { 548 startingFromWorld.add(string); 549 List<String> contained = containment.get(string); 550 if (contained == null) return; 551 for (Iterator<String> it = contained.iterator(); it.hasNext(); ) { 552 addContains(it.next(), startingFromWorld); 553 } 554 } 555 556 @SuppressWarnings("rawtypes") compareSets(String name, Set availableCodes, String name2, Set cldrCodes)557 private void compareSets(String name, Set availableCodes, String name2, Set cldrCodes) { 558 Set temp = new TreeSet(); 559 temp.addAll(availableCodes); 560 temp.removeAll(cldrCodes); 561 System.out.println("In " + name + " but not in " + name2 + ": " + temp); 562 temp.clear(); 563 temp.addAll(cldrCodes); 564 temp.removeAll(availableCodes); 565 System.out.println("Not in " + name + " but in " + name2 + ": " + temp); 566 } 567 568 @SuppressWarnings("rawtypes") checkDuplicates(Map m)569 private void checkDuplicates(Map m) { 570 Map backMap = new HashMap(); 571 for (Iterator it = m.keySet().iterator(); it.hasNext(); ) { 572 Object key = it.next(); 573 Object o = m.get(key); 574 Object otherKey = backMap.get(o); 575 if (otherKey != null) 576 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t" + o); 577 else backMap.put(o, key); 578 } 579 } 580 581 Set<String> corrigendum = 582 new TreeSet<>(Arrays.asList(new String[] {"QE", "833", "830", "172"})); // 003, 419 583 584 private ImmutableMap<String, String> extraNames = 585 ImmutableMap.<String, String>builder() 586 .put("BU", "Burma") 587 .put("TP", "East Timor") 588 .put("YU", "Yugoslavia") 589 .put("ZR", "Zaire") 590 .put("CD", "Congo (Kinshasa, Democratic Republic)") 591 .put("CI", "Ivory Coast (Cote d'Ivoire)") 592 .put("FM", "Micronesia (Federated States)") 593 .put("TL", "East Timor (Timor-Leste)") 594 // .put("155", "Western Europe") 595 .build(); 596 597 private Set<String> currencyCodes; 598 599 private Set<String> validCurrencyCodes; 600 601 static SupplementalDataInfo supplementalDataInfo = 602 SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); 603 604 /** 605 * Get the RegionCode Enum 606 * 607 * @throws IOException 608 */ showRegionCodeInfo()609 private void showRegionCodeInfo() throws IOException { 610 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt"); 611 System.out.println(); 612 System.out.println("Data for RegionCode"); 613 System.out.println(); 614 showGeneratedCommentStart(CODE_INDENT); 615 616 Set<String> reordered = new TreeSet<>(new LengthFirstComparator()); 617 reordered.addAll(enum_UN.keySet()); 618 Map<String, String> code_replacements = new TreeMap<>(); 619 int len = " /** Polynesia */ UN061,".length(); 620 for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) { 621 String region = it.next(); 622 printRow(Log.getLog(), region, null, "territory", code_replacements, len); 623 } 624 showGeneratedCommentEnd(CODE_INDENT); 625 Log.close(); 626 627 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt"); 628 Log.println(); 629 Log.println("Data for ISO Region Codes"); 630 Log.println(); 631 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 632 if (territory.equals("ZZ")) { 633 continue; 634 } 635 PopulationData popData = supplementalDataInfo.getPopulationDataForTerritory(territory); 636 // to("ak").add(Scope.Macrolanguage).add("aka"); 637 Log.formatln( 638 " addRegion(RegionCode.%s, %s, %s, %s) // %s", 639 territory, 640 format(popData.getPopulation()), 641 format(popData.getLiteratePopulation() / popData.getPopulation()), 642 format(popData.getGdp()), 643 english.getName("territory", territory)); 644 // remove all the ISO 639-3 until they are part of BCP 47 645 // we need to remove in earlier pass so we have the count 646 Set<String> languages = new TreeSet<>(); 647 for (String language : 648 supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) { 649 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) { 650 continue; 651 } 652 popData = 653 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 654 language, territory); 655 if (popData.getPopulation() == 0 656 || Double.isNaN( 657 popData.getLiteratePopulation() / popData.getPopulation())) { 658 continue; 659 } 660 languages.add(language); 661 } 662 int count = languages.size(); 663 for (String language : languages) { 664 --count; // we need to know the last one 665 popData = 666 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 667 language, territory); 668 Log.formatln( 669 " .addLanguage(\"%s\", %s, %s)%s // %s", 670 language, 671 format(popData.getPopulation()), 672 format(popData.getLiteratePopulation() / popData.getPopulation()), 673 (count == 0 ? ";" : ""), 674 english.getName(language)); 675 } 676 } 677 Log.close(); 678 679 Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt"); 680 Log.println(); 681 Log.println("Data for ISO Region Codes"); 682 Log.println(); 683 showGeneratedCommentStart(DATA_INDENT); 684 // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are 685 // containees 686 reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory")); 687 reordered.addAll(enum_UN.keySet()); 688 for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) { 689 String region = it.next(); 690 // String cldrName = region.length() < 5 ? region : region.substring(2); // fix 691 // UN 692 // name 693 // int un = Integer.parseInt((String) enum_UN.get(region)); // get around 694 // dumb octal 695 // syntax 696 String isoCode = enum_alpha3.get(region); 697 if (isoCode == null) continue; 698 Log.println( 699 DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode." + region + ");"); 700 } 701 doAliases(code_replacements); 702 showGeneratedCommentEnd(DATA_INDENT); 703 Log.println(); 704 Log.println("Data for M.49 Region Codes"); 705 Log.println(); 706 showGeneratedCommentStart(DATA_INDENT); 707 708 for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) { 709 String region = it.next(); 710 // String cldrName = region.length() < 5 ? region : region.substring(2); // fix 711 // UN 712 // name 713 int un = Integer.parseInt(enum_UN.get(region), 10); // get 714 // around 715 // dumb 716 // octal 717 // syntax 718 Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region + ");"); 719 } 720 doAliases(code_replacements); 721 722 System.out.println("Plain list"); 723 for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) { 724 String region = it.next(); 725 // String cldrName = region.length() < 5 ? region : region.substring(2); // fix 726 // UN 727 // name 728 String newCode = code_replacements.get(region); 729 if (newCode != null) continue; 730 731 int un = Integer.parseInt(enum_UN.get(region), 10); // get 732 // around 733 // dumb 734 // octal 735 // syntax 736 System.out.println(un + "\t" + region + "\t" + english.getName("territory", region)); 737 } 738 739 showGeneratedCommentEnd(DATA_INDENT); 740 741 getContainment(); 742 Log.close(); 743 } 744 745 static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH); 746 747 static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH); 748 749 static { 750 nf.setMaximumFractionDigits(3); 751 sf.setMaximumFractionDigits(3); 752 nf.setGroupingUsed(false); 753 } 754 format(double value)755 private String format(double value) { 756 double newValue = CldrUtility.roundToDecimals(value, 3); 757 String option1 = nf.format(newValue); 758 String option2 = sf.format(value); 759 return option1.length() <= option2.length() ? option1 : option2; 760 } 761 doAliases(Map<String, String> code_replacements)762 private void doAliases(Map<String, String> code_replacements) { 763 for (String code : code_replacements.keySet()) { 764 String newCode = code_replacements.get(code); 765 if (newCode.length() == 0) newCode = "ZZ"; 766 Log.println( 767 DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \"" + newCode + "\");"); 768 } 769 } 770 showGeneratedCommentEnd(String indent)771 private void showGeneratedCommentEnd(String indent) { 772 Log.println(indent + "/* End of generated code. */"); 773 } 774 showGeneratedCommentStart(String indent)775 private void showGeneratedCommentStart(String indent) { 776 Log.println(indent + "/*"); 777 Log.println(indent + " * The following information is generated from a tool,"); 778 Log.println(indent + " * as described on"); 779 Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates."); 780 Log.println(indent + " * Do not edit manually."); 781 Log.println(indent + " * Start of generated code."); 782 Log.println(indent + " */"); 783 } 784 785 public static final class LengthFirstComparator implements Comparator<Object> { 786 @Override compare(Object a, Object b)787 public int compare(Object a, Object b) { 788 String as = a.toString(); 789 String bs = b.toString(); 790 if (as.length() < bs.length()) return -1; 791 if (as.length() > bs.length()) return 1; 792 return as.compareTo(bs); 793 } 794 } 795 796 public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> { 797 String type; 798 DeprecatedAndLengthFirstComparator(String type)799 DeprecatedAndLengthFirstComparator(String type) { 800 this.type = type; 801 } 802 803 @Override compare(Object a, Object b)804 public int compare(Object a, Object b) { 805 String as = a.toString(); 806 String bs = b.toString(); 807 String ar = getDeprecatedReplacement(type, as); 808 String br = getDeprecatedReplacement(type, bs); 809 // put the deprecated ones first, eg those that aren't null 810 if (ar != null) { 811 if (br == null) return -1; 812 } 813 if (br != null) { 814 if (ar == null) return 1; 815 } 816 // now check the length 817 if (as.length() < bs.length()) return -1; 818 if (as.length() > bs.length()) return 1; 819 return as.compareTo(bs); 820 } 821 } 822 823 /** 824 * Returns null if not deprecated, otherwise "" if there is no replacement, otherwise the 825 * replacement. 826 * 827 * @return 828 */ getDeprecatedReplacement(String type, String cldrTypeValue)829 public String getDeprecatedReplacement(String type, String cldrTypeValue) { 830 if (type.equals("currency")) { 831 return null; 832 } 833 String path = 834 supplementalMetadata.getFullXPath( 835 "//supplementalData/metadata/alias/" 836 + type 837 + "Alias[@type=\"" 838 + cldrTypeValue 839 + "\"]", 840 true); 841 if (path == null) { 842 return null; 843 } 844 XPathParts parts = XPathParts.getFrozenInstance(path); 845 String replacement = parts.findAttributeValue("territoryAlias", "replacement"); 846 if (replacement == null) { 847 return ""; 848 } 849 return replacement; 850 } 851 852 static Transliterator doFallbacks = 853 Transliterator.createFromRules("id", "[’ʻ] > ''; ", Transliterator.FORWARD); 854 printRow( PrintWriter out, String codeName, String englishName, String type, Map<String, String> code_replacements, int lineLength)855 private void printRow( 856 PrintWriter out, 857 String codeName, 858 String englishName, 859 String type, 860 Map<String, String> code_replacements, 861 int lineLength) { 862 // int numeric = Integer.parseInt((String) enum_UN.get(codeName)); 863 // String alpha3 = (String) enum_alpha3.get(codeName); 864 String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix 865 // UN 866 // name 867 String replacement = getDeprecatedReplacement(type, cldrName); 868 869 String resolvedEnglishName = 870 englishName != null 871 ? englishName 872 : type.equals("territory") 873 ? getEnglishName(codeName) 874 : type.equals("currency") 875 ? getName(codeName) 876 : english.getName(CLDRFile.SCRIPT_NAME, codeName); 877 resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName); 878 879 String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " + 880 // threeDigit.format(numeric); 881 String printedCodeName = codeName; 882 if (replacement != null) { 883 code_replacements.put(codeName, replacement); 884 out.println(prefix); 885 prefix = 886 CODE_INDENT 887 + " * @deprecated" 888 + (replacement.length() == 0 ? "" : " see " + replacement); 889 printedCodeName = "@Deprecated " + printedCodeName; 890 } 891 prefix += " */"; 892 893 if (codeName.equals("UN001")) { 894 out.println(); 895 } 896 if (prefix.length() > lineLength - (printedCodeName.length() + 1)) { 897 // break at last space 898 int lastFit = prefix.lastIndexOf(' ', lineLength - (printedCodeName.length() + 1) - 2); 899 out.println(prefix.substring(0, lastFit)); 900 prefix = CODE_INDENT + " *" + prefix.substring(lastFit); 901 } 902 out.print(prefix); 903 out.print( 904 Utility.repeat( 905 " ", (lineLength - (prefix.length() + printedCodeName.length() + 1)))); 906 out.println(printedCodeName + ","); 907 } 908 getEnglishName(String codeName)909 private String getEnglishName(String codeName) { 910 if (codeName.length() > 3) codeName = codeName.substring(2); // fix UN name 911 String name = extraNames.get(codeName); 912 if (name != null) return name; 913 name = english.getName(CLDRFile.TERRITORY_NAME, codeName); 914 if (name != null) return name; 915 return codeName; 916 } 917 getRFC3066Name(String codeName)918 private String getRFC3066Name(String codeName) { 919 if (codeName.length() > 2) codeName = codeName.substring(2); // fix UN name 920 List<String> list = sc.getFullData("territory", codeName); 921 if (list == null) return null; 922 return list.get(0); 923 } 924 enumName(String codeName)925 private String enumName(String codeName) { 926 return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName; 927 } 928 quote(Object input)929 static String quote(Object input) { 930 if (input != null) return '"' + input.toString().trim() + '"'; 931 return null; 932 } 933 isPrivateUseRegion(String codeName)934 static boolean isPrivateUseRegion(String codeName) { 935 // AA, QM..QZ, XA..XZ, ZZ - CLDR codes 936 if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) { 937 return false; 938 } else if (codeName.equals("AA") || codeName.equals("ZZ")) { 939 return true; 940 } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) { 941 return true; 942 } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) { 943 return true; 944 } 945 return false; 946 } 947 /* 948 * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x> 949 * <x><context>ウ</context><i>ヽ</i></x> 950 * 951 * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x> 952 * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x> 953 * <x><context>ヴ</context><i>ヽ</i></x> 954 * 955 * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> 956 * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x> 957 * 958 * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> 959 * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x> 960 * 961 * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x> 962 */ 963 } 964