package org.unicode.cldr.tool;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Scope;
import org.unicode.cldr.util.Iso639Data.Type;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import org.unicode.cldr.util.XPathParts;

import com.google.common.collect.ImmutableMap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ULocale;

/**
 * Command-line tool that reads CLDR data and writes text files containing enum-style listings for
 * currencies, languages, scripts and regions under {@code CLDRPaths.GEN_DIRECTORY + "/enum/"}.
 * Diagnostic output (set comparisons, name mismatches, skipped codes) goes to {@code System.out}.
 */
public class GenerateEnums {
    // NOTE(review): runs of spaces inside the string literals of this class (these three indents
    // and the sample rows used to compute line lengths) may have been collapsed in the copy that
    // was reviewed - confirm the intended widths against the generated output.
    private static final String CODE_INDENT = " ";

    private static final String DATA_INDENT = " ";

    private static final String LIST_INDENT = " ";

    static NumberFormat threeDigit = new DecimalFormat("000");

    static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
        .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);

    static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);

    static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
    static {
        nf.setMaximumFractionDigits(3);
        sf.setMaximumFractionDigits(3);
        nf.setGroupingUsed(false);
    }

    /** Applied to English names in {@link #printRow} before they are written. */
    static Transliterator doFallbacks = Transliterator.createFromRules("id",
        "[’ʻ] > ''; ", Transliterator.FORWARD);

    private final StandardCodes sc = StandardCodes.make();

    private final Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

    /** Every region code seen while loading the CLDR mappings. */
    private final Set<String> cldrCodes = new TreeSet<>();

    /** Region (enum name) to ISO 3166 alpha-3 code. */
    private final Map<String, String> enum_alpha3 = new TreeMap<>();

    /** Region (enum name) to three-digit numeric code. */
    private final Map<String, String> enum_UN = new TreeMap<>();

    // The three CLDRFile fields below depend on 'factory' and must stay declared after it.
    private final CLDRFile english = factory.make("en", false);

    private final CLDRFile supplementalMetadata = factory.make("supplementalMetadata", false);

    private final CLDRFile supplementalData = factory.make("supplementalData", false);

    /** Currency code to the regions where it has no end date, or an end date not yet reached. */
    private Relation<String, String> unlimitedCurrencyCodes;

    private Set<String> scripts = new TreeSet<>();

    private Set<String> languages = new TreeSet<>();

    /** Currency codes found in the supplemental currencyData. */
    private Set<String> currencyCodes;

    /** Upper-cased values of the BCP 47 "cu" key. */
    private Set<String> validCurrencyCodes;

    /** Container region to its directly contained regions; filled by {@link #getContainment()}. */
    Map<String, List<String>> containment = new TreeMap<>();

    /** Region codes that are skipped when walking the valid regions. */
    Set<String> corrigendum = new TreeSet<>(Arrays.asList(new String[] { "QE", "833",
        "830", "172" })); // 003, 419

    /** English names that take precedence over the CLDR names in {@link #getEnglishName}. */
    private final ImmutableMap<String, String> extraNames = ImmutableMap.<String, String> builder()
        .put("BU", "Burma").put("TP", "East Timor").put("YU", "Yugoslavia")
        .put("ZR", "Zaire").put("CD", "Congo (Kinshasa, Democratic Republic)")
        .put("CI", "Ivory Coast (Cote d'Ivoire)")
        .put("FM", "Micronesia (Federated States)")
        .put("TL", "East Timor (Timor-Leste)")
        .build();

    /**
     * Runs every generation step in order. {@link #loadCLDRData()} must run before the currency,
     * language, script and region steps because it fills the fields they read.
     *
     * @param args ignored
     * @throws IOException if an input cannot be read or an output cannot be opened
     */
    public static void main(String[] args) throws IOException {
        GenerateEnums gen = new GenerateEnums();
        gen.showLanguageInfo();
        gen.loadCLDRData();
        gen.showCounts();
        gen.showCurrencies();
        gen.showLanguages();
        gen.showScripts();
        gen.showRegionCodeInfo();
        System.out.println("DONE");
    }

    /** Prints the number of good language, script and territory subtags. */
    private void showCounts() {
        System.out.format("Language Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("language").size());
        System.out.format("Script Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("script").size());
        System.out.format("Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("territory").size());
    }

    /**
     * Writes {@code currency_enum.txt}: one row per currency, ordered by English name. Valid
     * currencies that do not occur in the supplemental currency data are left out, except CLF.
     *
     * @throws IOException if the log file cannot be opened
     */
    private void showCurrencies() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
        Log.println();
        Log.println("Currency Data");
        Log.println();
        showGeneratedCommentStart(CODE_INDENT);
        compareSets("currencies from sup.data", currencyCodes, "valid currencies",
            validCurrencyCodes);
        Set<String> unused = new TreeSet<>(validCurrencyCodes);
        unused.removeAll(currencyCodes);
        showCurrencies(currencyCodes);
        Log.println();
        showCurrencies(unused);

        // Keyed by English name so the rows come out in English collation order.
        Map<String, String> nameToCode = new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
        for (String code : validCurrencyCodes) {
            if (unused.contains(code) && !code.equals("CLF")) {
                continue; // we include CLF for compatibility
            }
            nameToCode.put(getName(code), code);
        }
        int lineLength = " /** Belgian Franc */ BEF,".length();
        for (Map.Entry<String, String> entry : nameToCode.entrySet()) {
            printRow(Log.getLog(), entry.getValue(), entry.getKey(), "currency", null,
                lineLength);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Returns the English name of a currency, falling back to the code itself (and reporting the
     * miss on {@code System.out}) when there is no name. Never returns null.
     */
    private String getName(String code) {
        String result = english.getName(CLDRFile.CURRENCY_NAME, code);
        if (result == null) {
            result = code;
            System.out.println("Failed to find: " + code);
        }
        return result;
    }

    /**
     * Prints one tab-separated diagnostic line per currency: code, English name, a status
     * ("", "valid-only" or "supp-only") and the regions still using it (or "unused").
     */
    private void showCurrencies(Set<String> both) {
        for (String code : both) {
            String status;
            if (!validCurrencyCodes.contains(code)) {
                status = "supp-only";
            } else if (currencyCodes.contains(code)) {
                status = "";
            } else {
                status = "valid-only";
            }
            Set<String> regions = unlimitedCurrencyCodes.getAll(code);
            System.out.println(code
                + "\t" + getName(code)
                + "\t" + status
                + "\t" + (regions != null ? regions : "unused"));
        }
    }

    /**
     * Writes {@code script_enum.txt}: one row per CLDR script code that has an English name.
     *
     * @throws IOException if the log file cannot be opened
     */
    private void showScripts() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
        Log.println();
        Log.println("Script Data");
        Log.println();

        showGeneratedCommentStart(CODE_INDENT);
        Map<String, String> code_replacements = new TreeMap<>();
        int len = " /** Arabic */ Arab,".length();
        for (String code : scripts) {
            if (english.getName(CLDRFile.SCRIPT_NAME, code) == null) {
                continue;
            }
            printRow(Log.getLog(), code, null, "script", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Writes {@code language_info.txt}: for every non-private-use language that has an alpha-3
     * code, a type other than Living, or a scope other than Individual, one
     * {@code to("xx").add(...)...;} line.
     *
     * @throws IOException if the log file cannot be opened
     */
    private void showLanguageInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
        System.out.println();
        System.out.println("Language Converter");
        System.out.println();
        StringBuilder buffer = new StringBuilder();
        for (String language : sc.getAvailableCodes("language")) {
            Scope scope = Iso639Data.getScope(language);
            if (scope == Scope.PrivateUse) {
                continue;
            }
            buffer.setLength(0);
            String alpha3 = Iso639Data.toAlpha3(language);
            if (alpha3 != null) {
                buffer.append(".add(\"" + alpha3 + "\")");
            }
            Type type = Iso639Data.getType(language);
            if (type != Type.Living) {
                buffer.append(".add(Type." + type + ")");
            }
            if (scope != Scope.Individual) {
                buffer.append(".add(Scope." + scope + ")");
            }
            if (buffer.length() > 0) {
                Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
            }
        }
        Log.close();
    }

    /**
     * Prints the named CLDR languages to {@code System.out} and writes {@code language_enum.txt},
     * a list of language codes as concatenated string-literal lines. A new line is started when
     * the first letter changes or the line would exceed the limit; "root" is left out.
     *
     * @throws IOException if the log file cannot be opened
     */
    private void showLanguages() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
        System.out.println();
        System.out.println("Language Data");
        System.out.println();

        for (String code : languages) {
            String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
            if (englishName == null) {
                continue;
            }
            System.out.println(" /**" + englishName + "*/ " + code + ",");
        }

        showGeneratedCommentStart(LIST_INDENT);
        /*
         * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
         * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
         * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
         */
        StringBuilder buffer = new StringBuilder();
        int lineLimit = 70 - LIST_INDENT.length();
        char lastChar = 0;
        for (String code : languages) {
            if (code.equals("root")) {
                continue;
            }
            if (code.charAt(0) != lastChar
                || buffer.length() + 1 + code.length() > lineLimit) {
                if (buffer.length() != 0) {
                    Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
                }
                buffer.setLength(0);
                lastChar = code.charAt(0);
            }
            buffer.append(code).append(' ');
        }
        // The buffer is empty when there were no codes to list; the unguarded charAt() used
        // here previously threw StringIndexOutOfBoundsException in that case.
        if (buffer.length() > 0) {
            // remove the very last space
            if (buffer.charAt(buffer.length() - 1) == ' ') {
                buffer.setLength(buffer.length() - 1);
            }
            Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
        }

        showGeneratedCommentEnd(LIST_INDENT);
        Log.close();
    }

    /**
     * Joins the string forms of the elements with the separator.
     *
     * @return the joined string, or null when the collection is null
     */
    private Object join(Collection<?> collection, String separator) {
        if (collection == null) {
            return null;
        }
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (Object item : collection) {
            if (first) {
                first = false;
            } else {
                result.append(separator);
            }
            result.append(item);
        }
        return result.toString();
    }

    /**
     * Loads region, currency, script and language data into the fields of this object and prints
     * consistency diagnostics along the way.
     *
     * @throws IOException if {@code UnMacroRegions.txt} cannot be read
     * @throws IllegalArgumentException if the registry has territory codes unknown to CLDR
     */
    public void loadCLDRData() throws IOException {
        loadTerritoryMappings();
        Map<String, String> macro_name = readMacroRegionNames();
        addValidRegions();
        checkDuplicates(enum_UN);
        checkDuplicates(enum_alpha3);
        checkRegistryCoverage();
        reportMacroRegionsWithoutCode(macro_name);
        reportNameDifferences();

        getContainment();

        loadCurrencyData();

        scripts = supplementalDataInfo.getCLDRScriptCodes();
        languages = supplementalDataInfo.getCLDRLanguageCodes();
    }

    /** Fills cldrCodes, enum_UN and enum_alpha3 from the supplemental territory mappings. */
    private void loadTerritoryMappings() {
        DecimalFormat threeDigits = new DecimalFormat("000");
        for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_UN.put(value, threeDigits.format(
                supplementalDataInfo.getNumericTerritoryMapping().getAll(value).iterator().next()));
        }
        for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_alpha3.put(value,
                supplementalDataInfo.getAlpha3TerritoryMapping().getAll(value).iterator().next());
        }
    }

    /**
     * Reads {@code UnMacroRegions.txt} and maps each three-digit code to its full (trimmed) line.
     * Blank lines are ignored; lines not starting with a digit are reported and skipped.
     */
    private Map<String, String> readMacroRegionNames() throws IOException {
        Map<String, String> macro_name = new TreeMap<>();
        // try-with-resources so the reader is closed even when a line fails to parse.
        try (BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt")) {
            String line;
            while ((line = codes.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                if (line.charAt(0) < '0' || line.charAt(0) > '9') {
                    System.out.println("GenerateEnums: Skipping: " + line);
                    continue;
                }
                String[] sourceValues = line.split("\\s+");
                int code = Integer.parseInt(sourceValues[0]);
                macro_name.put(threeDigit.format(code), line);
            }
        }
        return macro_name;
    }

    /**
     * Walks the regular and macroregion codes from the validity data: numeric codes are added to
     * enum_UN under their "UN"-prefixed name, alphabetic codes lacking an alpha-3 are reported.
     */
    private void addValidRegions() {
        Map<Status, Set<String>> validRegions = Validity.getInstance().getStatusToCodes(LstrType.region);
        Set<String> regions = new TreeSet<>();
        regions.addAll(validRegions.get(Status.regular));
        regions.addAll(validRegions.get(Status.macroregion));
        for (String region : regions) {
            if (corrigendum.contains(region)) {
                System.out.println("Skipping " + region + "\t\t"
                    + getEnglishName(region));
                continue; // exception, corrigendum
            }
            if (isPrivateUseRegion(region)) {
                continue;
            }
            if (region.charAt(0) < 'A') { // numeric
                enum_UN.put(enumName(region), region);
                cldrCodes.add(region);
            } else if (enum_alpha3.get(region) == null) {
                System.out.println("Missing alpha3 for: " + region);
            }
        }
    }

    /** Compares the registry's territory codes with cldrCodes and fails on unexpected gaps. */
    private void checkRegistryCoverage() {
        Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory"));
        compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
        Set<String> missing = new TreeSet<>(availableCodes);
        missing.removeAll(cldrCodes);
        // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
        missing.remove("EA");
        missing.remove("EZ");
        missing.remove("IC");
        missing.remove("QU");
        missing.remove("UN");

        if (!missing.isEmpty()) {
            throw new IllegalArgumentException("Codes in Registry but not in CLDR: "
                + missing);
        }
    }

    /** Prints every macro region from the data file whose code is not a value of enum_UN. */
    private void reportMacroRegionsWithoutCode(Map<String, String> macro_name) {
        Set<String> UNValues = new TreeSet<>(enum_UN.values());
        for (Map.Entry<String, String> macro : macro_name.entrySet()) {
            if (!UNValues.contains(macro.getKey())) {
                System.out.println("Macro " + macro.getKey() + "\t" + macro.getValue());
            }
        }
    }

    /** Prints every region of enum_UN whose English name differs from its registry name. */
    private void reportNameDifferences() {
        for (String region : enum_UN.keySet()) {
            String englishName = getEnglishName(region);
            if (englishName == null) {
                englishName = "NULL"; // for debugging
            }
            String rfcName = getRFC3066Name(region);
            if (!englishName.equals(rfcName)) {
                System.out.println("Different names: {\"" + region + "\",\t\""
                    + englishName + " (" + rfcName + ")\"},");
            }
        }
    }

    /**
     * Fills currencyCodes, unlimitedCurrencyCodes and validCurrencyCodes. A currency/region pair
     * counts as unlimited when it has no "to" date or the date is today or later. A "to" date
     * that matches none of the accepted formats, or lies before the sanity bound, is reported.
     */
    private void loadCurrencyData() {
        DateFormat[] simpleFormats = { new SimpleDateFormat("yyyy-MM-dd"),
            new SimpleDateFormat("yyyy-MM"), new SimpleDateFormat("yyyy"), };
        Date today = new Date();
        // Deprecated constructor: the year is an offset from 1900 and the month is zero-based,
        // so this is 1 February of the year 1000. It only serves as a sanity bound.
        Date longAgo = new Date(1000 - 1900, 1, 1);
        currencyCodes = new TreeSet<>();
        unlimitedCurrencyCodes = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
        for (Iterator<String> it = supplementalData
            .iterator("//supplementalData/currencyData/region"); it.hasNext();) {
            String path = it.next();
            XPathParts parts = XPathParts.getFrozenInstance(path);
            String region = parts.findAttributeValue("region", "iso3166");
            String code = parts.findAttributeValue("currency", "iso4217");
            String to = parts.findAttributeValue("currency", "to");
            if (to == null) {
                unlimitedCurrencyCodes.put(code, region);
            } else {
                Date end = parseWithFirstMatchingFormat(to, simpleFormats);
                if (end == null || end.compareTo(longAgo) < 0) {
                    System.out.println("Date Error: can't parse " + to);
                } else if (end.compareTo(today) >= 0) {
                    unlimitedCurrencyCodes.put(code, region);
                }
            }
            currencyCodes.add(code);
        }

        validCurrencyCodes = new TreeSet<>();
        Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
        for (String code : bcp47CurrencyCodes) {
            validCurrencyCodes.add(code.toUpperCase());
        }
    }

    /**
     * Tries each format in order and returns the first successful parse.
     *
     * @return the parsed date, or null when no format accepts the text
     */
    private static Date parseWithFirstMatchingFormat(String text, DateFormat[] formats) {
        for (DateFormat format : formats) {
            try {
                return format.parse(text);
            } catch (ParseException ignored) {
                // Not this format; fall through to the next, less specific one.
            }
        }
        return null;
    }

    /**
     * Reads the territoryContainment groups into {@link #containment} and prints how the set of
     * regions reachable from "001" compares with cldrCodes.
     */
    public void getContainment() {
        // <group type="001" contains="002 009 019 142 150"/> <!--World -->
        for (Iterator<String> it = supplementalData
            .iterator("//supplementalData/territoryContainment/group"); it.hasNext();) {
            String path = it.next();
            String fullPath = supplementalData.getFullXPath(path);
            XPathParts parts = XPathParts.getFrozenInstance(fullPath);
            String container = parts.getAttributeValue(parts.size() - 1, "type");
            final String containedString = parts.getAttributeValue(-1, "contains");
            List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
            containment.put(container, contained);
        }
        Set<String> startingFromWorld = new TreeSet<>();
        addContains("001", startingFromWorld);
        compareSets("World", startingFromWorld, "CLDR", cldrCodes);
    }

    /** Writes one {@code add("container", "contained ...");} line per containment entry. */
    private void generateContains() {
        for (Map.Entry<String, List<String>> entry : containment.entrySet()) {
            String setAsString = CldrUtility.join(entry.getValue(), " ");
            Log.println("\t\tadd(\"" + entry.getKey() + "\", \"" + setAsString + "\");");
        }
    }

    /** Adds the region and, recursively, everything it contains to the target set. */
    private void addContains(String string, Set<String> startingFromWorld) {
        startingFromWorld.add(string);
        List<String> contained = containment.get(string);
        if (contained == null) {
            return;
        }
        for (String child : contained) {
            addContains(child, startingFromWorld);
        }
    }

    /** Prints the two set differences between the given sets, labelled with their names. */
    private void compareSets(String name, Set<String> availableCodes, String name2,
        Set<String> cldrCodes) {
        Set<String> temp = new TreeSet<>(availableCodes);
        temp.removeAll(cldrCodes);
        System.out.println("In " + name + " but not in " + name2 + ": " + temp);
        temp.clear();
        temp.addAll(cldrCodes);
        temp.removeAll(availableCodes);
        System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
    }

    /** Reports every key whose value was already seen under an earlier key. */
    private void checkDuplicates(Map<String, String> m) {
        Map<String, String> backMap = new HashMap<>();
        for (Map.Entry<String, String> entry : m.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            String otherKey = backMap.get(value);
            if (otherKey != null) {
                System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t"
                    + value);
            } else {
                backMap.put(value, key);
            }
        }
    }

    /**
     * Writes the region outputs: {@code region_enum.txt} (enum rows), {@code region_info.txt}
     * (population data per territory and language) and {@code region_converters.txt} (alpha-3
     * and numeric mappings plus aliases). Also prints a plain list to {@code System.out}.
     *
     * @throws IOException if a log file cannot be opened
     */
    private void showRegionCodeInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
        System.out.println();
        System.out.println("Data for RegionCode");
        System.out.println();
        showGeneratedCommentStart(CODE_INDENT);

        Set<String> reordered = new TreeSet<>(new LengthFirstComparator());
        reordered.addAll(enum_UN.keySet());
        // Filled by printRow with every deprecated region and its replacement.
        Map<String, String> code_replacements = new TreeMap<>();
        int len = " /** Polynesia */ UN061,".length();
        for (String region : reordered) {
            printRow(Log.getLog(), region, null, "territory", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();

        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
            if (territory.equals("ZZ")) {
                continue;
            }
            PopulationData popData = supplementalDataInfo
                .getPopulationDataForTerritory(territory);
            // to("ak").add(Scope.Macrolanguage).add("aka");
            Log.formatln(" addRegion(RegionCode.%s, %s, %s, %s) // %s", territory,
                format(popData.getPopulation()),
                format(popData.getLiteratePopulation() / popData.getPopulation()),
                format(popData.getGdp()),
                english.getName("territory", territory));
            // remove all the ISO 639-3 until they are part of BCP 47
            // we need to remove in earlier pass so we have the count
            Set<String> reportedLanguages = new TreeSet<>();
            for (String language : supplementalDataInfo
                .getLanguagesForTerritoryWithPopulationData(territory)) {
                if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
                    continue;
                }
                popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    language, territory);
                if (popData.getPopulation() == 0
                    || Double.isNaN(popData.getLiteratePopulation()
                        / popData.getPopulation())) {
                    continue;
                }
                reportedLanguages.add(language);
            }
            int count = reportedLanguages.size();
            for (String language : reportedLanguages) {
                --count; // we need to know the last one
                popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    language, territory);
                Log.formatln(" .addLanguage(\"%s\", %s, %s)%s // %s", language,
                    format(popData.getPopulation()),
                    format(popData.getLiteratePopulation() / popData.getPopulation()),
                    (count == 0 ? ";" : ""),
                    english.getName(language));
            }
        }
        Log.close();

        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);
        // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
        // containees
        reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory"));
        reordered.addAll(enum_UN.keySet());
        for (String region : reordered) {
            String isoCode = enum_alpha3.get(region);
            if (isoCode == null) {
                continue;
            }
            Log.println(DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode."
                + region + ");");
        }
        doAliases(code_replacements);
        showGeneratedCommentEnd(DATA_INDENT);
        Log.println();
        Log.println("Data for M.49 Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);

        for (String region : reordered) {
            // Parsed so the value is written without leading zeros (which would read as octal).
            int un = Integer.parseInt(enum_UN.get(region), 10);
            Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region
                + ");");
        }
        doAliases(code_replacements);

        System.out.println("Plain list");
        for (String region : reordered) {
            if (code_replacements.get(region) != null) {
                continue; // deprecated regions are left out of the plain list
            }
            int un = Integer.parseInt(enum_UN.get(region), 10);
            System.out.println(un + "\t" + region + "\t"
                + english.getName("territory", region));
        }

        showGeneratedCommentEnd(DATA_INDENT);

        getContainment();
        Log.close();
    }

    /**
     * Formats a number as either a plain decimal (value rounded to 3 decimals) or scientific
     * notation, whichever string is shorter; the plain form wins ties.
     */
    private String format(double value) {
        double newValue = CldrUtility.roundToDecimals(value, 3);
        String option1 = nf.format(newValue);
        String option2 = sf.format(value);
        return option1.length() <= option2.length() ? option1 : option2;
    }

    /** Writes one {@code addAlias(...)} line per replacement; an empty replacement becomes "ZZ". */
    private void doAliases(Map<String, String> code_replacements) {
        for (Map.Entry<String, String> alias : code_replacements.entrySet()) {
            String newCode = alias.getValue();
            if (newCode.length() == 0) {
                newCode = "ZZ";
            }
            Log.println(DATA_INDENT + "addAlias(" + "RegionCode." + alias.getKey() + ", \""
                + newCode + "\");");
        }
    }

    /** Writes the comment that closes a generated section. */
    private void showGeneratedCommentEnd(String indent) {
        Log.println(indent + "/* End of generated code. */");
    }

    /** Writes the comment that opens a generated section. */
    private void showGeneratedCommentStart(String indent) {
        Log.println(indent + "/*");
        Log.println(indent
            + " * The following information is generated from a tool,");
        Log.println(indent + " * as described on");
        Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
        Log.println(indent + " * Do not edit manually.");
        Log.println(indent + " * Start of generated code.");
        Log.println(indent + " */");
    }

    /** Orders objects by the length of their string form, then by the string itself. */
    public final static class LengthFirstComparator implements Comparator<Object> {
        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            if (as.length() < bs.length()) {
                return -1;
            }
            if (as.length() > bs.length()) {
                return 1;
            }
            return as.compareTo(bs);
        }
    }

    /**
     * Orders deprecated codes (per {@link #getDeprecatedReplacement}) before all others, then by
     * length, then by string comparison.
     */
    public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
        String type;

        DeprecatedAndLengthFirstComparator(String type) {
            this.type = type;
        }

        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            boolean aDeprecated = getDeprecatedReplacement(type, as) != null;
            boolean bDeprecated = getDeprecatedReplacement(type, bs) != null;
            // put the deprecated ones first
            if (aDeprecated != bDeprecated) {
                return aDeprecated ? -1 : 1;
            }
            // now check the length
            if (as.length() < bs.length()) {
                return -1;
            }
            if (as.length() > bs.length()) {
                return 1;
            }
            return as.compareTo(bs);
        }
    }

    /**
     * Looks up the alias entry for a code in the supplemental metadata.
     *
     * @param type the code type used to build the alias element name ({@code type + "Alias"});
     *            "currency" is never looked up
     * @param cldrTypeValue the code to look up
     * @return null if not deprecated, otherwise "" if there is no replacement, otherwise the
     *         replacement
     */
    public String getDeprecatedReplacement(String type, String cldrTypeValue) {
        if (type.equals("currency")) {
            return null;
        }
        String aliasElement = type + "Alias";
        String path = supplementalMetadata.getFullXPath(
            "//supplementalData/metadata/alias/" + aliasElement + "[@type=\""
                + cldrTypeValue + "\"]",
            true);
        if (path == null) {
            return null;
        }
        XPathParts parts = XPathParts.getFrozenInstance(path);
        // Read the attribute from the element that was actually looked up. This used to be
        // hard-coded to "territoryAlias", so other types never yielded their replacement.
        String replacement = parts.findAttributeValue(aliasElement, "replacement");
        if (replacement == null) {
            return "";
        }
        return replacement;
    }

    /**
     * Writes one enum row of the form {@code /** English name *}{@code / CODE,} padded to the
     * given line length. Deprecated codes get an extra {@code @deprecated} comment line and a
     * {@code @Deprecated} annotation, and are recorded in {@code code_replacements}.
     *
     * @param out destination
     * @param codeName the enum name to print
     * @param englishName the name to show, or null to look it up according to {@code type}
     * @param type "territory", "currency", or anything else for a script
     * @param code_replacements receives codeName to replacement for deprecated codes; may be null
     * @param lineLength target width of the row
     */
    private void printRow(PrintWriter out, String codeName, String englishName,
        String type, Map<String, String> code_replacements, int lineLength) {
        // Only 5-character names ("UN" + three digits) carry a prefix to strip; 4-character
        // script codes must stay intact, hence the threshold differs from toRegionCode.
        String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2);
        String replacement = getDeprecatedReplacement(type, cldrName);

        String resolvedEnglishName;
        if (englishName != null) {
            resolvedEnglishName = englishName;
        } else if (type.equals("territory")) {
            resolvedEnglishName = getEnglishName(codeName);
        } else if (type.equals("currency")) {
            resolvedEnglishName = getName(codeName);
        } else {
            resolvedEnglishName = english.getName(CLDRFile.SCRIPT_NAME, codeName);
        }
        resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);

        String prefix = CODE_INDENT + "/** " + resolvedEnglishName;
        String printedCodeName = codeName;
        if (replacement != null) {
            if (code_replacements != null) {
                code_replacements.put(codeName, replacement);
            }
            out.println(prefix);
            prefix = CODE_INDENT + " * @deprecated"
                + (replacement.length() == 0 ? "" : " see " + replacement);
            printedCodeName = "@Deprecated " + printedCodeName;
        }
        prefix += " */";

        if (codeName.equals("UN001")) {
            out.println();
        }
        int available = lineLength - (printedCodeName.length() + 1);
        if (prefix.length() > available) {
            // break at last space; skip the break when there is none (lastIndexOf gives -1)
            int lastFit = prefix.lastIndexOf(' ', available - 2);
            if (lastFit >= 0) {
                out.println(prefix.substring(0, lastFit));
                prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
            }
        }
        out.print(prefix);
        out.print(Utility.repeat(" ", (lineLength
            - (prefix.length() + printedCodeName.length() + 1))));
        out.println(printedCodeName + ",");
    }

    /**
     * Drops the two-character prefix from names longer than three characters (e.g. "UN001"
     * becomes "001"); two-letter and bare three-digit codes are returned unchanged.
     */
    private static String toRegionCode(String codeName) {
        return codeName.length() > 3 ? codeName.substring(2) : codeName;
    }

    /**
     * Returns the English name of a region: an {@link #extraNames} override if present, else the
     * CLDR territory name, else the code itself.
     */
    private String getEnglishName(String codeName) {
        codeName = toRegionCode(codeName); // fix UN name
        String name = extraNames.get(codeName);
        if (name != null) {
            return name;
        }
        name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
        if (name != null) {
            return name;
        }
        return codeName;
    }

    /**
     * Returns the first entry of the registry's full data for a territory, or null if none.
     */
    private String getRFC3066Name(String codeName) {
        // Same prefix rule as getEnglishName; the previous "> 2" test also truncated bare
        // three-digit codes such as "001" to "1".
        codeName = toRegionCode(codeName); // fix UN name
        List<String> list = sc.getFullData("territory", codeName);
        if (list == null) {
            return null;
        }
        return list.get(0);
    }

    /** Prefixes codes whose first character sorts before 'A' (numeric codes) with "UN". */
    private String enumName(String codeName) {
        return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
    }

    /** Returns the trimmed string form of the input in double quotes, or null for null. */
    static String quote(Object input) {
        if (input != null) {
            return '"' + input.toString().trim() + '"';
        }
        return null;
    }

    /**
     * Tells whether a region code is treated as private use: AA, QM..QZ and XA..XZ, except that
     * EU, QO and ZZ are never private use here.
     */
    static boolean isPrivateUseRegion(String codeName) {
        // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
        if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
            return false;
        }
        if (codeName.equals("AA")) {
            return true;
        }
        if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
            return true;
        }
        return codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0;
    }
}