package org.unicode.cldr.tool;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Scope;
import org.unicode.cldr.util.Iso639Data.Type;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import org.unicode.cldr.util.XPathParts;

import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ULocale;

/**
 * Command-line tool that reads CLDR data and writes text files containing
 * generated enum-style source fragments for currencies, languages, scripts
 * and regions under {@code CLDRPaths.GEN_DIRECTORY + "/enum/"}. Diagnostic
 * output goes to {@code System.out}.
 */
public class GenerateEnums {
    // NOTE(review): the indent literals below (and the sample-row literals used
    // to compute column widths further down) each contain a single space in the
    // reviewed copy of this file. That copy looked whitespace-collapsed, so
    // confirm the intended widths against version control.
    private static final String CODE_INDENT = " ";

    private static final String DATA_INDENT = " ";

    private static final String LIST_INDENT = " ";

    private StandardCodes sc = StandardCodes.make();

    private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

    /** Every region code seen while loading the CLDR territory mappings. */
    private Set<String> cldrCodes = new TreeSet<String>();

    /** Region enum name to ISO 3166 alpha-3 code. */
    private Map<String, String> enum_alpha3 = new TreeMap<String, String>();

    /** Region enum name to three-digit UN M.49 code. */
    private Map<String, String> enum_UN = new TreeMap<String, String>();

    private CLDRFile english = factory.make("en", false);

    private CLDRFile supplementalMetadata = factory.make("supplementalMetadata",
        false);

    private CLDRFile supplementalData = factory.make("supplementalData", false);

    /** Currency code to the regions where it has no end date, or an end date that is not in the past. */
    private Relation<String, String> unlimitedCurrencyCodes;

    private Set<String> scripts = new TreeSet<String>();

    private Set<String> languages = new TreeSet<String>();

    /**
     * Runs every generation step in sequence.
     *
     * @param args ignored
     * @throws IOException if an input file cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        GenerateEnums gen = new GenerateEnums();
        gen.showLanguageInfo();
        gen.loadCLDRData();
        gen.showCounts();
        gen.showCurrencies();
        gen.showLanguages();
        gen.showScripts();
        gen.showRegionCodeInfo();
        System.out.println("DONE");
    }

    /** Prints the number of "good" language, script and territory subtags. */
    private void showCounts() {
        System.out.format("Language Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("language").size());
        System.out.format("Script Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("script").size());
        System.out.format("Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR,
            sc.getGoodAvailableCodes("territory").size());
    }

    /**
     * Writes {@code currency_enum.txt}: one enum row per currency, ordered by
     * English name, and prints a comparison of the two currency sources.
     * Requires {@link #loadCLDRData()} to have run.
     */
    private void showCurrencies() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
        Log.println();
        Log.println("Currency Data");
        Log.println();
        showGeneratedCommentStart(CODE_INDENT);
        compareSets("currencies from sup.data", currencyCodes, "valid currencies",
            validCurrencyCodes);

        // Valid according to BCP 47 but never referenced by the currency data.
        Set<String> unused = new TreeSet<String>(validCurrencyCodes);
        unused.removeAll(currencyCodes);
        showCurrencies(currencyCodes);
        Log.println();
        showCurrencies(unused);

        Map<String, String> sorted = new TreeMap<String, String>(
            Collator.getInstance(ULocale.ENGLISH));
        for (String code : validCurrencyCodes) {
            if (unused.contains(code) && !code.equals("CLF")) {
                continue; // we include CLF for compatibility
            }
            sorted.put(getName(code), code);
        }
        int lineLength = " /** Belgian Franc */ BEF,".length();
        for (Map.Entry<String, String> entry : sorted.entrySet()) {
            printRow(Log.getLog(), entry.getValue(), entry.getKey(), "currency", null,
                lineLength);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Returns the English name of a currency, falling back to the code itself
     * (and reporting the miss on {@code System.out}) when no name is found.
     *
     * @param code currency code
     * @return the English name, or {@code code} if none is available; never null
     */
    private String getName(String code) {
        String result = english.getName(CLDRFile.CURRENCY_NAME, code);
        if (result == null) {
            result = code;
            System.out.println("Failed to find: " + code);
        }
        return result;
    }

    /**
     * Prints one tab-separated diagnostic line per currency: code, English name,
     * which source set it is missing from (if any), and the regions still using it.
     *
     * @param both the currency codes to list
     */
    private void showCurrencies(Set<String> both) {
        for (String code : both) {
            String englishName = getName(code);
            Set<String> regions = unlimitedCurrencyCodes.getAll(code);

            String status;
            if (!validCurrencyCodes.contains(code)) {
                status = "supp-only";
            } else if (!currencyCodes.contains(code)) {
                status = "valid-only";
            } else {
                status = "";
            }
            System.out.println(code + "\t" + englishName + "\t" + status + "\t"
                + (regions != null ? regions : "unused"));
        }
    }

    /**
     * Writes {@code script_enum.txt}: one enum row per CLDR script code that
     * has an English name. Requires {@link #loadCLDRData()} to have run.
     */
    private void showScripts() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
        Log.println();
        Log.println("Script Data");
        Log.println();

        showGeneratedCommentStart(CODE_INDENT);
        Map<String, String> code_replacements = new TreeMap<String, String>();
        int len = " /** Arabic */ Arab,".length();
        for (String code : scripts) {
            String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
            if (englishName == null) {
                continue;
            }
            printRow(Log.getLog(), code, englishName, "script", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Writes {@code language_info.txt}: a {@code to("xx")...;} builder line for
     * every non-private-use language that has an alpha-3 code, a type other than
     * Living, or a scope other than Individual.
     */
    private void showLanguageInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
        System.out.println();
        System.out.println("Language Converter");
        System.out.println();
        StringBuilder buffer = new StringBuilder();
        for (String language : sc.getAvailableCodes("language")) {
            Scope scope = Iso639Data.getScope(language);
            if (scope == Scope.PrivateUse) {
                continue;
            }
            buffer.setLength(0);
            String alpha3 = Iso639Data.toAlpha3(language);
            if (alpha3 != null) {
                buffer.append(".add(\"" + alpha3 + "\")");
            }
            Type type = Iso639Data.getType(language);
            if (type != Type.Living) {
                buffer.append(".add(Type." + type + ")");
            }
            if (scope != Scope.Individual) {
                buffer.append(".add(Scope." + scope + ")");
            }
            if (buffer.length() > 0) {
                Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
            }
        }
        Log.close();
    }

    /**
     * Prints an enum row per named language to {@code System.out} and writes
     * {@code language_enum.txt}: the language codes as concatenated string
     * literals, starting a new literal whenever the first letter changes or the
     * line would exceed the limit. Requires {@link #loadCLDRData()} to have run.
     */
    private void showLanguages() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
        System.out.println();
        System.out.println("Language Data");
        System.out.println();

        for (String code : languages) {
            String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
            if (englishName == null) {
                continue;
            }
            System.out.println(" /**" + englishName + "*/ " + code + ",");
        }

        showGeneratedCommentStart(LIST_INDENT);
        /*
         * Produce the form:
         *   + "aa ab ace ach ada ady ae af afa afh "
         *   + "ain ak akk ale alg alt am an ang apa ar arc arn arp art arw "
         */
        StringBuilder buffer = new StringBuilder();
        int lineLimit = 70 - LIST_INDENT.length();
        char lastChar = 0;
        for (String code : languages) {
            if (code.equals("root")) {
                continue;
            }
            if (code.charAt(0) != lastChar
                || buffer.length() + 1 + code.length() > lineLimit) {
                if (buffer.length() != 0) {
                    Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
                }
                buffer.setLength(0);
                lastChar = code.charAt(0);
            }
            buffer.append(code).append(' ');
        }
        // The buffer is empty when there were no languages (or only "root");
        // guard so that case no longer throws from charAt(-1).
        if (buffer.length() > 0) {
            // remove the very last space
            if (buffer.charAt(buffer.length() - 1) == ' ') {
                buffer.setLength(buffer.length() - 1);
            }
            Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
        }

        showGeneratedCommentEnd(LIST_INDENT);
        Log.close();
    }

    /**
     * Joins the elements of a collection with a separator. Not called from
     * within this file.
     *
     * @param collection elements to join; may be null
     * @param separator text placed between consecutive elements
     * @return the joined string, or null if {@code collection} is null
     */
    private Object join(Collection<?> collection, String separator) {
        if (collection == null) {
            return null;
        }
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (Object item : collection) {
            if (first) {
                first = false;
            } else {
                result.append(separator);
            }
            result.append(item);
        }
        return result.toString();
    }

    static NumberFormat threeDigit = new DecimalFormat("000");

    /**
     * Loads the region, currency, script and language data used by the
     * {@code show*} methods, printing consistency diagnostics along the way.
     *
     * @throws IOException if {@code UnMacroRegions.txt} cannot be read
     * @throws IllegalArgumentException if the registry contains territory codes
     *         that CLDR does not know about (apart from a fixed exception list)
     */
    public void loadCLDRData() throws IOException {
        DecimalFormat threeDigits = new DecimalFormat("000");
        for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_UN.put(value, threeDigits.format(
                supplementalDataInfo.getNumericTerritoryMapping().getAll(value).iterator().next()));
        }
        for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_alpha3.put(value,
                supplementalDataInfo.getAlpha3TerritoryMapping().getAll(value).iterator().next());
        }

        Map<String, String> macro_name = readMacroRegionNames();

        Map<Status, Set<String>> validRegions = Validity.getInstance().getStatusToCodes(LstrType.region);
        Set<String> regions = new TreeSet<String>();
        regions.addAll(validRegions.get(Status.regular));
        regions.addAll(validRegions.get(Status.macroregion));
        for (String region : regions) {
            if (corrigendum.contains(region)) {
                System.out.println("Skipping " + region + "\t\t"
                    + getEnglishName(region));
                continue; // exception, corrigendum
            }
            if (isPrivateUseRegion(region)) {
                continue;
            }
            if (region.charAt(0) < 'A') { // numeric
                enum_UN.put(enumName(region), region);
                cldrCodes.add(region);
            } else if (enum_alpha3.get(region) == null) {
                System.out.println("Missing alpha3 for: " + region);
            }
        }
        checkDuplicates(enum_UN);
        checkDuplicates(enum_alpha3);

        Set<String> availableCodes = new TreeSet<String>(sc.getAvailableCodes("territory"));
        compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
        Set<String> missing = new TreeSet<String>(availableCodes);
        missing.removeAll(cldrCodes);
        // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
        missing.remove("EA");
        missing.remove("EZ");
        missing.remove("IC");
        missing.remove("QU");
        missing.remove("UN");

        if (missing.size() != 0) {
            throw new IllegalArgumentException("Codes in Registry but not in CLDR: "
                + missing);
        }

        Set<String> UNValues = new TreeSet<String>(enum_UN.values());
        for (Map.Entry<String, String> macro : macro_name.entrySet()) {
            if (!UNValues.contains(macro.getKey())) {
                System.out.println("Macro " + macro.getKey() + "\t" + macro.getValue());
            }
        }

        for (String region : enum_UN.keySet()) {
            // getEnglishName never returns null: it falls back to the code.
            String englishName = getEnglishName(region);
            String rfcName = getRFC3066Name(region);
            if (!englishName.equals(rfcName)) {
                System.out.println("Different names: {\"" + region + "\",\t\""
                    + englishName + " (" + rfcName + ")\"},");
            }
        }

        getContainment();
        loadCurrencyData();

        validCurrencyCodes = new TreeSet<String>();
        Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
        for (String code : bcp47CurrencyCodes) {
            validCurrencyCodes.add(code.toUpperCase());
        }

        scripts = supplementalDataInfo.getCLDRScriptCodes();
        languages = supplementalDataInfo.getCLDRLanguageCodes();
    }

    /**
     * Reads {@code UnMacroRegions.txt} and maps each three-digit code to the
     * full (trimmed) line it came from. Blank lines are ignored; lines that do
     * not start with a digit are reported and skipped.
     */
    private Map<String, String> readMacroRegionNames() throws IOException {
        Map<String, String> macro_name = new TreeMap<String, String>();
        BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
        try {
            while (true) {
                String line = codes.readLine();
                if (line == null) {
                    break;
                }
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                if (line.charAt(0) < '0' || line.charAt(0) > '9') {
                    System.out.println("GenerateEnums: Skipping: " + line);
                    continue;
                }
                String[] sourceValues = line.split("\\s+");
                int code = Integer.parseInt(sourceValues[0]);
                macro_name.put(threeDigit.format(code), line);
            }
        } finally {
            // Close even when a malformed line makes parsing throw.
            codes.close();
        }
        return macro_name;
    }

    /**
     * Populates {@link #currencyCodes} with every currency listed under
     * {@code //supplementalData/currencyData/region} and
     * {@link #unlimitedCurrencyCodes} with those that have no "to" date or a
     * "to" date that is today or later.
     */
    private void loadCurrencyData() {
        XPathParts parts = new XPathParts();
        DateFormat[] simpleFormats = { new SimpleDateFormat("yyyy-MM-dd"),
            new SimpleDateFormat("yyyy-MM"), new SimpleDateFormat("yyyy"), };
        Date today = new Date();
        // Sanity floor (year 1000); the deprecated constructor is kept so the
        // resulting instant is exactly what it has always been.
        Date longAgo = new Date(1000 - 1900, 1, 1);
        currencyCodes = new TreeSet<String>();
        unlimitedCurrencyCodes = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
        for (Iterator<String> it = supplementalData
            .iterator("//supplementalData/currencyData/region"); it.hasNext();) {
            String path = it.next();
            parts.set(path);
            String region = parts.findAttributeValue("region", "iso3166");
            String code = parts.findAttributeValue("currency", "iso4217");
            String to = parts.findAttributeValue("currency", "to");
            if (to == null) {
                unlimitedCurrencyCodes.put(code, region);
            } else {
                Date end = parseDate(to, simpleFormats);
                if (end == null || end.compareTo(longAgo) < 0) {
                    System.out.println("Date Error: can't parse " + to);
                } else if (end.compareTo(today) >= 0) {
                    unlimitedCurrencyCodes.put(code, region);
                }
            }
            currencyCodes.add(code);
        }
    }

    /**
     * Parses {@code text} with the first of {@code formats} that accepts it.
     *
     * @return the parsed date, or null if no format matches
     */
    private static Date parseDate(String text, DateFormat[] formats) {
        for (DateFormat format : formats) {
            try {
                return format.parse(text);
            } catch (ParseException ignored) {
                // Expected for the more specific patterns; try the next one.
            }
        }
        return null;
    }

    /**
     * Reads the territory containment groups from the supplemental data into
     * {@link #containment} and prints how the set reachable from "001" (World)
     * differs from the known CLDR codes.
     */
    public void getContainment() {
        XPathParts parts = new XPathParts();
        // <group type="001" contains="002 009 019 142 150"/> <!--World -->
        for (Iterator<String> it = supplementalData
            .iterator("//supplementalData/territoryContainment/group"); it.hasNext();) {
            String path = it.next();
            String fullPath = supplementalData.getFullXPath(path);
            parts.set(fullPath);
            String container = parts.getAttributeValue(parts.size() - 1, "type");
            final String containedString = parts.getAttributeValue(-1, "contains");
            List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
            containment.put(container, contained);
        }
        Set<String> startingFromWorld = new TreeSet<String>();
        addContains("001", startingFromWorld);
        compareSets("World", startingFromWorld, "CLDR", cldrCodes);
    }

    /**
     * Writes one {@code add("container", "a b c");} line per containment group
     * to the current log. Not called from within this file.
     */
    private void generateContains() {
        for (String region : containment.keySet()) {
            List<String> plain = containment.get(region);
            String setAsString = CldrUtility.join(plain, " ");
            Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
        }
    }

    /** Container region code to the codes it directly contains. */
    Map<String, List<String>> containment = new TreeMap<String, List<String>>();

    /**
     * Adds {@code string} and everything it transitively contains to
     * {@code startingFromWorld}.
     *
     * @param string region code to start from
     * @param startingFromWorld accumulator; also serves as the visited set
     */
    private void addContains(String string, Set<String> startingFromWorld) {
        if (!startingFromWorld.add(string)) {
            // Already visited: its descendants are already in (or on their way
            // into) the set. Stopping here also prevents unbounded recursion if
            // the containment data ever contains a cycle.
            return;
        }
        List<String> contained = containment.get(string);
        if (contained == null) {
            return;
        }
        for (String child : contained) {
            addContains(child, startingFromWorld);
        }
    }

    /**
     * Prints the two one-sided differences between a pair of named sets.
     *
     * @param name label for {@code availableCodes}
     * @param availableCodes first set
     * @param name2 label for {@code cldrCodes}
     * @param cldrCodes second set
     */
    private void compareSets(String name, Set<String> availableCodes, String name2,
        Set<String> cldrCodes) {
        Set<String> temp = new TreeSet<String>();
        temp.addAll(availableCodes);
        temp.removeAll(cldrCodes);
        System.out.println("In " + name + " but not in " + name2 + ": " + temp);
        temp.clear();
        temp.addAll(cldrCodes);
        temp.removeAll(availableCodes);
        System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
    }

    /**
     * Reports every key whose value was already seen under an earlier key.
     *
     * @param m the map to check for duplicate values
     */
    private <K, V> void checkDuplicates(Map<K, V> m) {
        Map<V, K> backMap = new HashMap<V, K>();
        for (Map.Entry<K, V> entry : m.entrySet()) {
            K key = entry.getKey();
            V value = entry.getValue();
            K otherKey = backMap.get(value);
            if (otherKey != null) {
                System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t"
                    + value);
            } else {
                backMap.put(value, key);
            }
        }
    }

    /** Region codes that are deliberately skipped when loading valid regions. */
    Set<String> corrigendum = new TreeSet<String>(Arrays.asList(new String[] { "QE", "833",
        "830", "172" })); // 003, 419

    /** English names that override the CLDR name for specific region codes. */
    private Map extraNames = CollectionUtilities.asMap(new String[][] {
        { "BU", "Burma" }, { "TP", "East Timor" }, { "YU", "Yugoslavia" },
        { "ZR", "Zaire" }, { "CD", "Congo (Kinshasa, Democratic Republic)" },
        { "CI", "Ivory Coast (Cote d'Ivoire)" },
        { "FM", "Micronesia (Federated States)" },
        { "TL", "East Timor (Timor-Leste)" },
    });

    /** Every currency code referenced by the supplemental currency data. */
    private Set<String> currencyCodes;

    /** Upper-cased values of the BCP 47 "cu" key. */
    private Set<String> validCurrencyCodes;

    static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
        .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);

    /**
     * Writes the three region output files: {@code region_enum.txt},
     * {@code region_info.txt} and {@code region_converters.txt}.
     * Requires {@link #loadCLDRData()} to have run.
     *
     * @throws IOException if an output file cannot be written
     */
    private void showRegionCodeInfo() throws IOException {
        Map<String, String> code_replacements = writeRegionEnum();
        writeRegionInfo();
        writeRegionConverters(code_replacements);
    }

    /**
     * Writes {@code region_enum.txt}, one enum row per region ordered by length
     * and then alphabetically.
     *
     * @return deprecated region enum name to its replacement ("" if none)
     */
    private Map<String, String> writeRegionEnum() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
        System.out.println();
        System.out.println("Data for RegionCode");
        System.out.println();
        showGeneratedCommentStart(CODE_INDENT);

        Set<String> reordered = new TreeSet<String>(new LengthFirstComparator());
        reordered.addAll(enum_UN.keySet());
        Map<String, String> code_replacements = new TreeMap<String, String>();
        int len = " /** Polynesia */ UN061,".length();
        for (String region : reordered) {
            printRow(Log.getLog(), region, null, "territory", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
        return code_replacements;
    }

    /**
     * Writes {@code region_info.txt}: an {@code addRegion(...)} line per
     * territory with population data, followed by an {@code .addLanguage(...)}
     * line per retained language. The last language line of each territory
     * carries the terminating semicolon.
     */
    private void writeRegionInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
            if (territory.equals("ZZ")) {
                continue;
            }
            PopulationData popData = supplementalDataInfo
                .getPopulationDataForTerritory(territory);
            Log.formatln(" addRegion(RegionCode.%s, %s, %s, %s) // %s", territory,
                format(popData.getPopulation()),
                format(popData.getLiteratePopulation() / popData.getPopulation()),
                format(popData.getGdp()),
                english.getName("territory", territory));

            // Remove all the ISO 639-3 codes until they are part of BCP 47.
            // Filtering happens in a first pass so the count is known up front.
            Set<String> keptLanguages = new TreeSet<String>();
            for (String language : supplementalDataInfo
                .getLanguagesForTerritoryWithPopulationData(territory)) {
                if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
                    continue;
                }
                popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    language, territory);
                if (popData.getPopulation() == 0
                    || Double.isNaN(popData.getLiteratePopulation()
                        / popData.getPopulation())) {
                    continue;
                }
                keptLanguages.add(language);
            }
            int count = keptLanguages.size();
            for (String language : keptLanguages) {
                --count; // we need to know the last one
                popData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    language, territory);
                Log.formatln(" .addLanguage(\"%s\", %s, %s)%s // %s", language,
                    format(popData.getPopulation()),
                    format(popData.getLiteratePopulation() / popData.getPopulation()),
                    (count == 0 ? ";" : ""),
                    english.getName(language));
            }
        }
        Log.close();
    }

    /**
     * Writes {@code region_converters.txt}: alpha-3 and M.49 {@code add(...)}
     * lines plus alias lines, and prints a plain list of the non-deprecated
     * regions to {@code System.out}.
     *
     * @param code_replacements deprecated region enum name to its replacement
     */
    private void writeRegionConverters(Map<String, String> code_replacements) throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);
        // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are containees
        Set<String> reordered = new TreeSet<String>(
            new DeprecatedAndLengthFirstComparator("territory"));
        reordered.addAll(enum_UN.keySet());
        for (String region : reordered) {
            String isoCode = enum_alpha3.get(region);
            if (isoCode == null) {
                continue;
            }
            Log.println(DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode."
                + region + ");");
        }
        doAliases(code_replacements);
        showGeneratedCommentEnd(DATA_INDENT);
        Log.println();
        Log.println("Data for M.49 Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);

        for (String region : reordered) {
            // Parse explicitly as base 10 so the emitted literal has no leading
            // zeros (which Java source would read as octal).
            int un = Integer.parseInt(enum_UN.get(region), 10);
            Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region
                + ");");
        }
        doAliases(code_replacements);

        System.out.println("Plain list");
        for (String region : reordered) {
            if (code_replacements.get(region) != null) {
                continue; // deprecated
            }
            int un = Integer.parseInt(enum_UN.get(region), 10);
            System.out.println(un + "\t" + region + "\t"
                + english.getName("territory", region));
        }

        showGeneratedCommentEnd(DATA_INDENT);

        getContainment();
        Log.close();
    }

    static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);

    static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
    static {
        nf.setMaximumFractionDigits(3);
        sf.setMaximumFractionDigits(3);
        nf.setGroupingUsed(false);
    }

    /**
     * Formats a number as whichever is shorter: plain notation rounded via
     * {@code CldrUtility.roundToDecimals(value, 3)}, or scientific notation.
     * Ties go to plain notation.
     */
    private String format(double value) {
        double newValue = CldrUtility.roundToDecimals(value, 3);
        String option1 = nf.format(newValue);
        String option2 = sf.format(value);
        return option1.length() <= option2.length() ? option1 : option2;
    }

    /**
     * Writes an {@code addAlias(...)} line for each deprecated code; a code
     * with no replacement is aliased to "ZZ".
     *
     * @param code_replacements deprecated code to its replacement ("" if none)
     */
    private void doAliases(Map<String, String> code_replacements) {
        for (Map.Entry<String, String> entry : code_replacements.entrySet()) {
            String newCode = entry.getValue();
            if (newCode.length() == 0) {
                newCode = "ZZ";
            }
            Log.println(DATA_INDENT + "addAlias(" + "RegionCode." + entry.getKey() + ", \""
                + newCode + "\");");
        }
    }

    /** Writes the marker comment that closes a generated section. */
    private void showGeneratedCommentEnd(String indent) {
        Log.println(indent + "/* End of generated code. */");
    }

    /** Writes the header comment that opens a generated section. */
    private void showGeneratedCommentStart(String indent) {
        Log.println(indent + "/*");
        Log.println(indent
            + " * The following information is generated from a tool,");
        Log.println(indent + " * as described on");
        Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
        Log.println(indent + " * Do not edit manually.");
        Log.println(indent + " * Start of generated code.");
        Log.println(indent + " */");
    }

    /** Orders by the length of {@code toString()}, then by its natural string order. */
    public final static class LengthFirstComparator implements Comparator<Object> {
        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            if (as.length() < bs.length()) {
                return -1;
            }
            if (as.length() > bs.length()) {
                return 1;
            }
            return as.compareTo(bs);
        }
    }

    /**
     * Orders deprecated codes (as determined by
     * {@link GenerateEnums#getDeprecatedReplacement(String, String)}) before
     * non-deprecated ones, then by length, then by natural string order.
     */
    public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
        String type;

        DeprecatedAndLengthFirstComparator(String type) {
            this.type = type;
        }

        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            boolean aDeprecated = getDeprecatedReplacement(type, as) != null;
            boolean bDeprecated = getDeprecatedReplacement(type, bs) != null;
            // put the deprecated ones first
            if (aDeprecated != bDeprecated) {
                return aDeprecated ? -1 : 1;
            }
            // now check the length
            if (as.length() < bs.length()) {
                return -1;
            }
            if (as.length() > bs.length()) {
                return 1;
            }
            return as.compareTo(bs);
        }
    }

    /**
     * Looks up the alias entry for a code in the supplemental metadata.
     *
     * @param type alias kind: the prefix of the {@code <type>Alias} element,
     *        e.g. "territory" or "script"; "currency" is never deprecated
     * @param cldrTypeValue the code to look up
     * @return null if not deprecated, otherwise "" if there is no replacement,
     *         otherwise the replacement
     */
    public String getDeprecatedReplacement(String type, String cldrTypeValue) {
        if (type.equals("currency")) {
            return null;
        }
        String aliasElement = type + "Alias";
        String path = supplementalMetadata.getFullXPath(
            "//supplementalData/metadata/alias/" + aliasElement + "[@type=\""
                + cldrTypeValue + "\"]",
            true);
        if (path == null) {
            return null;
        }
        // Read the attribute from the element that was actually queried. This
        // used to be hard-coded to "territoryAlias", so replacements for any
        // other type (e.g. scripts) were never found.
        String replacement = new XPathParts().set(path).findAttributeValue(
            aliasElement, "replacement");
        if (replacement == null) {
            return "";
        }
        return replacement;
    }

    static Transliterator doFallbacks = Transliterator.createFromRules("id",
        "[’ʻ] > ''; ", Transliterator.FORWARD);

    /**
     * Prints one enum row: a Javadoc-style comment with the English name (and
     * a deprecation note if applicable), padding, then the code and a comma.
     *
     * @param out destination
     * @param codeName enum constant name; a name of five or more characters is
     *        treated as carrying a two-character prefix (e.g. "UN001")
     * @param englishName display name, or null to look it up by {@code type}
     * @param type "territory", "currency" or (anything else) script
     * @param code_replacements receives deprecated code to replacement; may be
     *        null only when no code of this {@code type} can be deprecated
     * @param lineLength target width of the row
     */
    private void printRow(PrintWriter out, String codeName, String englishName,
        String type, Map<String, String> code_replacements, int lineLength) {
        String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix UN name
        String replacement = getDeprecatedReplacement(type, cldrName);

        String resolvedEnglishName;
        if (englishName != null) {
            resolvedEnglishName = englishName;
        } else if (type.equals("territory")) {
            resolvedEnglishName = getEnglishName(codeName);
        } else if (type.equals("currency")) {
            resolvedEnglishName = getName(codeName);
        } else {
            resolvedEnglishName = english.getName(CLDRFile.SCRIPT_NAME, codeName);
        }
        resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);

        String prefix = CODE_INDENT + "/** " + resolvedEnglishName;
        String printedCodeName = codeName;
        if (replacement != null) {
            code_replacements.put(codeName, replacement);
            out.println(prefix);
            prefix = CODE_INDENT + " * @deprecated"
                + (replacement.length() == 0 ? "" : " see " + replacement);
            printedCodeName = "@Deprecated " + printedCodeName;
        }
        prefix += " */";

        if (codeName.equals("UN001")) {
            out.println();
        }
        int available = lineLength - (printedCodeName.length() + 1);
        if (prefix.length() > available) {
            // break at last space
            int lastFit = prefix.lastIndexOf(' ', available - 2);
            if (lastFit >= 0) { // no usable space: leave the line over-long rather than throw
                out.println(prefix.substring(0, lastFit));
                prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
            }
        }
        out.print(prefix);
        out.print(Utility.repeat(" ", (lineLength
            - (prefix.length() + printedCodeName.length() + 1))));
        out.println(printedCodeName + ",");
    }

    /**
     * Returns the English name for a region enum name, preferring
     * {@link #extraNames}, then CLDR's English data, then the code itself.
     *
     * @param codeName region code, optionally with a two-character prefix (e.g. "UN001")
     * @return a display name; never null
     */
    private String getEnglishName(String codeName) {
        if (codeName.length() > 3) {
            codeName = codeName.substring(2); // fix UN name
        }
        String name = (String) extraNames.get(codeName);
        if (name != null) {
            return name;
        }
        name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
        if (name != null) {
            return name;
        }
        return codeName;
    }

    /**
     * Returns the first registry data field for a territory.
     *
     * @param codeName region code, optionally with a two-character prefix (e.g. "UN001")
     * @return the registry name, or null if the registry has no data for the code
     */
    private String getRFC3066Name(String codeName) {
        // Same threshold as getEnglishName, so a bare three-digit code is not
        // mistaken for a prefixed one.
        if (codeName.length() > 3) {
            codeName = codeName.substring(2); // fix UN name
        }
        List<String> list = sc.getFullData("territory", codeName);
        if (list == null) {
            return null;
        }
        return list.get(0);
    }

    /** Prefixes numeric region codes with "UN"; other codes are returned unchanged. */
    private String enumName(String codeName) {
        return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
    }

    /**
     * Wraps the trimmed string form of {@code input} in double quotes.
     *
     * @return the quoted text, or null if {@code input} is null
     */
    static String quote(Object input) {
        if (input != null) {
            return '"' + input.toString().trim() + '"';
        }
        return null;
    }

    /**
     * Tells whether a region code is treated as private use: AA, QM..QZ and
     * XA..XZ, except that EU, QO and ZZ are explicitly not private use.
     */
    static boolean isPrivateUseRegion(String codeName) {
        if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
            return false;
        }
        if (codeName.equals("AA")) {
            return true;
        }
        if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
            return true;
        }
        return codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0;
    }
}