1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import com.google.common.base.Joiner; 10 import com.google.common.collect.ImmutableMap; 11 import com.google.common.collect.Multimap; 12 import com.google.common.collect.Multimaps; 13 import com.google.common.collect.Sets; 14 import com.google.common.collect.TreeMultimap; 15 import com.ibm.icu.impl.Relation; 16 import com.ibm.icu.impl.Row.R2; 17 import com.ibm.icu.impl.Row.R4; 18 import com.ibm.icu.lang.UCharacter; 19 import com.ibm.icu.text.Collator; 20 import com.ibm.icu.text.Normalizer; 21 import com.ibm.icu.text.Normalizer2; 22 import com.ibm.icu.text.NumberFormat; 23 import com.ibm.icu.text.UTF16; 24 import com.ibm.icu.text.UnicodeSet; 25 import com.ibm.icu.util.ICUUncheckedIOException; 26 import com.ibm.icu.util.ULocale; 27 import java.io.IOException; 28 import java.io.PrintWriter; 29 import java.io.StringWriter; 30 import java.io.UnsupportedEncodingException; 31 import java.util.ArrayList; 32 import java.util.Arrays; 33 import java.util.Collection; 34 import java.util.Collections; 35 import java.util.Comparator; 36 import java.util.Date; 37 import java.util.EnumSet; 38 import java.util.HashMap; 39 import java.util.HashSet; 40 import java.util.Iterator; 41 import java.util.LinkedHashSet; 42 import java.util.List; 43 import java.util.Locale; 44 import java.util.Map; 45 import java.util.Map.Entry; 46 import java.util.Set; 47 import java.util.SortedMap; 48 import java.util.TreeMap; 49 import java.util.TreeSet; 50 import org.unicode.cldr.draft.FileUtilities; 51 import org.unicode.cldr.draft.ScriptMetadata; 52 import org.unicode.cldr.draft.ScriptMetadata.Info; 53 import org.unicode.cldr.util.ArrayComparator; 54 import org.unicode.cldr.util.CLDRConfig; 55 import org.unicode.cldr.util.CLDRFile; 56 import org.unicode.cldr.util.CLDRFile.WinningChoice; 57 import org.unicode.cldr.util.CLDRLocale; 58 import org.unicode.cldr.util.CLDRPaths; 59 import org.unicode.cldr.util.CLDRTool; 60 import org.unicode.cldr.util.CLDRURLS; 61 import org.unicode.cldr.util.CldrUtility; 62 import org.unicode.cldr.util.Factory; 63 import org.unicode.cldr.util.FileCopier; 64 import org.unicode.cldr.util.Iso639Data; 65 import org.unicode.cldr.util.Iso639Data.Scope; 66 import org.unicode.cldr.util.Iso639Data.Type; 67 import org.unicode.cldr.util.LanguageTagParser; 68 import org.unicode.cldr.util.Level; 69 import org.unicode.cldr.util.Log; 70 import org.unicode.cldr.util.Organization; 71 import org.unicode.cldr.util.StandardCodes; 72 import org.unicode.cldr.util.StandardCodes.CodeType; 73 import org.unicode.cldr.util.SupplementalDataInfo; 74 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 75 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 76 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 77 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 78 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 79 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 80 import org.unicode.cldr.util.TransliteratorUtilities; 81 import org.unicode.cldr.util.XPathParts; 82 83 @CLDRTool(alias = "showlanguages", description = "Generate Language info charts") 84 public class ShowLanguages { 85 private static final boolean SHOW_NATIVE = true; 86 87 static Comparator col = 88 new org.unicode.cldr.util.MultiComparator( 89 Collator.getInstance(new ULocale("en")), 90 new UTF16.StringComparator(true, false, 0)); 91 92 static StandardCodes sc = StandardCodes.make(); 93 94 static Factory cldrFactory = 95 CLDRConfig.getInstance().getCldrFactory(); // .make(CLDRPaths.MAIN_DIRECTORY, ".*"); 96 static CLDRFile english = CLDRConfig.getInstance().getEnglish(); 97 main(String[] args)98 public static void main(String[] args) throws IOException { 99 System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); 100 FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR); 101 FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR); 102 FormattedFileWriter.copyIncludeHtmls(FormattedFileWriter.CHART_TARGET_DIR); 103 104 StringWriter sw = printLanguageData(cldrFactory, "index.html"); 105 writeSupplementalIndex("index.html", sw); 106 107 // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*"); 108 // printLanguageData(cldrFactory, "language_info2.txt"); 109 System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR); 110 } 111 112 /** */ 113 public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = 114 new FormattedFileWriter.Anchors(); 115 116 static SupplementalDataInfo supplementalDataInfo = 117 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 118 printLanguageData(Factory cldrFactory, String filename)119 private static StringWriter printLanguageData(Factory cldrFactory, String filename) 120 throws IOException { 121 StringWriter sw = new StringWriter(); 122 PrintWriter pw = new PrintWriter(sw); 123 124 LanguageInfo linfo = new LanguageInfo(cldrFactory); 125 linfo.showCoverageGoals(pw); 126 127 new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 128 ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS, null); 129 130 new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 131 new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 132 new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 133 new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 134 if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) { 135 new ChartUnitConversions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 136 new ChartUnitPreferences().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 137 } 138 // since we don't want these listed on the supplemental page, use null 139 140 new ShowPlurals().printPlurals(english, null, pw, cldrFactory); 141 142 linfo.printLikelySubtags(pw); 143 144 linfo.showCountryLanguageInfo(pw); 145 146 linfo.showLanguageCountryInfo(pw); 147 148 // linfo.showTerritoryInfo(); 149 // linfo.printCountryData(pw); 150 151 // linfo.printDeprecatedItems(pw); 152 153 // PrintWriter pw1 = new PrintWriter(new FormattedFileWriter(pw, "Languages and 154 // Territories", null)); 155 // pw1.println("<tr><th>Language \u2192 Territories"); 156 // pw1.println("</th><th>Territory \u2192 Language"); 157 // pw1.println("</th><th>Territories Not Represented"); 158 // pw1.println("</th><th>Languages Not Represented"); 159 // pw1.println("</th></tr>"); 160 // 161 // pw1.println("<tr><td>"); 162 // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 163 // pw1.println("</td><td>"); 164 // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME); 165 // pw1.println("</td><td>"); 166 // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME); 167 // pw1.println("</td><td>"); 168 // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 169 // pw1.println("</td></tr>"); 170 // 171 // pw1.close(); 172 173 printLanguageScript(linfo, pw); 174 printScriptLanguageTerritory(linfo, pw); 175 176 linfo.showCorrespondances(); 177 178 // linfo.showCalendarData(pw); 179 180 linfo.showCountryInfo(pw); 181 linfo.printCurrency(pw); 182 linfo.printContains(pw); 183 184 linfo.printWindows_Tzid(pw); 185 linfo.printAliases(pw); 186 187 linfo.printCharacters(pw); 188 189 pw.close(); 190 191 return sw; 192 } 193 writeSupplementalIndex(String filename, StringWriter sw)194 private static void writeSupplementalIndex(String filename, StringWriter sw) 195 throws IOException { 196 String[] replacements = { 197 "%date%", CldrUtility.isoFormatDateOnly(new Date()), 198 "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(), 199 "%data%", sw.toString(), 200 "%index%", "../index.html" 201 }; 202 PrintWriter pw2 = 203 org.unicode.cldr.draft.FileUtilities.openUTF8Writer( 204 FormattedFileWriter.CHART_TARGET_DIR, filename); 205 FileUtilities.appendFile(ShowLanguages.class, "supplemental.html", replacements, pw2); 206 pw2.close(); 207 } 208 printLanguageScript(LanguageInfo linfo, PrintWriter pw)209 private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException { 210 PrintWriter pw1; 211 TablePrinter tablePrinter = 212 new TablePrinter() 213 .addColumn("Language", "class='source'", null, "class='source'", true) 214 .setSpanRows(true) 215 .setSortPriority(0) 216 .setBreakSpans(true) 217 .addColumn( 218 "Code", 219 "class='source'", 220 CldrUtility.getDoubleLinkMsg(), 221 "class='source'", 222 true) 223 .setSpanRows(true) 224 .addColumn( 225 "ML", 226 "class='target' title='modern language'", 227 null, 228 "class='target'", 229 true) 230 .setSpanRows(true) 231 .setSortPriority(1) 232 .addColumn( 233 "P", "class='target' title='primary'", null, "class='target'", true) 234 .setSortPriority(3) 235 .addColumn("Script", "class='target'", null, "class='target'", true) 236 .setSortPriority(3) 237 .addColumn("Code", "class='target'", null, "class='target'", true) 238 .addColumn( 239 "MS", 240 "class='target' title='modern script'", 241 null, 242 "class='target'", 243 true) 244 .setSortPriority(2); 245 246 TablePrinter tablePrinter2 = 247 new TablePrinter() 248 .addColumn("Script", "class='source'", null, "class='source'", true) 249 .setSpanRows(true) 250 .setSortPriority(0) 251 .setBreakSpans(true) 252 .addColumn( 253 "Code", 254 "class='source'", 255 CldrUtility.getDoubleLinkMsg(), 256 "class='source'", 257 true) 258 .setSpanRows(true) 259 .addColumn( 260 "MS", 261 "class='target' title='modern script'", 262 null, 263 "class='target'", 264 true) 265 .setSpanRows(true) 266 .setSortPriority(1) 267 .addColumn("Language", "class='target'", null, "class='target'", true) 268 .setSortPriority(3) 269 .addColumn("Code", "class='target'", null, "class='target'", true) 270 .addColumn( 271 "ML", 272 "class='target' title='modern language'", 273 null, 274 "class='target'", 275 true) 276 .setSortPriority(2) 277 .addColumn( 278 "P", "class='target' title='primary'", null, "class='target'", true) 279 .setSortPriority(3); 280 281 // get the codes so we can show the remainder 282 Set<String> remainingScripts = 283 new TreeSet<>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS); 284 UnicodeSet temp = new UnicodeSet(); 285 for (String script : getScriptsToShow()) { 286 temp.clear(); 287 try { 288 temp.applyPropertyAlias("script", script); 289 } catch (RuntimeException e) { 290 } // fall through 291 if (temp.size() == 0) { 292 remainingScripts.remove(script); 293 System.out.println("Removing: " + script); 294 } else { 295 System.out.println("Keeping: " + script); 296 } 297 } 298 remainingScripts.remove("Brai"); 299 remainingScripts.remove("Hira"); 300 remainingScripts.remove("Qaai"); 301 remainingScripts.remove("Hrkt"); 302 remainingScripts.remove("Zzzz"); 303 remainingScripts.remove("Zyyy"); 304 305 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 306 for (String language : getLanguagesToShow()) { 307 Scope s = Iso639Data.getScope(language); 308 Type t = Iso639Data.getType(language); 309 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 310 remainingLanguages.remove(language); 311 } 312 } 313 314 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 315 for (String language : languages) { 316 Set<BasicLanguageData> basicLanguageData = 317 supplementalDataInfo.getBasicLanguageData(language); 318 for (BasicLanguageData basicData : basicLanguageData) { 319 String secondary = 320 isOfficial(language) // basicData.getType() == 321 // BasicLanguageData.Type.primary 322 ? "\u00A0" 323 : "N"; 324 for (String script : basicData.getScripts()) { 325 addLanguageScriptCells( 326 tablePrinter, tablePrinter2, language, script, secondary); 327 remainingScripts.remove(script); 328 remainingLanguages.remove(language); 329 } 330 } 331 } 332 for (String language : remainingLanguages) { 333 addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?"); 334 } 335 for (String script : remainingScripts) { 336 addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?"); 337 } 338 339 pw1 = 340 new PrintWriter( 341 new FormattedFileWriter( 342 null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS)); 343 pw1.println(tablePrinter.toTable()); 344 pw1.close(); 345 346 pw1 = 347 new PrintWriter( 348 new FormattedFileWriter( 349 null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS)); 350 pw1.println(tablePrinter2.toTable()); 351 pw1.close(); 352 } 353 354 static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<>(); 355 356 static { 357 for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) { 358 Set<String> territories = 359 supplementalDataInfo.getTerritoriesForPopulationData(language); 360 if (territories == null) { 361 continue; 362 } 363 int underbar = language.indexOf('_'); 364 String base = underbar < 0 ? null : language.substring(0, underbar); 365 366 for (String territory : territories) { 367 PopulationData data = 368 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 369 language, territory); 370 OfficialStatus status = data.getOfficialStatus(); 371 OfficialStatus old; 372 old = languageToBestStatus.get(language); 373 if (old == null || status.compareTo(old) > 0) { 374 languageToBestStatus.put(language, status); 375 } 376 if (base != null) { 377 old = languageToBestStatus.get(base); 378 if (old == null || status.compareTo(old) > 0) { 379 languageToBestStatus.put(base, status); 380 } 381 } 382 } 383 } 384 } 385 386 private static boolean isOfficial(String language) { 387 OfficialStatus status = languageToBestStatus.get(language); 388 if (status != null && status.isMajor()) { 389 return true; 390 } 391 int underbar = language.indexOf('_'); 392 if (underbar < 0) { 393 return false; 394 } 395 return isOfficial(language.substring(0, underbar)); 396 } 397 398 private static Set<String> getLanguagesToShow() { 399 return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME); 400 } 401 402 private static Set<String> getEnglishTypes(String type, int code) { 403 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 404 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) { 405 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 406 String newType = parts.getAttributeValue(-1, "type"); 407 if (!result.contains(newType)) { 408 result.add(newType); 409 } 410 } 411 return result; 412 } 413 414 private static Set<String> getScriptsToShow() { 415 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME); 416 } 417 418 private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) 419 throws IOException { 420 PrintWriter pw1; 421 TablePrinter tablePrinter2 = 422 new TablePrinter() 423 .addColumn( 424 "Sample Char", 425 "class='source'", 426 null, 427 "class='source sample'", 428 true) 429 .setSpanRows(true) 430 .addColumn("Script", "class='source'", null, "class='source'", true) 431 .setSpanRows(true) 432 .setSortPriority(0) 433 .setBreakSpans(true) 434 .addColumn( 435 "Code", 436 "class='source'", 437 CldrUtility.getDoubleLinkMsg(), 438 "class='source'", 439 true) 440 .setSpanRows(true) 441 .addColumn("T", "class='target'", null, "class='target'", true) 442 .setSortPriority(1) 443 .addColumn("Language", "class='target'", null, "class='target'", true) 444 .setSortPriority(2) 445 .addColumn("Native", "class='target'", null, "class='target'", true) 446 .addColumn("Code", "class='target'", null, "class='target'", true) 447 .addColumn("T", "class='target'", null, "class='target'", true) 448 .setSortPriority(3) 449 .addColumn("Territory", "class='target'", null, "class='target'", true) 450 .setSortPriority(4) 451 .addColumn("Native", "class='target'", null, "class='target'", true) 452 .addColumn("Code", "class='target'", null, "class='target'", true); 453 454 // get the codes so we can show the remainder 455 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); 456 Set<String> remainingTerritories = new TreeSet<>(sc.getGoodAvailableCodes("territory")); 457 UnicodeSet temp = new UnicodeSet(); 458 for (String script : getScriptsToShow()) { 459 temp.clear(); 460 try { 461 temp.applyPropertyAlias("script", script); 462 } catch (RuntimeException e) { 463 } // fall through 464 if (temp.size() == 0) { 465 remainingScripts.remove(script); 466 System.out.println("Removing: " + script); 467 } else { 468 System.out.println("Keeping: " + script); 469 } 470 } 471 remainingScripts.remove("Brai"); 472 remainingScripts.remove("Hira"); 473 remainingScripts.remove("Qaai"); 474 remainingScripts.remove("Hrkt"); 475 remainingScripts.remove("Zzzz"); 476 remainingScripts.remove("Zyyy"); 477 478 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 479 for (String language : getLanguagesToShow()) { 480 Scope s = Iso639Data.getScope(language); 481 Type t = Iso639Data.getType(language); 482 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 483 remainingLanguages.remove(language); 484 } 485 } 486 487 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 488 for (String language : languages) { 489 Set<BasicLanguageData> basicLanguageData = 490 supplementalDataInfo.getBasicLanguageData(language); 491 for (BasicLanguageData basicData : basicLanguageData) { 492 if (basicData.getType() != BasicLanguageData.Type.primary) { 493 continue; 494 } 495 Set<String> mainTerritories = getTerritories(language); 496 if (mainTerritories.size() == 0) { 497 continue; 498 // mainTerritories.add("ZZ"); 499 } 500 501 TreeSet<String> mainScripts = new TreeSet<>(basicData.getScripts()); 502 if (mainScripts.size() == 0) { 503 continue; 504 } 505 for (String script : mainScripts) { 506 for (String territory : mainTerritories) { 507 addLanguageScriptCells2(tablePrinter2, language, script, territory); 508 remainingTerritories.remove(territory); 509 } 510 remainingScripts.remove(script); 511 } 512 } 513 remainingLanguages.remove(language); 514 } 515 // for (String language : remainingLanguages) { 516 // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ"); 517 // } 518 // for (String script : remainingScripts) { 519 // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ"); 520 // } 521 // for (String territory : remainingTerritories) { 522 // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory); 523 // } 524 525 pw1 = 526 new PrintWriter( 527 new FormattedFileWriter( 528 null, 529 "Scripts, Languages, and Territories", 530 null, 531 SUPPLEMENTAL_INDEX_ANCHORS)); 532 pw1.println(tablePrinter2.toTable()); 533 pw1.close(); 534 } 535 536 private static Relation<String, String> territoryFix; 537 getTerritories(String language)538 private static Set<String> getTerritories(String language) { 539 if (territoryFix == null) { // set up the data 540 initTerritoryFix(); 541 } 542 Set<String> territories = territoryFix.getAll(language); 543 if (territories == null) { 544 territories = new TreeSet<>(); 545 } 546 return territories; 547 } 548 initTerritoryFix()549 private static void initTerritoryFix() { 550 territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 551 Set<String> languages = supplementalDataInfo.getLanguages(); 552 LanguageTagParser ltp = new LanguageTagParser(); 553 for (String language2 : languages) { 554 if (language2.contains("_")) { 555 ltp.set(language2).getLanguage(); 556 addOfficialTerritory(ltp, language2, ltp.getLanguage()); 557 } else { 558 addOfficialTerritory(ltp, language2, language2); 559 } 560 } 561 } 562 addOfficialTerritory( LanguageTagParser ltp, String language, String baseLanguage)563 private static void addOfficialTerritory( 564 LanguageTagParser ltp, String language, String baseLanguage) { 565 // territoryFix.putAll(baseLanguage, 566 // supplementalDataInfo.getTerritoriesForPopulationData(language)); 567 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 568 if (territories == null) { 569 return; 570 } 571 for (String territory : territories) { 572 PopulationData data = 573 supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 574 OfficialStatus status = data.getOfficialStatus(); 575 if (status.isMajor()) { 576 territoryFix.put(baseLanguage, territory); 577 System.out.println( 578 "\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language); 579 } 580 } 581 } 582 addLanguageScriptCells2( TablePrinter tablePrinter2, String language, String script, String territory)583 private static void addLanguageScriptCells2( 584 TablePrinter tablePrinter2, String language, String script, String territory) { 585 CLDRFile nativeLanguage = null; 586 if (SHOW_NATIVE) { 587 try { 588 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true); 589 } catch (RuntimeException e) { 590 try { 591 nativeLanguage = cldrFactory.make(language + "_" + script, true); 592 } catch (RuntimeException e2) { 593 try { 594 nativeLanguage = cldrFactory.make(language, true); 595 } catch (RuntimeException e3) { 596 } 597 } 598 } 599 // check for overlap 600 if (nativeLanguage != null 601 && !script.equals("Jpan") 602 && !script.equals("Hans") 603 && !script.equals("Hant")) { 604 UnicodeSet scriptSet; 605 try { 606 String tempScript = script.equals("Kore") ? "Hang" : script; 607 scriptSet = new UnicodeSet("[:script=" + tempScript + ":]"); 608 } catch (RuntimeException e) { 609 scriptSet = new UnicodeSet(); 610 } 611 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING); 612 if (scriptSet.containsNone(exemplars)) { 613 System.out.println( 614 "Skipping CLDR file -- exemplars differ: " 615 + language 616 + "\t" 617 + nativeLanguage.getLocaleID() 618 + "\t" 619 + scriptSet 620 + "\t" 621 + exemplars); 622 nativeLanguage = null; 623 } 624 } 625 } 626 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 627 if (languageName == null) languageName = "???"; 628 String isLanguageTranslated = ""; 629 String nativeLanguageName = 630 nativeLanguage == null 631 ? null 632 : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME, language); 633 if (nativeLanguageName == null || nativeLanguageName.equals(language)) { 634 nativeLanguageName = "<i>n/a</i>"; 635 isLanguageTranslated = "n"; 636 } 637 638 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 639 // String nativeScriptName = nativeLanguage == null ? null : 640 // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script); 641 // if (nativeScriptName != null && !nativeScriptName.equals(script)) { 642 // scriptName = nativeScriptName + "[" + scriptName + "]"; 643 // } 644 645 String isTerritoryTranslated = ""; 646 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory); 647 String nativeTerritoryName = 648 nativeLanguage == null 649 ? null 650 : nativeLanguage.getName(CLDRFile.TERRITORY_NAME, territory); 651 if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) { 652 nativeTerritoryName = "<i>n/a</i>"; 653 isTerritoryTranslated = "n"; 654 } 655 656 // Type t = Iso639Data.getType(language); 657 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == 658 // Type.Living) { 659 // // ok 660 // } else if (!language.equals("und")){ 661 // scriptModern = "N"; 662 // } 663 // String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : 664 // ""; 665 666 Info scriptMetatdata = ScriptMetadata.getInfo(script); 667 tablePrinter2 668 .addRow() 669 .addCell(scriptMetatdata.sampleChar) 670 .addCell(scriptName) 671 .addCell(script) 672 .addCell(isLanguageTranslated) 673 .addCell(languageName) 674 .addCell(nativeLanguageName) 675 .addCell(language) 676 .addCell(isTerritoryTranslated) 677 .addCell(territoryName) 678 .addCell(nativeTerritoryName) 679 .addCell(territory) 680 .finishRow(); 681 } 682 683 static ImmutableMap<String, String> fixScriptGif = 684 ImmutableMap.<String, String>builder() 685 .put("hangul", "hangulsyllables") 686 .put("japanese", "hiragana") 687 .put("unknown or invalid script", "unknown") 688 .put("Hant", "Hant") 689 .put("Hans", "Hans") 690 .build(); 691 getGifName(String script)692 private static String getGifName(String script) { 693 String temp = fixScriptGif.get(script); 694 if (temp != null) { 695 return temp; 696 } 697 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 698 scriptName = scriptName.toLowerCase(Locale.ENGLISH); 699 temp = fixScriptGif.get(scriptName); 700 if (temp != null) { 701 return temp; 702 } 703 return scriptName; 704 } 705 706 private static Set<Type> oldLanguage = 707 Collections.unmodifiableSet( 708 EnumSet.of(Type.Ancient, Type.Extinct, Type.Historical, Type.Constructed)); 709 addLanguageScriptCells( TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, String script, String secondary)710 private static void addLanguageScriptCells( 711 TablePrinter tablePrinter, 712 TablePrinter tablePrinter2, 713 String language, 714 String script, 715 String secondary) { 716 try { 717 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 718 if (languageName == null) { 719 languageName = "¿" + language + "?"; 720 System.err.println("No English Language Name for:" + language); 721 } 722 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 723 if (scriptName == null) { 724 scriptName = "¿" + script + "?"; 725 System.err.println("No English Language Name for:" + script); 726 } 727 String scriptModern = 728 StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N"; 729 // Scope s = Iso639Data.getScope(language); 730 Type t = Iso639Data.getType(language); 731 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t 732 // == Type.Living) { 733 // // ok 734 // } else if (!language.equals("und")){ 735 // scriptModern = "N"; 736 // } 737 String languageModern = 738 oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 739 740 tablePrinter 741 .addRow() 742 .addCell(languageName) 743 .addCell(language) 744 .addCell(languageModern) 745 .addCell(secondary) 746 .addCell(scriptName) 747 .addCell(script) 748 .addCell(scriptModern) 749 .finishRow(); 750 751 tablePrinter2 752 .addRow() 753 .addCell(scriptName) 754 .addCell(script) 755 .addCell(scriptModern) 756 .addCell(languageName) 757 .addCell(language) 758 .addCell(languageModern) 759 .addCell(secondary) 760 .finishRow(); 761 } catch (RuntimeException e) { 762 throw e; 763 } 764 } 765 766 // TODO This is old code that read supplemental data. Should be replaced by using 767 // SupplementalDataInfo. 768 // https://unicode-org.atlassian.net/browse/CLDR-15673 769 770 static class LanguageInfo { 771 private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<>(); 772 773 Multimap<String, String> language_scripts = TreeMultimap.create(); 774 775 Multimap<String, String> language_territories = TreeMultimap.create(); 776 777 List<Map<String, String>> deprecatedItems = new ArrayList<>(); 778 779 Multimap<String, String> territory_languages; 780 781 Multimap<String, String> script_languages; 782 783 // Map group_contains = new TreeMap(); 784 785 Set<String[]> aliases = 786 new TreeSet<String[]>( 787 new ArrayComparator(new Comparator[] {new UTF16.StringComparator(), col})); 788 789 Comparator col3 = new ArrayComparator(new Comparator[] {col, col, col}); 790 791 Map<String, String> currency_fractions = new TreeMap<String, String>(col); 792 793 Map<String, Set> currency_territory = new TreeMap<String, Set>(col); 794 795 Map<String, Set> territory_currency = new TreeMap<String, Set>(col); 796 797 Set<String> territoriesWithCurrencies = new TreeSet<>(); 798 799 Set<String> currenciesWithTerritories = new TreeSet<>(); 800 801 Map<String, Map<String, Set<String>>> territoryData = new TreeMap<>(); 802 803 Set<String> territoryTypes = new TreeSet<>(); 804 805 Map<String, LinkedHashSet<String>> charSubstitutions = 806 new TreeMap<String, LinkedHashSet<String>>(col); 807 808 String defaultDigits = null; 809 810 Map<String, Map<String, Object>> territoryLanguageData = new TreeMap<>(); 811 812 private Relation<String, String> territoriesToModernCurrencies = 813 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null); 814 LanguageInfo(Factory cldrFactory)815 public LanguageInfo(Factory cldrFactory) throws IOException { 816 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 817 for (Iterator<String> it = supp.iterator(); it.hasNext(); ) { 818 String path = it.next(); 819 String fullPath = supp.getFullXPath(path); 820 if (fullPath == null) { 821 supp.getFullXPath(path); 822 } 823 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 824 825 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/> 826 if (path.indexOf("/zoneItem") >= 0) { 827 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 828 String type = attributes.get("type"); 829 // String territory = attributes.get("territory"); 830 String aliasAttributes = attributes.get("aliases"); 831 if (aliasAttributes != null) { 832 String[] aliasesList = aliasAttributes.split("\\s+"); 833 834 for (int i = 0; i < aliasesList.length; ++i) { 835 String alias = aliasesList[i]; 836 aliases.add(new String[] {"timezone", alias, type}); 837 } 838 } 839 // TODO territory, multizone 840 continue; 841 } 842 843 if (path.indexOf("/currencyData") >= 0) { 844 if (path.indexOf("/fractions") >= 0) { 845 // <info iso4217="ADP" digits="0" rounding="0"/> 846 String element = parts.getElement(parts.size() - 1); 847 if (!element.equals("info")) 848 throw new IllegalArgumentException( 849 "Unexpected fractions element: " + element); 850 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 851 String iso4217 = attributes.get("iso4217"); 852 String digits = attributes.get("digits"); 853 String rounding = attributes.get("rounding"); 854 digits = digits + (rounding.equals("0") ? "" : " (" + rounding + ")"); 855 if (iso4217.equals("DEFAULT")) defaultDigits = digits; 856 else 857 currency_fractions.put( 858 getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits); 859 continue; 860 } 861 // <region iso3166="AR"> 862 // <currency iso4217="ARS" from="1992-01-01"/> 863 if (path.indexOf("/region") >= 0) { 864 Map<String, String> attributes = parts.getAttributes(parts.size() - 2); 865 String iso3166 = attributes.get("iso3166"); 866 attributes = parts.getAttributes(parts.size() - 1); 867 String iso4217 = attributes.get("iso4217"); 868 String to = attributes.get("to"); 869 if (to == null) to = "\u221E"; 870 String from = attributes.get("from"); 871 if (from == null) from = "-\u221E"; 872 String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false); 873 String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false); 874 Set info = territory_currency.get(countryName); 875 if (info == null) 876 territory_currency.put(countryName, info = new TreeSet(col3)); 877 info.add(new String[] {from, to, currencyName}); 878 info = currency_territory.get(currencyName); 879 if (info == null) 880 currency_territory.put(currencyName, info = new TreeSet(col)); 881 territoriesWithCurrencies.add(iso3166); 882 currenciesWithTerritories.add(iso4217); 883 if (to.equals("\u221E") || to.compareTo("2006") > 0) { 884 territoriesToModernCurrencies.put(iso3166, iso4217); 885 info.add("<b>" + countryName + "</b>"); 886 887 } else { 888 info.add("<i>" + countryName + "</i>"); 889 } 890 continue; 891 } 892 } 893 894 if (path.indexOf("/languageData") >= 0) { 895 Map<String, String> attributes = parts.findAttributes("language"); 896 String language = attributes.get("type"); 897 String alt = attributes.get("alt"); 898 addTokens(language, attributes.get("scripts"), " ", language_scripts); 899 // mark the territories 900 if (alt == null) 901 ; // nothing 902 else if ("secondary".equals(alt)) language += "*"; 903 else language += "*" + alt; 904 // <language type="af" scripts="Latn" territories="ZA"/> 905 addTokens(language, attributes.get("territories"), " ", language_territories); 906 continue; 907 } 908 909 if (path.indexOf("/deprecatedItems") >= 0) { 910 deprecatedItems.add(parts.findAttributes("deprecatedItems")); 911 continue; 912 } 913 if (path.indexOf("/calendarData") >= 0) { 914 Map<String, String> attributes = parts.findAttributes("calendar"); 915 if (attributes == null) { 916 System.err.println( 917 "Err: on path " 918 + fullPath 919 + " , no attributes on 'calendar'. Probably, this tool is out of date."); 920 } else { 921 String type = attributes.get("type"); 922 String territories = attributes.get("territories"); 923 if (territories == null) { 924 System.err.println( 925 "Err: on path " 926 + fullPath 927 + ", missing territories. Probably, this tool is out of date."); 928 } else if (type == null) { 929 System.err.println( 930 "Err: on path " 931 + fullPath 932 + ", missing type. Probably, this tool is out of date."); 933 } else { 934 addTerritoryInfo(territories, "calendar", type); 935 } 936 } 937 } 938 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { 939 String element = parts.getElement(parts.size() - 1); 940 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 941 // later, make this a table 942 String key = "count"; 943 String display = "Days in week (min)"; 944 boolean useTerritory = true; 945 switch (element) { 946 case "firstDay": 947 key = "day"; 948 display = "First day of week"; 949 break; 950 case "weekendStart": 951 key = "day"; 952 display = "First day of weekend"; 953 break; 954 case "weekendEnd": 955 key = "day"; 956 display = "Last day of weekend"; 957 break; 958 case "measurementSystem": 959 // <measurementSystem type="metric" territories="001"/> 960 key = "type"; 961 display = "Meas. system"; 962 break; 963 case "paperSize": 964 key = "type"; 965 display = "Paper Size"; 966 break; 967 case "weekOfPreference": 968 useTerritory = false; 969 break; 970 } 971 if (useTerritory) { 972 String type = attributes.get(key); 973 String territories = attributes.get("territories"); 974 addTerritoryInfo(territories, display, type); 975 } 976 } 977 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) continue; 978 System.out.println("Skipped Element: " + path); 979 } 980 981 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 982 for (String language : 983 supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( 984 territory)) { 985 language_territories.put(language, territory); 986 } 987 } 988 territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); 989 script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); 990 991 // now get some metadata 992 localeAliasInfo.put("language", new TreeMap<String, String>()); 993 localeAliasInfo.put("script", new TreeMap<String, String>()); 994 localeAliasInfo.put("territory", new TreeMap<String, String>()); 995 localeAliasInfo.put("variant", new TreeMap<String, String>()); 996 localeAliasInfo.put("zone", new TreeMap<String, String>()); 997 localeAliasInfo.put("subdivision", new TreeMap<String, String>()); 998 localeAliasInfo.put("unit", new TreeMap<String, String>()); 999 localeAliasInfo.put("usage", new TreeMap<String, String>()); 1000 1001 // localeAliasInfo.get("language").put("nb", "no"); 1002 localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); 1003 localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); 1004 localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); 1005 localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); 1006 localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); 1007 1008 // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); 1009 Map<String, Map<String, R2<List<String>, String>>> localeAliasInfo2 = 1010 supplementalDataInfo.getLocaleAliasInfo(); 1011 for (Entry<String, Map<String, R2<List<String>, String>>> entry1 : 1012 localeAliasInfo2.entrySet()) { 1013 String element = entry1.getKey(); 1014 for (Entry<String, R2<List<String>, String>> entry2 : 1015 entry1.getValue().entrySet()) { 1016 String type = entry2.getKey(); 1017 R2<List<String>, String> replacementReason = entry2.getValue(); 1018 List<String> replacementList = replacementReason.get0(); 1019 String replacement = 1020 replacementList == null ? null : Joiner.on(" ").join(replacementList); 1021 String reason = replacementReason.get1(); 1022 if (element.equals("timezone")) { 1023 element = "zone"; 1024 } 1025 try { 1026 localeAliasInfo 1027 .get(element) 1028 .put(type, replacement == null ? "?" : replacement); 1029 } catch (Exception e) { 1030 // TODO Auto-generated catch block 1031 throw new IllegalArgumentException( 1032 "Can't find alias data for '" + element + "'", e); 1033 } 1034 1035 String name = ""; 1036 if (replacement == null) { 1037 name = "(none)"; 1038 } else if (element.equals("language")) { 1039 name = getName(replacement, false); 1040 } else if (element.equals("zone")) { 1041 element = "timezone"; 1042 name = replacement + "*"; 1043 } else { 1044 int typeCode = CLDRFile.typeNameToCode(element); 1045 if (typeCode >= 0) { 1046 name = getName(typeCode, replacement, false); 1047 } else { 1048 name = "*" + replacement; 1049 } 1050 } 1051 if (element.equals("territory")) { 1052 territoryAliases.put(type, name); 1053 aliases.add( 1054 new String[] { 1055 element, 1056 getName(CLDRFile.TERRITORY_NAME, type, false), 1057 name, 1058 reason 1059 }); 1060 } else { 1061 aliases.add(new String[] {element, type, name, reason}); 1062 } 1063 continue; 1064 } 1065 } 1066 Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); 1067 Log.close(); 1068 } 1069 printLikelySubtags(PrintWriter index)1070 public void printLikelySubtags(PrintWriter index) throws IOException { 1071 1072 PrintWriter pw = 1073 new PrintWriter( 1074 new FormattedFileWriter( 1075 null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1076 1077 TablePrinter tablePrinter = 1078 new TablePrinter() 1079 .addColumn( 1080 "Source Lang", "class='source'", null, "class='source'", true) 1081 .setSortPriority(1) 1082 .setSpanRows(false) 1083 .addColumn( 1084 "Source Script", "class='source'", null, "class='source'", true) 1085 .setSortPriority(0) 1086 .setSpanRows(false) 1087 .setBreakSpans(true) 1088 .addColumn( 1089 "Source Region", "class='source'", null, "class='source'", true) 1090 .setSortPriority(2) 1091 .setSpanRows(false) 1092 .addColumn( 1093 "Target Lang", "class='target'", null, "class='target'", true) 1094 .setSortPriority(3) 1095 .setBreakSpans(true) 1096 .addColumn( 1097 "Target Script", "class='target'", null, "class='target'", true) 1098 .setSortPriority(4) 1099 .addColumn( 1100 "Target Region", "class='target'", null, "class='target'", true) 1101 .setSortPriority(5) 1102 .addColumn( 1103 "Source ID", 1104 "class='source'", 1105 CldrUtility.getDoubleLinkMsg(), 1106 "class='source'", 1107 true) 1108 .addColumn("Target ID", "class='target'", null, "class='target'", true); 1109 Map<String, String> subtags = supplementalDataInfo.getLikelySubtags(); 1110 LanguageTagParser sourceParsed = new LanguageTagParser(); 1111 LanguageTagParser targetParsed = new LanguageTagParser(); 1112 for (String source : subtags.keySet()) { 1113 String target = subtags.get(source); 1114 sourceParsed.set(source); 1115 targetParsed.set(target); 1116 tablePrinter 1117 .addRow() 1118 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage())) 1119 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript())) 1120 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion())) 1121 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage())) 1122 .addCell(getName(CLDRFile.SCRIPT_NAME, targetParsed.getScript())) 1123 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion())) 1124 .addCell(source) 1125 .addCell(target) 1126 .finishRow(); 1127 } 1128 pw.println(tablePrinter.toTable()); 1129 pw.close(); 1130 } 1131 1132 static class LanguageData extends R4<Double, Double, Double, String> { LanguageData(Double a, Double b, Double c, String d)1133 public LanguageData(Double a, Double b, Double c, String d) { 1134 super(a, b, c, d); 1135 } 1136 } 1137 getName(final int type, final String value)1138 private String getName(final int type, final String value) { 1139 if (value == null || value.equals("") || value.equals("und")) { 1140 return "\u00A0"; 1141 } 1142 String result = english.getName(type, value); 1143 if (result == null) { 1144 result = value; 1145 } 1146 return result; 1147 } 1148 1149 static final Comparator INVERSE_COMPARABLE = 1150 new Comparator() { 1151 @Override 1152 public int compare(Object o1, Object o2) { 1153 return ((Comparable) o2).compareTo(o1); 1154 } 1155 }; 1156 1157 // http://www.faqs.org/rfcs/rfc2396.html 1158 // delims = "<" | ">" | "#" | "%" | <"> 1159 // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 1160 // Within a query component, the characters ";", "/", "?", ":", "@", 1161 // "&", "=", "+", ",", and "$" are reserved. 1162 static final UnicodeSet ESCAPED_URI_QUERY = 1163 new UnicodeSet( 1164 "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]") 1165 .freeze(); 1166 1167 private static final int MINIMAL_BIG_VENDOR = 8; 1168 1169 static { System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement())1170 System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement()); 1171 } 1172 urlEncode(String input)1173 private String urlEncode(String input) { 1174 try { 1175 byte[] utf8 = input.getBytes("utf-8"); 1176 StringBuffer output = new StringBuffer(); 1177 for (int i = 0; i < utf8.length; ++i) { 1178 int b = utf8[i] & 0xFF; 1179 if (ESCAPED_URI_QUERY.contains(b)) { 1180 output.append('%'); 1181 if (b < 0x10) output.append('0'); 1182 output.append(Integer.toString(b, 16)); 1183 } else { 1184 output.append((char) b); 1185 } 1186 } 1187 return output.toString(); 1188 } catch (UnsupportedEncodingException e) { 1189 throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); 1190 } 1191 } 1192 addBug( int bugNumber, String text, String from, String subject, String body)1193 private String addBug( 1194 int bugNumber, String text, String from, String subject, String body) { 1195 return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL + "'>" + text + "</a>"; 1196 } 1197 showLanguageCountryInfo(PrintWriter pw)1198 private void showLanguageCountryInfo(PrintWriter pw) throws IOException { 1199 FormattedFileWriter ffw = 1200 new FormattedFileWriter( 1201 null, 1202 "Language-Territory Information", 1203 null 1204 // "<div style='margin:1em'><p>The language data is provided for 1205 // localization testing, and is under development for CLDR 1.5. " 1206 // + 1207 // "To add a new territory for a language, see the <i>add new</i> links 1208 // below. " + 1209 // "For more information, see <a 1210 // href=\"territory_language_information.html\">Territory-Language 1211 // Information.</a>" 1212 // + 1213 // "<p></div>" 1214 , 1215 SUPPLEMENTAL_INDEX_ANCHORS); 1216 PrintWriter pw21 = new PrintWriter(ffw); 1217 PrintWriter pw2 = pw21; 1218 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1219 nf.setGroupingUsed(true); 1220 // NumberFormat percent = new DecimalFormat("000.0%"); 1221 TablePrinter tablePrinter = 1222 new TablePrinter() 1223 // tablePrinter.setSortPriorities(0,5) 1224 .addColumn("L", "class='source'", null, "class='source'", true) 1225 .setSortPriority(0) 1226 .setBreakSpans(true) 1227 .setRepeatHeader(true) 1228 .setHidden(true) 1229 .addColumn("Language", "class='source'", null, "class='source'", true) 1230 .setSortPriority(0) 1231 .setBreakSpans(true) 1232 .addColumn( 1233 "Code", 1234 "class='source'", 1235 CldrUtility.getDoubleLinkMsg(), 1236 "class='source'", 1237 true) 1238 // .addColumn("Report Bug", "class='target'", null, "class='target'", 1239 // false) 1240 .addColumn("Territory", "class='target'", null, "class='target'", true) 1241 .addColumn( 1242 "Code", 1243 "class='target'", 1244 "<a href=\"territory_language_information.html#{0}\">{0}</a>", 1245 "class='target'", 1246 true) 1247 .addColumn( 1248 "Language Population", 1249 "class='target'", 1250 "{0,number,#,#@@}", 1251 "class='targetRight'", 1252 true) 1253 .setSortPriority(1) 1254 .setSortAscending(false) 1255 // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", 1256 // "class='targetRight'", true) 1257 // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", 1258 // "class='targetRight'", true) 1259 // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", 1260 // "class='targetRight'", true) 1261 // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", 1262 // "class='targetRight'", true) 1263 ; 1264 TreeSet<String> languages = new TreeSet<>(); 1265 Collection<Comparable[]> data = new ArrayList<>(); 1266 String msg = "<br><i>Please click on each country code</i>"; 1267 1268 Collection<Comparable[]> plainData = new ArrayList<>(); 1269 1270 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1271 // PopulationData territoryData = 1272 // supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1273 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1274 for (String languageCode : 1275 supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( 1276 territoryCode)) { 1277 PopulationData languageData = 1278 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 1279 languageCode, territoryCode); 1280 languages.add(languageCode); 1281 Comparable[] items = 1282 new Comparable[] { 1283 getFirstPrimaryWeight(getLanguageName(languageCode)), 1284 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, 1285 // languageCode), 1286 languageCode, 1287 // bug, 1288 territoryName + getOfficialStatus(territoryCode, languageCode), 1289 territoryCode, 1290 languageData.getPopulation(), 1291 // population, 1292 // languageliteracy, 1293 // territoryLiteracy, 1294 // gdp 1295 }; 1296 Comparable[] plainItems = 1297 new Comparable[] { 1298 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, 1299 // languageCode), 1300 languageCode, 1301 territoryName, 1302 territoryCode, 1303 getRawOfficialStatus(territoryCode, languageCode), 1304 languageData.getPopulation(), 1305 languageData.getLiteratePopulation() 1306 }; 1307 1308 data.add(items); 1309 plainData.add(plainItems); 1310 } 1311 } 1312 for (String languageCode : languages) { 1313 Comparable[] items = 1314 new Comparable[] { 1315 getFirstPrimaryWeight(getLanguageName(languageCode)), 1316 getLanguageName( 1317 languageCode), // + getLanguagePluralMessage(msg, languageCode), 1318 languageCode, 1319 // bug, 1320 addBug( 1321 1217, 1322 "<i>add new</i>", 1323 "<email>", 1324 "Add territory to " 1325 + getLanguageName(languageCode) 1326 + " (" 1327 + languageCode 1328 + ")", 1329 "<territory, speaker population in territory, and references>"), 1330 "", 1331 0.0d, 1332 // 0.0d, 1333 // 0.0d, 1334 // 0.0d, 1335 // gdp 1336 }; 1337 data.add(items); 1338 } 1339 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1340 String value = tablePrinter.addRows(flattened).toTable(); 1341 pw2.println(value); 1342 pw2.close(); 1343 try (PrintWriter pw21plain = 1344 FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) { 1345 for (Comparable[] row : plainData) { 1346 pw21plain.println(Joiner.on("\t").join(row)); 1347 } 1348 } 1349 } 1350 getLanguagePluralMessage(String msg, String languageCode)1351 private String getLanguagePluralMessage(String msg, String languageCode) { 1352 String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage(); 1353 String messageWithPlurals = 1354 msg 1355 + ", on <a href='language_plural_rules.html#" 1356 + mainLanguageCode 1357 + "'>plurals</a>" 1358 + ", and on <a href='likely_subtags.html#" 1359 + mainLanguageCode 1360 + "'>likely-subtags</a>"; 1361 return messageWithPlurals; 1362 } 1363 getLanguageName(String languageCode)1364 private String getLanguageName(String languageCode) { 1365 String result = english.getName(languageCode, true, CLDRFile.SHORT_ALTS); 1366 if (!result.equals(languageCode)) return result; 1367 Set<String> names = Iso639Data.getNames(languageCode); 1368 if (names != null && names.size() != 0) { 1369 return names.iterator().next(); 1370 } 1371 return languageCode; 1372 } 1373 1374 static final Set<Organization> TC_Vendors = 1375 Sets.union( 1376 Organization.getTCOrgs(), 1377 // This adds the CLDR org at the end of the list 1378 Set.of(Organization.cldr)); 1379 showCoverageGoals(PrintWriter pw)1380 private void showCoverageGoals(PrintWriter pw) throws IOException { 1381 try (PrintWriter pw2 = 1382 new PrintWriter( 1383 new FormattedFileWriter( 1384 null, 1385 "Coverage Goals", 1386 null, 1387 SUPPLEMENTAL_INDEX_ANCHORS)); 1388 PrintWriter coverageGoalsTsv = 1389 FileUtilities.openUTF8Writer( 1390 CLDRPaths.CHART_DIRECTORY + "tsv/", "coverage_goals.tsv"); ) { 1391 1392 TablePrinter tablePrinter = 1393 new TablePrinter() 1394 // tablePrinter.setSortPriorities(0,4) 1395 .addColumn( 1396 "Language", "class='source'", null, "class='source'", false) 1397 .setSortPriority(0) 1398 .setBreakSpans(false) 1399 .addColumn( 1400 "Code", 1401 "class='source'", 1402 "<a href=\"" 1403 + CLDRURLS.CLDR_REPO_ROOT 1404 + "/blob/main/common/main/{0}.xml\">{0}</a>", 1405 "class='source'", 1406 false) 1407 .addColumn( 1408 "D. Votes", 1409 "class='target'", 1410 null, 1411 "class='target'", 1412 false); 1413 1414 Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes(); 1415 Set<String> locales = new TreeSet<>(); 1416 Set<Organization> vendors = new LinkedHashSet<>(); 1417 Set<Organization> smallVendors = new LinkedHashSet<>(); 1418 1419 for (Organization organization : TC_Vendors) { 1420 // if (vendor.equals(Organization.java)) continue; 1421 Map<String, Level> data = vendordata.get(organization); 1422 vendors.add(organization); 1423 tablePrinter 1424 .addColumn( 1425 organization.getDisplayName(), 1426 "class='target'", 1427 null, 1428 "class='target'", 1429 false) 1430 .setSpanRows(false); 1431 locales.addAll(data.keySet()); 1432 showTabbedOrgLevels(coverageGoalsTsv, organization, data); 1433 } 1434 1435 for (Entry<Organization, Map<String, Level>> vendorData : vendordata.entrySet()) { 1436 Organization organization = vendorData.getKey(); 1437 if (!TC_Vendors.contains(organization)) { 1438 smallVendors.add(organization); 1439 Map<String, Level> data = vendordata.get(organization); 1440 showTabbedOrgLevels(coverageGoalsTsv, organization, data); 1441 continue; 1442 } 1443 } 1444 1445 Collection<Comparable[]> data = new ArrayList<>(); 1446 List<String> list = new ArrayList<>(); 1447 LanguageTagParser ltp = new LanguageTagParser(); 1448 // String alias2 = getAlias("sh_YU"); 1449 1450 pw2.append("<h2>TC Orgs</h2>"); 1451 1452 for (String locale : locales) { 1453 list.clear(); 1454 String localeCode = locale.equals("*") ? "und" : locale; 1455 String alias = getAlias(localeCode); 1456 if (!alias.equals(localeCode)) { 1457 throw new IllegalArgumentException( 1458 "Should use canonical form: " + locale + " => " + alias); 1459 } 1460 // String baseLang = ltp.set(localeCode).getLanguage(); 1461 String baseLangName = getLanguageName(localeCode); 1462 list.add("und".equals(localeCode) ? "other" : baseLangName); 1463 list.add(locale); 1464 int defaultVotes = 1465 supplementalDataInfo.getRequiredVotes( 1466 CLDRLocale.getInstance(locale), null); 1467 list.add(String.valueOf(defaultVotes)); 1468 for (Organization vendor : vendors) { 1469 String status = getVendorStatus(locale, vendor, vendordata); 1470 // if (!baseLang.equals(locale) && 1471 // !status.startsWith("<")) { 1472 // String langStatus = getVendorStatus(baseLang, 1473 // vendor, 1474 // vendordata); 1475 // if (!langStatus.equals(status)) { 1476 // status += "*"; 1477 // } 1478 // } 1479 list.add(status); 1480 } 1481 data.add(list.toArray(new String[list.size()])); 1482 } 1483 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1484 String value = tablePrinter.addRows(flattened).toTable(); 1485 pw2.println(value); 1486 1487 pw2.append("<h2>Others</h2><div align='left'><ul>"); 1488 1489 for (Organization vendor2 : smallVendors) { 1490 pw2.append("<li><b>"); 1491 pw2.append(TransliteratorUtilities.toHTML.transform(vendor2.getDisplayName())) 1492 .append(": </b>"); 1493 boolean first1 = true; 1494 for (Level level : Level.values()) { 1495 boolean first2 = true; 1496 Level other = null; 1497 for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) { 1498 String key = data2.getKey(); 1499 Level level2 = data2.getValue(); 1500 if (level != level2) { 1501 continue; 1502 } 1503 if (key.equals("*")) { 1504 other = level2; 1505 continue; 1506 } 1507 if (first2) { 1508 if (first1) { 1509 first1 = false; 1510 } else { 1511 pw2.append("; "); 1512 } 1513 pw2.append(level2.toString()).append(": "); 1514 first2 = false; 1515 } else { 1516 pw2.append(", "); 1517 } 1518 pw2.append(TransliteratorUtilities.toHTML.transform(key)); 1519 } 1520 if (other != null) { 1521 if (first2) { 1522 if (first1) { 1523 first1 = false; 1524 } else { 1525 pw2.append("; "); 1526 } 1527 pw2.append(level.toString()).append(": "); 1528 first2 = false; 1529 } else { 1530 pw2.append(", "); 1531 } 1532 pw2.append("<i>other</i>"); 1533 } 1534 } 1535 pw2.append("</li>"); 1536 } 1537 pw2.append("</ul></div>"); 1538 } 1539 } 1540 showTabbedOrgLevels( PrintWriter coverageGoalsTsv, Organization organization, Map<String, Level> data)1541 public void showTabbedOrgLevels( 1542 PrintWriter coverageGoalsTsv, Organization organization, Map<String, Level> data) { 1543 coverageGoalsTsv.println( 1544 String.format( 1545 "\n#%s\t;\t%s\t;\t%s\t;\t%s\n", 1546 "Org", "Locale", "Level", "Locale Name")); 1547 for (Entry<String, Level> entry : data.entrySet()) { 1548 String locale = entry.getKey(); 1549 Level level = entry.getValue(); 1550 final String name = 1551 locale.equals("*") 1552 ? "ALL" 1553 : english.getName(locale, true, CLDRFile.SHORT_ALTS); 1554 coverageGoalsTsv.println( 1555 String.format( 1556 "%s\t;\t%s\t;\t%s\t;\t%s", organization, locale, level, name)); 1557 } 1558 } 1559 1560 LanguageTagParser lpt2 = new LanguageTagParser(); 1561 1562 // TODO replace this with standard call. 1563 getAlias(String locale)1564 private String getAlias(String locale) { 1565 lpt2.set(locale); 1566 locale = lpt2.toString(); // normalize 1567 // String language = lpt2.getLanguage(); 1568 String script = lpt2.getScript(); 1569 String region = lpt2.getRegion(); 1570 // List variants = lpt2.getVariants(); 1571 String temp; 1572 for (String old : localeAliasInfo.get("language").keySet()) { 1573 if (locale.startsWith(old)) { 1574 // the above is a rough check, and will fail with old=moh and locale=mo 1575 if (!locale.equals(old) && !locale.startsWith(old + "_")) { 1576 continue; 1577 } 1578 temp = localeAliasInfo.get("language").get(old); 1579 lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); 1580 break; 1581 } 1582 } 1583 temp = localeAliasInfo.get("script").get(script); 1584 if (temp != null) { 1585 lpt2.setScript(temp.split("\\s+")[0]); 1586 } 1587 temp = localeAliasInfo.get("territory").get(region); 1588 if (temp != null) { 1589 lpt2.setRegion(temp.split("\\s+")[0]); 1590 } 1591 return lpt2.toString(); 1592 } 1593 getVendorStatus( String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata)1594 private String getVendorStatus( 1595 String locale, 1596 Organization vendor, 1597 Map<Organization, Map<String, Level>> vendordata) { 1598 Level statusLevel = vendordata.get(vendor).get(locale); 1599 return statusLevel == null ? "" : statusLevel.toString(); 1600 // String status = statusLevel == null ? null : statusLevel.toString(); 1601 // String curLocale = locale; 1602 // while (status == null) { 1603 // curLocale = LocaleIDParser.getParent(curLocale); 1604 // if ("root".equals(curLocale)) { 1605 // status = " "; 1606 // break; 1607 // } 1608 // statusLevel = vendordata.get(vendor).get(curLocale); 1609 // if (statusLevel != null) { 1610 // status = statusLevel + "†"; 1611 // } 1612 // } 1613 // return status; 1614 } 1615 showCountryLanguageInfo(PrintWriter pw)1616 private void showCountryLanguageInfo(PrintWriter pw) throws IOException { 1617 PrintWriter pw21 = 1618 new PrintWriter( 1619 new FormattedFileWriter( 1620 null, 1621 "Territory-Language Information", 1622 null, 1623 SUPPLEMENTAL_INDEX_ANCHORS)); 1624 PrintWriter pw2 = pw21; 1625 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1626 nf.setGroupingUsed(true); 1627 // NumberFormat percent = new DecimalFormat("000.0%"); 1628 TablePrinter tablePrinter = 1629 new TablePrinter() 1630 // tablePrinter.setSortPriorities(0,4) 1631 .addColumn("T", "class='source'", null, "class='source'", true) 1632 .setSortPriority(0) 1633 .setBreakSpans(true) 1634 .setRepeatHeader(true) 1635 .setHidden(true) 1636 .addColumn("Territory", "class='source'", null, "class='source'", true) 1637 .setSortPriority(0) 1638 .setBreakSpans(true) 1639 .addColumn( 1640 "Code", 1641 "class='source'", 1642 CldrUtility.getDoubleLinkMsg(), 1643 "class='source'", 1644 true) 1645 .addColumn( 1646 "Terr. Literacy", 1647 "class='target'", 1648 "{0,number,@@}%", 1649 "class='targetRight'", 1650 true); 1651 1652 tablePrinter 1653 .addColumn("Language", "class='target'", null, "class='target'", false) 1654 .addColumn( 1655 "Code", 1656 "class='target'", 1657 "<a href=\"language_territory_information.html#{0}\">{0}</a>", 1658 "class='target'", 1659 false) 1660 .addColumn( 1661 "Lang. Pop.", 1662 "class='target'", 1663 "{0,number,#,#@@}", 1664 "class='targetRight'", 1665 true) 1666 .addColumn( 1667 "Pop.%", 1668 "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1669 .setSortAscending(false) 1670 .setSortPriority(1) 1671 .addColumn( 1672 "Literacy%", 1673 "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1674 .addColumn( 1675 "Written%", 1676 "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1677 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1678 1679 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1680 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1681 PopulationData territoryData2 = 1682 supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1683 double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); 1684 1685 for (String languageCode : 1686 supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( 1687 territoryCode)) { 1688 PopulationData languageData = 1689 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 1690 languageCode, territoryCode); 1691 double languagePopulationPercent = 1692 100 * languageData.getPopulation() / territoryData2.getPopulation(); 1693 double languageliteracy = languageData.getLiteratePopulationPercent(); 1694 double writingFrequency = languageData.getWritingPercent(); 1695 1696 tablePrinter 1697 .addRow() 1698 .addCell(getFirstPrimaryWeight(territoryName)) 1699 .addCell(territoryName) 1700 .addCell(territoryCode) 1701 .addCell(territoryLiteracy) 1702 .addCell( 1703 getLanguageName(languageCode) 1704 + getOfficialStatus(territoryCode, languageCode)) 1705 .addCell(languageCode) 1706 .addCell(languageData.getPopulation()) 1707 .addCell(languagePopulationPercent) 1708 .addCell(languageliteracy) 1709 .addCell(writingFrequency) 1710 .addCell( 1711 addBug( 1712 1217, 1713 "<i>bug</i>", 1714 "<email>", 1715 "Fix info for " 1716 + getLanguageName(languageCode) 1717 + " (" 1718 + languageCode 1719 + ")" 1720 + " in " 1721 + territoryName 1722 + " (" 1723 + territoryCode 1724 + ")", 1725 "<fixed data for territory, plus references>")) 1726 .finishRow(); 1727 } 1728 1729 tablePrinter 1730 .addRow() 1731 .addCell(getFirstPrimaryWeight(territoryName)) 1732 .addCell(territoryName) 1733 .addCell(territoryCode) 1734 .addCell(territoryLiteracy) 1735 .addCell( 1736 addBug( 1737 1217, 1738 "<i>add new</i>", 1739 "<email>", 1740 "Add language to " 1741 + territoryName 1742 + "(" 1743 + territoryCode 1744 + ")", 1745 "<language, speaker pop. and literacy in territory, plus references>")) 1746 .addCell("") 1747 .addCell(0.0d) 1748 .addCell(0.0d) 1749 .addCell(0.0d) 1750 .addCell(0.0d) 1751 .addCell("") 1752 .finishRow(); 1753 } 1754 String value = tablePrinter.toTable(); 1755 pw2.println(value); 1756 pw2.close(); 1757 } 1758 showCountryInfo(PrintWriter pw)1759 private void showCountryInfo(PrintWriter pw) throws IOException { 1760 PrintWriter pw21 = 1761 new PrintWriter( 1762 new FormattedFileWriter( 1763 null, 1764 "Territory Information", 1765 null, 1766 SUPPLEMENTAL_INDEX_ANCHORS)); 1767 PrintWriter pw2 = pw21; 1768 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1769 nf.setGroupingUsed(true); 1770 // NumberFormat percent = new DecimalFormat("000.0%"); 1771 TablePrinter tablePrinter = 1772 new TablePrinter() 1773 // tablePrinter.setSortPriorities(0,4) 1774 .addColumn("T", "class='source'", null, "class='source'", true) 1775 .setSortPriority(0) 1776 .setBreakSpans(true) 1777 .setRepeatHeader(true) 1778 .setHidden(true) 1779 .addColumn("Territory", "class='source'", null, "class='source'", true) 1780 .setSortPriority(0) 1781 .setBreakSpans(true) 1782 .addColumn( 1783 "Code", 1784 "class='source'", 1785 CldrUtility.getDoubleLinkMsg(), 1786 "class='source'", 1787 true) 1788 .addColumn( 1789 "Terr. Pop (M)", 1790 "class='target'", 1791 "{0,number,#,#@@}", 1792 "class='targetRight'", 1793 true) 1794 .addColumn( 1795 "Terr. GDP ($M PPP)", 1796 "class='target'", 1797 "{0,number,#,#@@}", 1798 "class='targetRight'", 1799 true) 1800 .addColumn( 1801 "Currencies (2006...)", 1802 "class='target'", 1803 null, 1804 "class='target'", 1805 true); 1806 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext(); ) { 1807 String header = it.next(); 1808 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1809 tablePrinter 1810 .addColumn(header) 1811 .setHeaderAttributes("class='target'") 1812 .setCellAttributes("class='target'") 1813 .setSpanRows(true); 1814 } 1815 1816 tablePrinter.addColumn("Report Bug", "class='target'", null, "class='target'", false); 1817 1818 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1819 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1820 PopulationData territoryData2 = 1821 supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1822 double population = territoryData2.getPopulation() / 1000000; 1823 double gdp = territoryData2.getGdp() / 1000000; 1824 1825 Map<String, Set<String>> worldData = 1826 territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false)); 1827 Map<String, Set<String>> countryData = 1828 territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false)); 1829 1830 tablePrinter 1831 .addRow() 1832 .addCell(getFirstPrimaryWeight(territoryName)) 1833 .addCell(territoryName) 1834 .addCell(territoryCode) 1835 .addCell(population) 1836 .addCell(gdp) 1837 .addCell(getCurrencyNames(territoryCode)); 1838 1839 addOtherCountryData(tablePrinter, worldData, countryData); 1840 1841 tablePrinter 1842 .addCell( 1843 addBug( 1844 1217, 1845 "<i>bug</i>", 1846 "<email>", 1847 "Fix info for " 1848 + territoryName 1849 + " (" 1850 + territoryCode 1851 + ")", 1852 "<fixed data for territory, plus references>")) 1853 .finishRow(); 1854 } 1855 String value = tablePrinter.toTable(); 1856 pw2.println(value); 1857 pw2.close(); 1858 } 1859 1860 static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); 1861 1862 // Do just an approximation for now 1863 getFirstPrimaryWeight(String territoryName)1864 private String getFirstPrimaryWeight(String territoryName) { 1865 char first = territoryName.charAt(0); 1866 String result = nfd.getDecomposition(first); 1867 if (result == null) { 1868 return UTF16.valueOf(first); 1869 } 1870 return UTF16.valueOf(result.codePointAt(0)); 1871 } 1872 1873 // private String getTerritoryWithLikelyLink(String territoryCode) { 1874 // return "<a href='likely_subtags.html#und_"+ territoryCode + "'>" + territoryCode + 1875 // "</a>"; 1876 // } 1877 getOfficialStatus(String territoryCode, String languageCode)1878 private String getOfficialStatus(String territoryCode, String languageCode) { 1879 PopulationData x = 1880 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 1881 languageCode, territoryCode); 1882 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1883 return " <span title='" 1884 + x.getOfficialStatus().toString().replace('_', ' ') 1885 + "'>{" 1886 + x.getOfficialStatus().toShortString() 1887 + "}</span>"; 1888 } 1889 getRawOfficialStatus(String territoryCode, String languageCode)1890 private String getRawOfficialStatus(String territoryCode, String languageCode) { 1891 PopulationData x = 1892 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 1893 languageCode, territoryCode); 1894 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1895 return x.getOfficialStatus().toString(); 1896 } 1897 addOtherCountryData( TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData)1898 private void addOtherCountryData( 1899 TablePrinter tablePrinter, 1900 Map<String, Set<String>> worldData, 1901 Map<String, Set<String>> countryData) { 1902 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext(); ) { 1903 String type = it2.next(); 1904 Set<String> worldResults = worldData.get(type); 1905 Set<String> territoryResults = null; 1906 if (countryData != null) { 1907 territoryResults = countryData.get(type); 1908 } 1909 if (territoryResults == null) { 1910 territoryResults = worldResults; 1911 } 1912 String out = ""; 1913 if (territoryResults != null) { 1914 out = territoryResults + ""; 1915 out = out.substring(1, out.length() - 1); // remove [ and ] 1916 } 1917 tablePrinter.addCell(out); 1918 } 1919 } 1920 getCurrencyNames(String territoryCode)1921 private String getCurrencyNames(String territoryCode) { 1922 Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode); 1923 if (currencies == null || currencies.size() == 0) return ""; 1924 StringBuilder buffer = new StringBuilder(); 1925 for (String code : currencies) { 1926 if (buffer.length() != 0) buffer.append(",<br>"); 1927 buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false)); 1928 } 1929 return buffer.toString(); 1930 } 1931 addCharSubstitution(String value, String substitute)1932 private void addCharSubstitution(String value, String substitute) { 1933 if (substitute.equals(value)) return; 1934 LinkedHashSet<String> already = charSubstitutions.get(value); 1935 if (already == null) charSubstitutions.put(value, already = new LinkedHashSet<>(0)); 1936 already.add(substitute); 1937 Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); 1938 } 1939 1940 /** */ 1941 // public void showTerritoryInfo() { 1942 // Map territory_parent = new TreeMap(); 1943 // gather("001", territory_parent); 1944 // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { 1945 // String territory = (String) it.next(); 1946 // String parent = (String) territory_parent.get(territory); 1947 // System.out.println(territory + "\t" + 1948 // english.getName(english.TERRITORY_NAME, territory) + "\t" 1949 // + parent + "\t" + english.getName(english.TERRITORY_NAME, parent)); 1950 // } 1951 // } 1952 1953 // private void gather(String item, Map territory_parent) { 1954 // Collection containedByItem = (Collection) group_contains.get(item); 1955 // if (containedByItem == null) 1956 // return; 1957 // for (Iterator it = containedByItem.iterator(); it.hasNext();) { 1958 // String contained = (String) it.next(); 1959 // territory_parent.put(contained, item); 1960 // gather(contained, territory_parent); 1961 // } 1962 // } 1963 addTerritoryInfo(String territoriesList, String type, String info)1964 private void addTerritoryInfo(String territoriesList, String type, String info) { 1965 String[] territories = territoriesList.split("\\s+"); 1966 territoryTypes.add(type); 1967 for (int i = 0; i < territories.length; ++i) { 1968 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false); 1969 Map<String, Set<String>> s = territoryData.get(territory); 1970 if (s == null) { 1971 territoryData.put(territory, s = new TreeMap<>()); 1972 } 1973 Set<String> ss = s.get(type); 1974 if (ss == null) { 1975 s.put(type, ss = new TreeSet<>()); 1976 } 1977 ss.add(info); 1978 } 1979 } 1980 showCalendarData(PrintWriter pw0)1981 public void showCalendarData(PrintWriter pw0) throws IOException { 1982 PrintWriter pw = 1983 new PrintWriter( 1984 new FormattedFileWriter( 1985 null, 1986 "Other Territory Data", 1987 null, 1988 SUPPLEMENTAL_INDEX_ANCHORS)); 1989 pw.println("<table>"); 1990 pw.println("<tr><th class='source'>Territory</th>"); 1991 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext(); ) { 1992 String header = it.next(); 1993 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1994 pw.println("<th class='target'>" + header + "</th>"); 1995 } 1996 pw.println("</tr>"); 1997 1998 String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false); 1999 Map<String, Set<String>> worldData = territoryData.get(worldName); 2000 for (Iterator<String> it = territoryData.keySet().iterator(); it.hasNext(); ) { 2001 String country = it.next(); 2002 if (country.equals(worldName)) continue; 2003 showCountry(pw, country, country, worldData); 2004 } 2005 showCountry(pw, worldName, "Other", worldData); 2006 pw.println("</table>"); 2007 pw.close(); 2008 } 2009 showCountry( PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)2010 private void showCountry( 2011 PrintWriter pw, 2012 String country, 2013 String countryTitle, 2014 Map<String, Set<String>> worldData) { 2015 pw.println("<tr><td class='source'>" + countryTitle + "</td>"); 2016 Map<String, Set<String>> data = territoryData.get(country); 2017 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext(); ) { 2018 String type = it2.next(); 2019 String target = "target"; 2020 Set<String> results = data.get(type); 2021 Set<String> worldResults = worldData.get(type); 2022 if (results == null) { 2023 results = worldResults; 2024 target = "target2"; 2025 } else if (results.equals(worldResults)) { 2026 target = "target2"; 2027 } 2028 String out = ""; 2029 if (results != null) { 2030 out = results + ""; 2031 out = out.substring(1, out.length() - 1); // remove [ and ] 2032 } 2033 pw.println("<td class='" + target + "'>" + out + "</td>"); 2034 } 2035 pw.println("</tr>"); 2036 } 2037 showCorrespondances()2038 public void showCorrespondances() { 2039 // show correspondances between language and script 2040 Map<String, String> name_script = new TreeMap<>(); 2041 for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext(); ) { 2042 String script = it.next(); 2043 String name = english.getName(CLDRFile.SCRIPT_NAME, script); 2044 if (name == null) name = script; 2045 name_script.put(name, script); 2046 /* 2047 * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 2048 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 2049 * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 2050 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 2051 */ } 2052 String delimiter = "\\P{L}+"; 2053 Map<String, String> name_language = new TreeMap<>(); 2054 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); 2055 it.hasNext(); ) { 2056 String language = it.next(); 2057 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 2058 if (names == null) names = language; 2059 name_language.put(names, language); 2060 } 2061 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); 2062 it.hasNext(); ) { 2063 String language = it.next(); 2064 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 2065 if (names == null) names = language; 2066 String[] words = names.split(delimiter); 2067 if (words.length > 1) { 2068 // System.out.println(names); 2069 } 2070 for (int i = 0; i < words.length; ++i) { 2071 String name = words[i]; 2072 String script = name_script.get(name); 2073 if (script != null) { 2074 Set<String> langSet = (Set<String>) script_languages.asMap().get(script); 2075 if (langSet != null && langSet.contains(language)) System.out.print("*"); 2076 System.out.println( 2077 "\t" + name + " [" + language + "]\t=> " + name + " [" + script 2078 + "]"); 2079 } else { 2080 String language2 = name_language.get(name); 2081 if (language2 != null && !language.equals(language2)) { 2082 Set<String> langSet = (Set<String>) language_scripts.get(language); 2083 if (langSet != null) System.out.print("*"); 2084 System.out.print( 2085 "?\tSame script?\t + " 2086 + getName(CLDRFile.LANGUAGE_NAME, language, false) 2087 + "\t & " 2088 + getName(CLDRFile.LANGUAGE_NAME, language2, false)); 2089 langSet = (Set<String>) language_scripts.get(language2); 2090 if (langSet != null) System.out.print("*"); 2091 System.out.println(); 2092 } 2093 } 2094 } 2095 } 2096 } 2097 2098 /** 2099 * @throws IOException 2100 */ printCurrency(PrintWriter index)2101 public void printCurrency(PrintWriter index) throws IOException { 2102 PrintWriter pw = 2103 new PrintWriter( 2104 new FormattedFileWriter( 2105 null, 2106 "Detailed Territory-Currency Information", 2107 null 2108 // "<p>The following table shows when currencies were in use in 2109 // different countries. " + 2110 // "See also <a href='#format_info'>Decimal Digits and 2111 // Rounding</a>. " + 2112 // "To correct any information here, please file a " + 2113 // addBug(1274, "bug", "<email>", "Currency Bug", 2114 // "<currency, country, and references supporting change>") + 2115 // ".</p>" 2116 , 2117 SUPPLEMENTAL_INDEX_ANCHORS)); 2118 String section1 = "Territory to Currency"; 2119 String section2 = "Decimal Digits and Rounding"; 2120 showContents(pw, "territory_currency", section1, "format_info", section2); 2121 2122 pw.println( 2123 "<h2>" 2124 + CldrUtility.getDoubleLinkedText( 2125 "territory_currency", "1. " + section1) 2126 + "</h2>"); 2127 2128 // doTitle(pw, "Territory \u2192 Currency"); 2129 pw.println("<table>"); 2130 pw.println( 2131 "<tr><th class='source'>Territory</th>" 2132 + "<th class='source'>Code</th>" 2133 + "<th class='target'>From</th>" 2134 + "<th class='target'>To</th>" 2135 + "<th class='target'>Currency</th>" 2136 + "<th class='target'>Name</th>" 2137 + "</tr>"); 2138 2139 Relation<String, String> currencyToTerritory = 2140 Relation.of(new HashMap<String, Set<String>>(), HashSet.class); 2141 Relation<String, String> modernCurrencyToTerritory = 2142 Relation.of(new HashMap<String, Set<String>>(), HashSet.class); 2143 2144 for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) { 2145 String name = nameCode.getKey(); 2146 String regionCode = nameCode.getValue(); 2147 if (!StandardCodes.isCountry(regionCode)) { 2148 continue; 2149 } 2150 if (sc.isLstregPrivateUse("region", regionCode)) { 2151 continue; 2152 } 2153 Set<CurrencyDateInfo> info = supplementalDataInfo.getCurrencyDateInfo(regionCode); 2154 2155 int infoSize = 1; 2156 if (info != null) { 2157 infoSize = info.size(); 2158 } 2159 pw.println( 2160 "<tr>" 2161 + "<td class='source' rowSpan='" 2162 + infoSize 2163 + "'>" 2164 + name 2165 + "</td>" 2166 + "<td class='source' rowSpan='" 2167 + infoSize 2168 + "'>" 2169 + CldrUtility.getDoubleLinkedText(regionCode) 2170 + "</td>"); 2171 if (info == null) { 2172 pw.println( 2173 "<td class='target'>" 2174 + "<i>na</i>" 2175 + "</td>" 2176 + "<td class='target'>" 2177 + "<i>na</i>" 2178 + "</td>" 2179 + "<td class='target'>" 2180 + "<i>na</i>" 2181 + "</td>" 2182 + "<td class='target'>" 2183 + "<i>na</i>" 2184 + "</td>" 2185 + "</tr>"); 2186 continue; 2187 } 2188 boolean first = true; 2189 for (CurrencyDateInfo infoItem : info) { 2190 Date endData = infoItem.getEnd(); 2191 if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { 2192 modernCurrencyToTerritory.put( 2193 infoItem.getCurrency(), getTerritoryName(regionCode)); 2194 } else { 2195 currencyToTerritory.put( 2196 infoItem.getCurrency(), getTerritoryName(regionCode)); 2197 } 2198 if (first) first = false; 2199 else pw.println("<tr>"); 2200 pw.println( 2201 "<td class='target'>" 2202 + CurrencyDateInfo.formatDate(infoItem.getStart()) 2203 + "</td>" 2204 + "<td class='target'>" 2205 + CurrencyDateInfo.formatDate(endData) 2206 + "</td>" 2207 + "<td class='target'>" 2208 + infoItem.getCurrency() 2209 + "</td>" 2210 + "<td class='target'>" 2211 + english.getName("currency", infoItem.getCurrency()) 2212 + "</td>" 2213 + "</tr>"); 2214 } 2215 } 2216 // doFooter(pw); 2217 // pw.close(); 2218 // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); 2219 pw.write("</table>"); 2220 2221 pw.println( 2222 "<h2>" 2223 + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) 2224 + "</h2>"); 2225 2226 pw.write( 2227 "<p>This table shows the number of digits used for each currency, " 2228 + " and the countries where it is or was in use. " 2229 + "Countries where the currency is in current use are bolded. " 2230 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " 2231 + "Where the values are different in a cash context, that is shown in a second column." 2232 + "</p>"); 2233 pw.write("<div align='center'><table>"); 2234 2235 // doTitle(pw, "Currency Format Info"); 2236 // <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" 2237 // cashRounding="0"/> 2238 2239 pw.println( 2240 "<tr>" 2241 + "<th class='source nowrap'>Name</th>" 2242 + "<th class='source'>Currency</th>" 2243 + "<th class='target'>Digits</th>" 2244 + "<th class='target'>Cash Digits</th>" 2245 + "<th class='target'>Countries</th>" 2246 + "</tr>"); 2247 Set<String> currencyList = new TreeSet<String>(col); 2248 currencyList.addAll(currency_fractions.keySet()); 2249 currencyList.addAll(currency_territory.keySet()); 2250 2251 for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) { 2252 // String name = nameCode.getKey(); 2253 String currency = nameCode.getValue(); 2254 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); 2255 Set<String> territories = currencyToTerritory.get(currency); 2256 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency); 2257 2258 // String fractions = (String) currency_fractions.get(currency); 2259 // if (fractions == null) 2260 // fractions = defaultDigits; 2261 // Set territories = (Set) currency_territory.get(currency); 2262 pw.print( 2263 "<tr>" 2264 + "<td class='source nowrap'>" 2265 + TransliteratorUtilities.toHTML.transform( 2266 english.getName("currency", currency)) 2267 + "</td>" 2268 + "<td class='source'>" 2269 + CldrUtility.getDoubleLinkedText(currency) 2270 + "</td>" 2271 + "<td class='target'>" 2272 + info.getDigits() 2273 + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") 2274 + "</td>" 2275 + "<td class='target'>" 2276 + (info.cashDigits == info.getDigits() 2277 && info.cashRounding == info.getRounding() 2278 ? "" 2279 : (info.cashDigits 2280 + (info.cashRounding == 0 2281 ? "" 2282 : " (" + info.cashRounding + ")"))) 2283 + "</td>" 2284 + "<td class='target'>"); 2285 boolean first = true; 2286 boolean needBreak = false; 2287 if (modernTerritories != null) { 2288 needBreak = true; 2289 for (String territory : modernTerritories) { 2290 if (first) first = false; 2291 else pw.print(", "); 2292 pw.print("<b>" + territory + "</b>"); 2293 } 2294 } 2295 // boolean haveBreak = true; 2296 if (territories != null) { 2297 for (String territory : territories) { 2298 if (first) first = false; 2299 else if (!needBreak) pw.print(", "); 2300 else { 2301 pw.print(",<br>"); 2302 needBreak = false; 2303 } 2304 pw.print(territory); 2305 } 2306 } 2307 pw.println("</td></tr>"); 2308 } 2309 pw.println("</table>"); 2310 pw.close(); 2311 // doFooter(pw); 2312 2313 // if (false) { 2314 // doTitle(pw, "Territories Versus Currencies"); 2315 // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without 2316 // Territories</th></tr>"); 2317 // pw.println("<tr><td class='target'>"); 2318 // Set territoriesWithoutCurrencies = new TreeSet(); 2319 // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); 2320 // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); 2321 // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); 2322 // boolean first = true; 2323 // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { 2324 // if (first) first = false; 2325 // else pw.print(", "); 2326 // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false)); 2327 // } 2328 // pw.println("</td><td class='target'>"); 2329 // Set currenciesWithoutTerritories = new TreeSet(); 2330 // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); 2331 // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); 2332 // first = true; 2333 // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { 2334 // if (first) first = false; 2335 // else pw.print(", "); 2336 // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false)); 2337 // } 2338 // pw.println("</td></tr>"); 2339 // doFooter(pw); 2340 // } 2341 } 2342 getTerritoryName(String territory)2343 private String getTerritoryName(String territory) { 2344 String name; 2345 name = english.getName("territory", territory); 2346 if (name == null) { 2347 name = sc.getData("territory", territory); 2348 } 2349 if (name != null) { 2350 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; 2351 } else { 2352 return territory; 2353 } 2354 } 2355 2356 /** 2357 * @throws IOException 2358 */ printAliases(PrintWriter index)2359 public void printAliases(PrintWriter index) throws IOException { 2360 PrintWriter pw = 2361 new PrintWriter( 2362 new FormattedFileWriter( 2363 null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2364 2365 // doTitle(pw, "Aliases"); 2366 pw.println("<table>"); 2367 pw.println( 2368 "<tr><th class='source'>" 2369 + "Type" 2370 + "</th>" 2371 + "<th class='source'>" 2372 + "Code" 2373 + "</th>" 2374 + "<th class='target'>" 2375 + "Reason" 2376 + "</th>" 2377 + "<th class='target'>" 2378 + "Substitute (if available)" 2379 + "</th></tr>"); 2380 for (Iterator<String[]> it = aliases.iterator(); it.hasNext(); ) { 2381 String[] items = it.next(); 2382 pw.println( 2383 "<tr><td class='source'>" 2384 + items[0] 2385 + "</td>" 2386 + "<td class='source'>" 2387 + CldrUtility.getDoubleLinkedText(items[1]) 2388 + "</td>" 2389 + "<td class='target'>" 2390 + items[3] 2391 + "</td>" 2392 + "<td class='target'>" 2393 + items[2] 2394 + "</td></tr>"); 2395 } 2396 // doFooter(pw); 2397 pw.println("</table>"); 2398 pw.close(); 2399 } 2400 2401 // deprecatedItems 2402 // public void printDeprecatedItems(PrintWriter pw) { 2403 // doTitle(pw, "Deprecated Items"); 2404 // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td 2405 // class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>"); 2406 // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { 2407 // Map source = (Map)it.next(); 2408 // Object item; 2409 // pw.print("<tr>"); 2410 // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") 2411 // + "</td>"); 2412 // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : 2413 // "<i>any</i>") + "</td>"); 2414 // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : 2415 // "<i>any</i>") + "</td>"); 2416 // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : 2417 // "<i>any</i>") + "</td>"); 2418 // pw.print("</tr>"); 2419 // } 2420 // doFooter(pw); 2421 // } 2422 printWindows_Tzid(PrintWriter index)2423 public void printWindows_Tzid(PrintWriter index) throws IOException { 2424 Map<String, Map<String, Map<String, String>>> zoneMapping = 2425 supplementalDataInfo.getTypeToZoneToRegionToZone(); 2426 PrintWriter pw = 2427 new PrintWriter( 2428 new FormattedFileWriter( 2429 null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2430 for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : 2431 zoneMapping.entrySet()) { 2432 String type = typeAndZoneToRegionToZone.getKey(); 2433 Map<String, Map<String, String>> zoneToRegionToZone = 2434 typeAndZoneToRegionToZone.getValue(); 2435 pw.println("<br><h1>Mapping for: " + type + "</h1><br>"); 2436 // doTitle(pw, "Windows \u2192 Tzid"); 2437 pw.println("<table>"); 2438 pw.println( 2439 "<tr><th class='source'>" 2440 + type 2441 + "</th><th class='source'>" 2442 + "Region" 2443 + "</th><th class='target'>" 2444 + "TZID" 2445 + "</th></tr>"); 2446 2447 for (Entry<String, Map<String, String>> zoneAndregionToZone : 2448 zoneToRegionToZone.entrySet()) { 2449 String source = zoneAndregionToZone.getKey(); 2450 Map<String, String> regionToZone = zoneAndregionToZone.getValue(); 2451 for (Entry<String, String> regionAndZone : regionToZone.entrySet()) { 2452 String region = regionAndZone.getKey(); 2453 String target = regionAndZone.getValue(); 2454 if (region == null) region = "<i>any</a>"; 2455 pw.println( 2456 "<tr><td class='source'>" 2457 + source 2458 + "</td><td class='source'>" 2459 + region 2460 + "</td><td class='target'>" 2461 + target 2462 + "</td></tr>"); 2463 } 2464 } 2465 // doFooter(pw); 2466 pw.println("</table>"); 2467 } 2468 pw.close(); 2469 } 2470 2471 // <info iso4217="ADP" digits="0" rounding="0"/> 2472 printCharacters(PrintWriter index)2473 public void printCharacters(PrintWriter index) throws IOException { 2474 String title = "Character Fallback Substitutions"; 2475 2476 PrintWriter pw = 2477 new PrintWriter( 2478 new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2479 // doTitle(pw, title); 2480 pw.println("<table>"); 2481 2482 pw.println( 2483 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>"); 2484 UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); 2485 for (com.ibm.icu.text.UnicodeSetIterator it = 2486 new com.ibm.icu.text.UnicodeSetIterator(chars); 2487 it.next(); ) { 2488 String value = it.getString(); 2489 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); 2490 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); 2491 } 2492 int[] counts = new int[4]; 2493 for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext(); ) { 2494 String value = it.next(); 2495 LinkedHashSet<String> substitutes = charSubstitutions.get(value); 2496 String nfc = Normalizer.normalize(value, Normalizer.NFC); 2497 String nfkc = Normalizer.normalize(value, Normalizer.NFKC); 2498 2499 String sourceTag = "<td class='source'>"; 2500 if (substitutes.size() > 1) { 2501 sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>"; 2502 } 2503 boolean first = true; 2504 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext(); ) { 2505 String substitute = it2.next(); 2506 String type = "Explicit"; 2507 String targetTag = "<td class='target3'>"; 2508 if (substitute.equals(nfc)) { 2509 type = "NFC"; 2510 targetTag = "<td class='target'>"; 2511 counts[2]++; 2512 } else if (substitute.equals(nfkc)) { 2513 type = "NFKC"; 2514 targetTag = "<td class='target4'>"; 2515 counts[3]++; 2516 } else { 2517 counts[0]++; 2518 } 2519 pw.println( 2520 "<tr>" 2521 + (!first 2522 ? "" 2523 : sourceTag 2524 + hex(value, ", ") 2525 + "</td>" 2526 + sourceTag 2527 + TransliteratorUtilities.toHTML.transliterate( 2528 value) 2529 + "</td>" 2530 + sourceTag 2531 + UCharacter.getName(value, ", ") 2532 + "</td>") 2533 + targetTag 2534 + type 2535 + "</td>" 2536 + targetTag 2537 + hex(substitute, ", ") 2538 + "</td>" 2539 + targetTag 2540 + TransliteratorUtilities.toHTML.transliterate(substitute) 2541 + "</td>" 2542 + targetTag 2543 + UCharacter.getName(substitute, ", ") 2544 + "</td></tr>"); 2545 first = false; 2546 } 2547 } 2548 // doFooter(pw); 2549 pw.println("</table>"); 2550 2551 pw.close(); 2552 for (int i = 0; i < counts.length; ++i) { 2553 System.out.println("Count\t" + i + "\t" + counts[i]); 2554 } 2555 } 2556 hex(String s, String separator)2557 public static String hex(String s, String separator) { 2558 StringBuffer result = new StringBuffer(); 2559 int cp; 2560 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 2561 cp = UTF16.charAt(s, i); 2562 if (i != 0) result.append(separator); 2563 result.append(com.ibm.icu.impl.Utility.hex(cp)); 2564 } 2565 return result.toString(); 2566 } 2567 2568 /** */ 2569 // private PrintWriter doTitle(PrintWriter pw, String title) { 2570 // //String anchor = FileUtilities.anchorize(title); 2571 // pw.println("<div align='center'><table>"); 2572 // //anchors.put(title, anchor); 2573 // //PrintWriter result = null; 2574 // //return result; 2575 // } 2576 2577 // private void doFooter(PrintWriter pw) { 2578 // pw.println("</table></div>"); 2579 // } printContains2( PrintWriter pw, String lead, String start, int depth, boolean isFirst)2580 public void printContains2( 2581 PrintWriter pw, String lead, String start, int depth, boolean isFirst) { 2582 String name = depth == 4 ? start : getName(CLDRFile.TERRITORY_NAME, start, false); 2583 if (!isFirst) pw.print(lead); 2584 int count = getTotalContainedItems(start, depth); 2585 pw.print( 2586 "<td class='z" 2587 + depth 2588 + "' rowSpan='" 2589 + count 2590 + "'>" 2591 + name 2592 + "</td>"); // colSpan='" + (5 - 2593 // depth) + "' 2594 if (depth == 4) pw.println("</tr>"); 2595 Collection<String> contains = getContainedCollection(start, depth); 2596 if (contains != null) { 2597 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator); 2598 contains2.addAll(contains); 2599 boolean first = true; 2600 for (Iterator<String> it = contains2.iterator(); it.hasNext(); ) { 2601 String item = it.next(); 2602 printContains2(pw, lead, item, depth + 1, first); // + "<td> </td>" 2603 first = false; 2604 } 2605 } 2606 } 2607 getTotalContainedItems(String start, int depth)2608 private int getTotalContainedItems(String start, int depth) { 2609 Collection<String> c = getContainedCollection(start, depth); 2610 if (c == null) return 1; 2611 int sum = 0; 2612 for (Iterator<String> it = c.iterator(); it.hasNext(); ) { 2613 sum += getTotalContainedItems(it.next(), depth + 1); 2614 } 2615 return sum; 2616 } 2617 2618 /** */ getContainedCollection(String start, int depth)2619 private Collection<String> getContainedCollection(String start, int depth) { 2620 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2621 if (contains == null) { 2622 contains = sc.getCountryToZoneSet().get(start); 2623 if (contains == null && depth == 3) { 2624 contains = new TreeSet<>(); 2625 if (start.compareTo("A") >= 0) { 2626 contains.add("<font color='red'>MISSING TZID</font>"); 2627 } else { 2628 contains.add("<font color='red'>Not yet ISO code</font>"); 2629 } 2630 } 2631 } 2632 return contains; 2633 } 2634 2635 /** 2636 * @param table TODO 2637 */ printMissing(PrintWriter pw, int source, int table)2638 public void printMissing(PrintWriter pw, int source, int table) { 2639 Set<String> missingItems = new HashSet<>(); 2640 String type = null; 2641 if (source == CLDRFile.TERRITORY_NAME) { 2642 type = "territory"; 2643 missingItems.addAll(sc.getAvailableCodes(type)); 2644 missingItems.removeAll(territory_languages.keySet()); 2645 missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet()); 2646 missingItems.remove("200"); // czechoslovakia 2647 } else if (source == CLDRFile.SCRIPT_NAME) { 2648 type = "script"; 2649 missingItems.addAll(sc.getAvailableCodes(type)); 2650 missingItems.removeAll(script_languages.keySet()); 2651 } else if (source == CLDRFile.LANGUAGE_NAME) { 2652 type = "language"; 2653 missingItems.addAll(sc.getAvailableCodes(type)); 2654 if (table == CLDRFile.SCRIPT_NAME) 2655 missingItems.removeAll(language_scripts.keySet()); 2656 if (table == CLDRFile.TERRITORY_NAME) 2657 missingItems.removeAll(language_territories.keySet()); 2658 } else { 2659 throw new IllegalArgumentException("Illegal code"); 2660 } 2661 Set<String> missingItemsNamed = new TreeSet<String>(col); 2662 for (Iterator<String> it = missingItems.iterator(); it.hasNext(); ) { 2663 String item = it.next(); 2664 List<String> data = sc.getFullData(type, item); 2665 if (data.get(0).equals("PRIVATE USE")) continue; 2666 if (data.size() < 3) continue; 2667 if (!"".equals(data.get(2))) continue; 2668 2669 String itemName = getName(source, item, true); 2670 missingItemsNamed.add(itemName); 2671 } 2672 pw.println("<div align='center'><table>"); 2673 for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext(); ) { 2674 pw.println("<tr><td class='target'>" + it.next() + "</td></tr>"); 2675 } 2676 pw.println("</table></div>"); 2677 } 2678 2679 // source, eg english.TERRITORY_NAME 2680 // target, eg english.LANGUAGE_NAME print(PrintWriter pw, int source, int target)2681 public void print(PrintWriter pw, int source, int target) { 2682 Multimap<String, String> data = 2683 source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME 2684 ? territory_languages 2685 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME 2686 ? language_territories 2687 : source == CLDRFile.SCRIPT_NAME 2688 && target == CLDRFile.LANGUAGE_NAME 2689 ? script_languages 2690 : source == CLDRFile.LANGUAGE_NAME 2691 && target == CLDRFile.SCRIPT_NAME 2692 ? language_scripts 2693 : null; 2694 // transform into names, and sort 2695 Map<String, Set<String>> territory_languageNames = 2696 new TreeMap<String, Set<String>>(col); 2697 for (Iterator<String> it = data.keySet().iterator(); it.hasNext(); ) { 2698 String territory = it.next(); 2699 String territoryName = getName(source, territory, true); 2700 Set<String> s = territory_languageNames.get(territoryName); 2701 if (s == null) 2702 territory_languageNames.put(territoryName, s = new TreeSet<String>(col)); 2703 for (Iterator<String> it2 = data.get(territory).iterator(); it2.hasNext(); ) { 2704 String language = it2.next(); 2705 String languageName = getName(target, language, true); 2706 s.add(languageName); 2707 } 2708 } 2709 2710 pw.println("<div align='center'><table>"); 2711 2712 for (Iterator<String> it = territory_languageNames.keySet().iterator(); 2713 it.hasNext(); ) { 2714 String territoryName = it.next(); 2715 pw.println("<tr><td class='source' colspan='2'>" + territoryName + "</td></tr>"); 2716 Set<String> s = territory_languageNames.get(territoryName); 2717 for (Iterator<String> it2 = s.iterator(); it2.hasNext(); ) { 2718 String languageName = it2.next(); 2719 pw.println( 2720 "<tr><td> </td><td class='target'>" + languageName + "</td></tr>"); 2721 } 2722 } 2723 pw.println("</table></div>"); 2724 } 2725 2726 /** 2727 * @param codeFirst TODO 2728 */ getName(int type, String oldcode, boolean codeFirst)2729 private String getName(int type, String oldcode, boolean codeFirst) { 2730 if (oldcode.contains(" ")) { 2731 String[] result = oldcode.split("\\s+"); 2732 for (int i = 0; i < result.length; ++i) { 2733 result[i] = getName(type, result[i], codeFirst); 2734 } 2735 return CldrUtility.join(Arrays.asList(result), ", "); 2736 } else { 2737 int pos = oldcode.indexOf('*'); 2738 String code = pos < 0 ? oldcode : oldcode.substring(0, pos); 2739 String ename = english.getName(type, code); 2740 String nameString = ename == null ? code : ename; 2741 return nameString.equals(oldcode) 2742 ? nameString 2743 : codeFirst 2744 ? "[" + oldcode + "]" + "\t" + nameString 2745 : nameString + "\t" + "[" + oldcode + "]"; 2746 } 2747 } 2748 2749 private String getName(String locale, boolean codeFirst) { 2750 String ename = getLanguageName(locale); 2751 return codeFirst 2752 ? "[" + locale + "]\t" + (ename == null ? locale : ename) 2753 : (ename == null ? locale : ename) + "\t[" + locale + "]"; 2754 } 2755 2756 Comparator territoryNameComparator = 2757 new Comparator() { 2758 @Override 2759 public int compare(Object o1, Object o2) { 2760 return col.compare( 2761 getName(CLDRFile.TERRITORY_NAME, (String) o1, false), 2762 getName(CLDRFile.TERRITORY_NAME, (String) o2, false)); 2763 } 2764 }; 2765 2766 static String[] stringArrayPattern = new String[0]; 2767 static String[][] string2ArrayPattern = new String[0][]; 2768 2769 public static Map<String, String> territoryAliases = new HashMap<>(); 2770 2771 public void printContains(PrintWriter index) throws IOException { 2772 String title = "Territory Containment (UN M.49)"; 2773 2774 PrintWriter pw = 2775 new PrintWriter( 2776 new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2777 // doTitle(pw, title); 2778 List<String[]> rows = new ArrayList<>(); 2779 printContains3("001", rows, new ArrayList<String>()); 2780 TablePrinter tablePrinter = 2781 new TablePrinter() 2782 .addColumn("World", "class='source'", null, "class='z0'", true) 2783 .setSortPriority(0) 2784 .addColumn("Continent", "class='source'", null, "class='z1'", true) 2785 .setSortPriority(1) 2786 .addColumn("Subcontinent", "class='source'", null, "class='z2'", true) 2787 .setSortPriority(2) 2788 .addColumn( 2789 "Country (Territory)", 2790 "class='source'", 2791 null, 2792 "class='z3'", 2793 true) 2794 .setSortPriority(3) 2795 .addColumn("Time Zone", "class='source'", null, "class='z4'", true) 2796 .setSortPriority(4); 2797 String[][] flatData = rows.toArray(string2ArrayPattern); 2798 pw.println(tablePrinter.addRows(flatData).toTable()); 2799 2800 showSubtable( 2801 pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); 2802 showSubtable( 2803 pw, 2804 ContainmentStyle.deprecated, 2805 "Deprecated", 2806 "Container", 2807 "Deprecated Region"); 2808 2809 // Relation<String, String> deprecated = supplementalDataInfo 2810 // .getTerritoryToContained(ContainmentStyle.deprecated); 2811 // 2812 // for (String region : deprecated.keySet()) { 2813 // nameToContainers.add(region); 2814 // } 2815 // pw.println("<h2>Groupings and Deprecated Regions</h2>"); 2816 // for (String region : nameToContainers) { 2817 // String name = getName(CLDRFile.TERRITORY_NAME, region, false); 2818 // Set<String> dep = deprecated.get(region); 2819 // Set<String> gro = grouping.get(region); 2820 // Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : 2821 // dep).iterator(); 2822 // Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : 2823 // gro).iterator(); 2824 // while (depIt.hasNext() || groIt.hasNext()) { 2825 // String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, 2826 // depIt.next(), false) : ""; 2827 // String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, 2828 // groIt.next(), false) : ""; 2829 // tablePrinter2.addRow() 2830 // .addCell(name) 2831 // .addCell(gro1) 2832 // .addCell(dep1) 2833 // .finishRow(); 2834 // } 2835 // } 2836 // pw.println(tablePrinter2.toTable()); 2837 // pw.println("<h2>Other Groupings</h2>"); 2838 // for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) 2839 // { 2840 // showContainers(pw, regionContained); 2841 // } 2842 // 2843 // pw.println("<h2>Deprecated Codes</h2>"); 2844 // for (Entry<String, Set<String>> regionContained : 2845 // deprecated.keyValuesSet()) { 2846 // showContainers(pw, regionContained); 2847 // } 2848 pw.close(); 2849 } 2850 2851 public void showSubtable( 2852 PrintWriter pw, 2853 ContainmentStyle containmentStyle, 2854 String title, 2855 String containerTitle, 2856 String containeeTitle) { 2857 pw.println("<h2>" + title + "</h2>"); 2858 TablePrinter tablePrinter2 = 2859 new TablePrinter() 2860 .addColumn(containerTitle, "class='source'", null, "class='z0'", true) 2861 .setSortPriority(0) 2862 .addColumn(containeeTitle, "class='source'", null, "class='z4'", true) 2863 .setSortPriority(1); 2864 2865 Relation<String, String> grouping = 2866 supplementalDataInfo.getTerritoryToContained(containmentStyle); 2867 2868 for (Entry<String, String> containerRegion : grouping.keyValueSet()) { 2869 String container = 2870 getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false); 2871 String containee = 2872 getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false); 2873 tablePrinter2.addRow().addCell(container).addCell(containee).finishRow(); 2874 } 2875 pw.println(tablePrinter2.toTable()); 2876 } 2877 2878 public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) { 2879 String region = regionContained.getKey(); 2880 Set<String> contained = regionContained.getValue(); 2881 pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>"); 2882 for (String sub : contained) { 2883 pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>"); 2884 } 2885 pw.println("</ul></li></ul>"); 2886 } 2887 2888 private void printContains3( 2889 String start, List<String[]> rows, ArrayList<String> currentRow) { 2890 int len = currentRow.size(); 2891 if (len > 3) { 2892 return; // skip long items 2893 } 2894 currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false)); 2895 // Collection<String> contains = (Collection<String>) group_contains.get(start); 2896 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2897 if (contains == null) { 2898 contains = sc.getCountryToZoneSet().get(start); 2899 currentRow.add(""); 2900 if (contains == null) { 2901 currentRow.set(len + 1, "???"); 2902 rows.add(currentRow.toArray(stringArrayPattern)); 2903 } else { 2904 for (String item : contains) { 2905 currentRow.set(len + 1, item); 2906 rows.add(currentRow.toArray(stringArrayPattern)); 2907 } 2908 } 2909 currentRow.remove(len + 1); 2910 } else { 2911 for (String item : contains) { 2912 if (territoryAliases.keySet().contains(item)) { 2913 continue; 2914 } 2915 printContains3(item, rows, currentRow); 2916 } 2917 } 2918 currentRow.remove(len); 2919 } 2920 } 2921 2922 /** */ getInverse( Map<String, Set<String>> language_territories)2923 private static Map<String, Set<String>> getInverse( 2924 Map<String, Set<String>> language_territories) { 2925 // get inverse relation 2926 Map<String, Set<String>> territory_languages = new TreeMap<>(); 2927 for (Iterator<String> it = language_territories.keySet().iterator(); it.hasNext(); ) { 2928 String language = it.next(); 2929 Set<String> territories = language_territories.get(language); 2930 for (Iterator<String> it2 = territories.iterator(); it2.hasNext(); ) { 2931 String territory = it2.next(); 2932 Set<String> languages = territory_languages.get(territory); 2933 if (languages == null) 2934 territory_languages.put(territory, languages = new TreeSet<String>(col)); 2935 languages.add(language); 2936 } 2937 } 2938 return territory_languages; 2939 } 2940 2941 static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region"); 2942 static final Map<String, String> NAME_TO_CURRENCY = 2943 getNameToCode(CodeType.currency, "currency"); 2944 getNameToCode(CodeType codeType, String cldrCodeType)2945 private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) { 2946 SortedMap<String, String> temp = new TreeMap<String, String>(col); 2947 for (String territory : StandardCodes.make().getAvailableCodes(codeType)) { 2948 String name = english.getName(cldrCodeType, territory); 2949 temp.put(name == null ? territory : name, territory); 2950 } 2951 temp = Collections.unmodifiableSortedMap(temp); 2952 return temp; 2953 } 2954 2955 /** 2956 * @param value_delimiter TODO 2957 */ addTokens( String key, String values, String value_delimiter, Map<String, Set<String>> key_value)2958 private static void addTokens( 2959 String key, String values, String value_delimiter, Map<String, Set<String>> key_value) { 2960 if (values != null) { 2961 Set<String> s = key_value.get(key); 2962 if (s == null) key_value.put(key, s = new TreeSet<String>(col)); 2963 s.addAll(Arrays.asList(values.split(value_delimiter))); 2964 } 2965 } 2966 addTokens( String key, String values, String value_delimiter, Multimap<String, String> key_value)2967 private static void addTokens( 2968 String key, String values, String value_delimiter, Multimap<String, String> key_value) { 2969 if (values != null) { 2970 key_value.putAll(key, Arrays.asList(values.split(value_delimiter))); 2971 } 2972 } 2973 showContents(Appendable pw, String... items)2974 public static void showContents(Appendable pw, String... items) { 2975 try { 2976 pw.append("</div>" + System.lineSeparator()); 2977 pw.append("<h3>Contents</h3>" + System.lineSeparator()); 2978 pw.append("<ol>" + System.lineSeparator()); 2979 for (int i = 0; i < items.length; i += 2) { 2980 pw.append( 2981 "<li><a href='#" 2982 + items[i] 2983 + "'>" 2984 + items[i + 1] 2985 + "</a></li>" 2986 + System.lineSeparator()); 2987 } 2988 pw.append("</ol><hr>" + System.lineSeparator()); 2989 2990 pw.append("<div align='center'>" + System.lineSeparator()); 2991 } catch (IOException e) { 2992 throw new ICUUncheckedIOException(e); 2993 } 2994 } 2995 } 2996