1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import java.io.IOException; 10 import java.io.PrintWriter; 11 import java.io.StringWriter; 12 import java.io.UnsupportedEncodingException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.Collections; 17 import java.util.Comparator; 18 import java.util.Date; 19 import java.util.EnumSet; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.Iterator; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.SortedMap; 30 import java.util.TreeMap; 31 import java.util.TreeSet; 32 33 import org.unicode.cldr.draft.FileUtilities; 34 import org.unicode.cldr.draft.ScriptMetadata; 35 import org.unicode.cldr.draft.ScriptMetadata.Info; 36 import org.unicode.cldr.util.ArrayComparator; 37 import org.unicode.cldr.util.CLDRConfig; 38 import org.unicode.cldr.util.CLDRFile; 39 import org.unicode.cldr.util.CLDRFile.WinningChoice; 40 import org.unicode.cldr.util.CLDRPaths; 41 import org.unicode.cldr.util.CLDRTool; 42 import org.unicode.cldr.util.CLDRURLS; 43 import org.unicode.cldr.util.CldrUtility; 44 import org.unicode.cldr.util.Factory; 45 import org.unicode.cldr.util.FileCopier; 46 import org.unicode.cldr.util.Iso639Data; 47 import org.unicode.cldr.util.Iso639Data.Scope; 48 import org.unicode.cldr.util.Iso639Data.Type; 49 import org.unicode.cldr.util.LanguageTagParser; 50 import org.unicode.cldr.util.Level; 51 import org.unicode.cldr.util.LocaleIDParser; 52 import org.unicode.cldr.util.Log; 53 import org.unicode.cldr.util.Organization; 54 import org.unicode.cldr.util.StandardCodes; 55 import org.unicode.cldr.util.StandardCodes.CodeType; 56 import org.unicode.cldr.util.SupplementalDataInfo; 57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 63 import org.unicode.cldr.util.TransliteratorUtilities; 64 import org.unicode.cldr.util.XPathParts; 65 66 import com.google.common.base.Joiner; 67 import com.google.common.collect.ImmutableMap; 68 import com.google.common.collect.Multimap; 69 import com.google.common.collect.Multimaps; 70 import com.google.common.collect.TreeMultimap; 71 import com.ibm.icu.impl.Relation; 72 import com.ibm.icu.impl.Row.R2; 73 import com.ibm.icu.impl.Row.R4; 74 import com.ibm.icu.lang.UCharacter; 75 import com.ibm.icu.text.Collator; 76 import com.ibm.icu.text.Normalizer; 77 import com.ibm.icu.text.Normalizer2; 78 import com.ibm.icu.text.NumberFormat; 79 import com.ibm.icu.text.UTF16; 80 import com.ibm.icu.text.UnicodeSet; 81 import com.ibm.icu.util.ICUUncheckedIOException; 82 import com.ibm.icu.util.ULocale; 83 84 @CLDRTool(alias = "showlanguages", description = "Generate Language info charts") 85 public class ShowLanguages { 86 private static final boolean SHOW_NATIVE = true; 87 88 static Comparator col = new org.unicode.cldr.util.MultiComparator( 89 Collator.getInstance(new ULocale("en")), 90 new UTF16.StringComparator(true, false, 0)); 91 92 static StandardCodes sc = StandardCodes.make(); 93 94 static Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();//.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 95 static CLDRFile english = CLDRConfig.getInstance().getEnglish(); 96 main(String[] args)97 public static void main(String[] args) throws IOException { 98 System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); 99 FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR); 100 FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR); 101 FormattedFileWriter.copyIncludeHtmls(FormattedFileWriter.CHART_TARGET_DIR); 102 103 StringWriter sw = printLanguageData(cldrFactory, "index.html"); 104 writeSupplementalIndex("index.html", sw); 105 106 // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*"); 107 // printLanguageData(cldrFactory, "language_info2.txt"); 108 System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR); 109 } 110 111 /** 112 * 113 */ 114 public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = new FormattedFileWriter.Anchors(); 115 116 static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo 117 .getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 118 printLanguageData(Factory cldrFactory, String filename)119 private static StringWriter printLanguageData(Factory cldrFactory, String filename) throws IOException { 120 StringWriter sw = new StringWriter(); 121 PrintWriter pw = new PrintWriter(sw); 122 123 new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 124 ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS, null); 125 126 new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 127 new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 128 new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 129 new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 130 if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) { 131 new ChartUnitConversions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 132 new ChartUnitPreferences().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 133 } 134 // since we don't want these listed on the supplemental page, use null 135 136 new ShowPlurals().printPlurals(english, null, pw, cldrFactory); 137 138 LanguageInfo linfo = new LanguageInfo(cldrFactory); 139 140 linfo.showCoverageGoals(pw); 141 142 linfo.printLikelySubtags(pw); 143 144 linfo.showCountryLanguageInfo(pw); 145 146 linfo.showLanguageCountryInfo(pw); 147 148 // linfo.showTerritoryInfo(); 149 // linfo.printCountryData(pw); 150 151 // linfo.printDeprecatedItems(pw); 152 153 // PrintWriter pw1 = new PrintWriter(new FormattedFileWriter(pw, "Languages and Territories", null)); 154 // pw1.println("<tr><th>Language \u2192 Territories"); 155 // pw1.println("</th><th>Territory \u2192 Language"); 156 // pw1.println("</th><th>Territories Not Represented"); 157 // pw1.println("</th><th>Languages Not Represented"); 158 // pw1.println("</th></tr>"); 159 // 160 // pw1.println("<tr><td>"); 161 // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 162 // pw1.println("</td><td>"); 163 // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME); 164 // pw1.println("</td><td>"); 165 // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME); 166 // pw1.println("</td><td>"); 167 // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 168 // pw1.println("</td></tr>"); 169 // 170 // pw1.close(); 171 172 printLanguageScript(linfo, pw); 173 printScriptLanguageTerritory(linfo, pw); 174 175 linfo.showCorrespondances(); 176 177 // linfo.showCalendarData(pw); 178 179 linfo.showCountryInfo(pw); 180 linfo.printCurrency(pw); 181 linfo.printContains(pw); 182 183 linfo.printWindows_Tzid(pw); 184 linfo.printAliases(pw); 185 186 linfo.printCharacters(pw); 187 188 pw.close(); 189 190 return sw; 191 } 192 writeSupplementalIndex(String filename, StringWriter sw)193 private static void writeSupplementalIndex(String filename, StringWriter sw) throws IOException { 194 String[] replacements = { 195 "%date%", CldrUtility.isoFormatDateOnly(new Date()), 196 "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(), 197 "%data%", sw.toString(), 198 "%index%", "../index.html" }; 199 PrintWriter pw2 = org.unicode.cldr.draft.FileUtilities.openUTF8Writer(FormattedFileWriter.CHART_TARGET_DIR, filename); 200 FileUtilities.appendFile(ShowLanguages.class, "supplemental.html", replacements, pw2); 201 pw2.close(); 202 } 203 printLanguageScript(LanguageInfo linfo, PrintWriter pw)204 private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException { 205 PrintWriter pw1; 206 TablePrinter tablePrinter = new TablePrinter() 207 .addColumn("Language", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 208 .setBreakSpans(true) 209 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 210 .setSpanRows(true) 211 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSpanRows(true) 212 .setSortPriority(1) 213 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3) 214 .addColumn("Script", "class='target'", null, "class='target'", true).setSortPriority(3) 215 .addColumn("Code", "class='target'", null, "class='target'", true) 216 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSortPriority(2); 217 218 TablePrinter tablePrinter2 = new TablePrinter() 219 .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 220 .setBreakSpans(true) 221 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 222 .setSpanRows(true) 223 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSpanRows(true) 224 .setSortPriority(1) 225 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(3) 226 .addColumn("Code", "class='target'", null, "class='target'", true) 227 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSortPriority(2) 228 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3); 229 230 // get the codes so we can show the remainder 231 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS); 232 UnicodeSet temp = new UnicodeSet(); 233 for (String script : getScriptsToShow()) { 234 temp.clear(); 235 try { 236 temp.applyPropertyAlias("script", script); 237 } catch (RuntimeException e) { 238 } // fall through 239 if (temp.size() == 0) { 240 remainingScripts.remove(script); 241 System.out.println("Removing: " + script); 242 } else { 243 System.out.println("Keeping: " + script); 244 } 245 } 246 remainingScripts.remove("Brai"); 247 remainingScripts.remove("Hira"); 248 remainingScripts.remove("Qaai"); 249 remainingScripts.remove("Hrkt"); 250 remainingScripts.remove("Zzzz"); 251 remainingScripts.remove("Zyyy"); 252 253 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 254 for (String language : getLanguagesToShow()) { 255 Scope s = Iso639Data.getScope(language); 256 Type t = Iso639Data.getType(language); 257 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 258 remainingLanguages.remove(language); 259 } 260 } 261 262 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 263 for (String language : languages) { 264 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 265 for (BasicLanguageData basicData : basicLanguageData) { 266 String secondary = isOfficial(language) // basicData.getType() == BasicLanguageData.Type.primary 267 ? "\u00A0" 268 : "N"; 269 for (String script : basicData.getScripts()) { 270 addLanguageScriptCells(tablePrinter, tablePrinter2, language, script, secondary); 271 remainingScripts.remove(script); 272 remainingLanguages.remove(language); 273 } 274 } 275 } 276 for (String language : remainingLanguages) { 277 addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?"); 278 } 279 for (String script : remainingScripts) { 280 addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?"); 281 } 282 283 pw1 = new PrintWriter(new FormattedFileWriter(null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS)); 284 pw1.println(tablePrinter.toTable()); 285 pw1.close(); 286 287 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS)); 288 pw1.println(tablePrinter2.toTable()); 289 pw1.close(); 290 291 } 292 293 static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<>(); 294 static { 295 for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) { 296 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 297 if (territories == null) { 298 continue; 299 } 300 int underbar = language.indexOf('_'); 301 String base = underbar < 0 ? null : language.substring(0, underbar); 302 303 for (String territory : territories) { 304 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 305 OfficialStatus status = data.getOfficialStatus(); 306 OfficialStatus old; 307 old = languageToBestStatus.get(language); 308 if (old == null || status.compareTo(old) > 0) { 309 languageToBestStatus.put(language, status); 310 } 311 if (base != null) { 312 old = languageToBestStatus.get(base); 313 if (old == null || status.compareTo(old) > 0) { 314 languageToBestStatus.put(base, status); 315 } 316 } 317 } 318 } 319 } 320 321 private static boolean isOfficial(String language) { 322 OfficialStatus status = languageToBestStatus.get(language); 323 if (status != null && status.isMajor()) { 324 return true; 325 } 326 int underbar = language.indexOf('_'); 327 if (underbar < 0) { 328 return false; 329 } 330 return isOfficial(language.substring(0, underbar)); 331 } 332 333 private static Set<String> getLanguagesToShow() { 334 return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME); 335 } 336 337 private static Set<String> getEnglishTypes(String type, int code) { 338 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 339 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) { 340 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 341 String newType = parts.getAttributeValue(-1, "type"); 342 if (!result.contains(newType)) { 343 result.add(newType); 344 } 345 } 346 return result; 347 } 348 349 private static Set<String> getScriptsToShow() { 350 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME); 351 } 352 353 private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) throws IOException { 354 PrintWriter pw1; 355 TablePrinter tablePrinter2 = new TablePrinter() 356 .addColumn("Sample Char", "class='source'", null, "class='source sample'", true).setSpanRows(true) 357 .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 358 .setBreakSpans(true) 359 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 360 .setSpanRows(true) 361 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(1) 362 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(2) 363 .addColumn("Native", "class='target'", null, "class='target'", true) 364 .addColumn("Code", "class='target'", null, "class='target'", true) 365 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(3) 366 .addColumn("Territory", "class='target'", null, "class='target'", true).setSortPriority(4) 367 .addColumn("Native", "class='target'", null, "class='target'", true) 368 .addColumn("Code", "class='target'", null, "class='target'", true); 369 370 // get the codes so we can show the remainder 371 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); 372 Set<String> remainingTerritories = new TreeSet<>(sc.getGoodAvailableCodes("territory")); 373 UnicodeSet temp = new UnicodeSet(); 374 for (String script : getScriptsToShow()) { 375 temp.clear(); 376 try { 377 temp.applyPropertyAlias("script", script); 378 } catch (RuntimeException e) { 379 } // fall through 380 if (temp.size() == 0) { 381 remainingScripts.remove(script); 382 System.out.println("Removing: " + script); 383 } else { 384 System.out.println("Keeping: " + script); 385 } 386 } 387 remainingScripts.remove("Brai"); 388 remainingScripts.remove("Hira"); 389 remainingScripts.remove("Qaai"); 390 remainingScripts.remove("Hrkt"); 391 remainingScripts.remove("Zzzz"); 392 remainingScripts.remove("Zyyy"); 393 394 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 395 for (String language : getLanguagesToShow()) { 396 Scope s = Iso639Data.getScope(language); 397 Type t = Iso639Data.getType(language); 398 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 399 remainingLanguages.remove(language); 400 } 401 } 402 403 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 404 for (String language : languages) { 405 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 406 for (BasicLanguageData basicData : basicLanguageData) { 407 if (basicData.getType() != BasicLanguageData.Type.primary) { 408 continue; 409 } 410 Set<String> mainTerritories = getTerritories(language); 411 if (mainTerritories.size() == 0) { 412 continue; 413 // mainTerritories.add("ZZ"); 414 } 415 416 TreeSet<String> mainScripts = new TreeSet<>(basicData.getScripts()); 417 if (mainScripts.size() == 0) { 418 continue; 419 } 420 for (String script : mainScripts) { 421 for (String territory : mainTerritories) { 422 addLanguageScriptCells2(tablePrinter2, language, script, territory); 423 remainingTerritories.remove(territory); 424 } 425 remainingScripts.remove(script); 426 } 427 } 428 remainingLanguages.remove(language); 429 } 430 // for (String language : remainingLanguages) { 431 // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ"); 432 // } 433 // for (String script : remainingScripts) { 434 // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ"); 435 // } 436 // for (String territory : remainingTerritories) { 437 // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory); 438 // } 439 440 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts, Languages, and Territories", null, SUPPLEMENTAL_INDEX_ANCHORS)); 441 pw1.println(tablePrinter2.toTable()); 442 pw1.close(); 443 } 444 445 private static Relation<String, String> territoryFix; 446 getTerritories(String language)447 private static Set<String> getTerritories(String language) { 448 if (territoryFix == null) { // set up the data 449 initTerritoryFix(); 450 } 451 Set<String> territories = territoryFix.getAll(language); 452 if (territories == null) { 453 territories = new TreeSet<>(); 454 } 455 return territories; 456 } 457 initTerritoryFix()458 private static void initTerritoryFix() { 459 territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 460 Set<String> languages = supplementalDataInfo.getLanguages(); 461 LanguageTagParser ltp = new LanguageTagParser(); 462 for (String language2 : languages) { 463 if (language2.contains("_")) { 464 ltp.set(language2).getLanguage(); 465 addOfficialTerritory(ltp, language2, ltp.getLanguage()); 466 } else { 467 addOfficialTerritory(ltp, language2, language2); 468 } 469 } 470 } 471 addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage)472 private static void addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage) { 473 // territoryFix.putAll(baseLanguage, supplementalDataInfo.getTerritoriesForPopulationData(language)); 474 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 475 if (territories == null) { 476 return; 477 } 478 for (String territory : territories) { 479 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 480 OfficialStatus status = data.getOfficialStatus(); 481 if (status.isMajor()) { 482 territoryFix.put(baseLanguage, territory); 483 System.out.println("\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language); 484 } 485 } 486 } 487 addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, String territory)488 private static void addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, 489 String territory) { 490 CLDRFile nativeLanguage = null; 491 if (SHOW_NATIVE) { 492 try { 493 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true); 494 } catch (RuntimeException e) { 495 try { 496 nativeLanguage = cldrFactory.make(language + "_" + script, true); 497 } catch (RuntimeException e2) { 498 try { 499 nativeLanguage = cldrFactory.make(language, true); 500 } catch (RuntimeException e3) { 501 } 502 } 503 } 504 // check for overlap 505 if (nativeLanguage != null && !script.equals("Jpan") && !script.equals("Hans") && !script.equals("Hant")) { 506 UnicodeSet scriptSet; 507 try { 508 String tempScript = script.equals("Kore") ? "Hang" : script; 509 scriptSet = new UnicodeSet("[:script=" + tempScript + ":]"); 510 } catch (RuntimeException e) { 511 scriptSet = new UnicodeSet(); 512 } 513 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING); 514 if (scriptSet.containsNone(exemplars)) { 515 System.out.println("Skipping CLDR file -- exemplars differ: " + language + "\t" 516 + nativeLanguage.getLocaleID() + "\t" + scriptSet + "\t" + exemplars); 517 nativeLanguage = null; 518 } 519 } 520 } 521 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 522 if (languageName == null) languageName = "???"; 523 String isLanguageTranslated = ""; 524 String nativeLanguageName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME, 525 language); 526 if (nativeLanguageName == null || nativeLanguageName.equals(language)) { 527 nativeLanguageName = "<i>n/a</i>"; 528 isLanguageTranslated = "n"; 529 } 530 531 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 532 // String nativeScriptName = nativeLanguage == null ? null : 533 // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script); 534 // if (nativeScriptName != null && !nativeScriptName.equals(script)) { 535 // scriptName = nativeScriptName + "[" + scriptName + "]"; 536 // } 537 538 String isTerritoryTranslated = ""; 539 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory); 540 String nativeTerritoryName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.TERRITORY_NAME, 541 territory); 542 if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) { 543 nativeTerritoryName = "<i>n/a</i>"; 544 isTerritoryTranslated = "n"; 545 } 546 547 // Type t = Iso639Data.getType(language); 548 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 549 // // ok 550 // } else if (!language.equals("und")){ 551 // scriptModern = "N"; 552 // } 553 //String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 554 555 Info scriptMetatdata = ScriptMetadata.getInfo(script); 556 tablePrinter2.addRow() 557 .addCell(scriptMetatdata.sampleChar) 558 .addCell(scriptName) 559 .addCell(script) 560 .addCell(isLanguageTranslated) 561 .addCell(languageName) 562 .addCell(nativeLanguageName) 563 .addCell(language) 564 .addCell(isTerritoryTranslated) 565 .addCell(territoryName) 566 .addCell(nativeTerritoryName) 567 .addCell(territory) 568 .finishRow(); 569 } 570 571 static ImmutableMap<String, String> fixScriptGif = ImmutableMap.<String, String>builder() 572 .put("hangul", "hangulsyllables") 573 .put("japanese", "hiragana") 574 .put("unknown or invalid script", "unknown") 575 .put("Hant", "Hant") 576 .put("Hans", "Hans") 577 .build(); 578 getGifName(String script)579 private static String getGifName(String script) { 580 String temp = fixScriptGif.get(script); 581 if (temp != null) { 582 return temp; 583 } 584 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 585 scriptName = scriptName.toLowerCase(Locale.ENGLISH); 586 temp = fixScriptGif.get(scriptName); 587 if (temp != null) { 588 return temp; 589 } 590 return scriptName; 591 } 592 593 private static Set<Type> oldLanguage = Collections.unmodifiableSet(EnumSet.of(Type.Ancient, Type.Extinct, 594 Type.Historical, Type.Constructed)); 595 addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, String script, String secondary)596 private static void addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, 597 String script, String secondary) { 598 try { 599 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 600 if (languageName == null) { 601 languageName = "¿" + language + "?"; 602 System.err.println("No English Language Name for:" + language); 603 } 604 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 605 if (scriptName == null) { 606 scriptName = "¿" + script + "?"; 607 System.err.println("No English Language Name for:" + script); 608 } 609 String scriptModern = StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N"; 610 //Scope s = Iso639Data.getScope(language); 611 Type t = Iso639Data.getType(language); 612 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 613 // // ok 614 // } else if (!language.equals("und")){ 615 // scriptModern = "N"; 616 // } 617 String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 618 619 tablePrinter.addRow() 620 .addCell(languageName) 621 .addCell(language) 622 .addCell(languageModern) 623 .addCell(secondary) 624 .addCell(scriptName) 625 .addCell(script) 626 .addCell(scriptModern) 627 .finishRow(); 628 629 tablePrinter2.addRow() 630 .addCell(scriptName) 631 .addCell(script) 632 .addCell(scriptModern) 633 .addCell(languageName) 634 .addCell(language) 635 .addCell(languageModern) 636 .addCell(secondary) 637 .finishRow(); 638 } catch (RuntimeException e) { 639 throw e; 640 } 641 } 642 643 static class LanguageInfo { 644 private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<>(); 645 646 Multimap<String, String> language_scripts = TreeMultimap.create(); 647 648 Multimap<String, String> language_territories = TreeMultimap.create(); 649 650 List<Map<String, String>> deprecatedItems = new ArrayList<>(); 651 652 Multimap<String, String> territory_languages; 653 654 Multimap<String, String> script_languages; 655 656 //Map group_contains = new TreeMap(); 657 658 Set<String[]> aliases = new TreeSet<String[]>(new ArrayComparator(new Comparator[] { new UTF16.StringComparator(), col })); 659 660 Comparator col3 = new ArrayComparator(new Comparator[] { col, col, col }); 661 662 Map<String, String> currency_fractions = new TreeMap<String, String>(col); 663 664 Map<String, Set> currency_territory = new TreeMap<String, Set>(col); 665 666 Map<String, Set> territory_currency = new TreeMap<String, Set>(col); 667 668 Set<String> territoriesWithCurrencies = new TreeSet<>(); 669 670 Set<String> currenciesWithTerritories = new TreeSet<>(); 671 672 Map<String, Map<String, Set<String>>> territoryData = new TreeMap<>(); 673 674 Set<String> territoryTypes = new TreeSet<>(); 675 676 Map<String, LinkedHashSet<String>> charSubstitutions = new TreeMap<String, LinkedHashSet<String>>(col); 677 678 String defaultDigits = null; 679 680 Map<String, Map<String, Object>> territoryLanguageData = new TreeMap<>(); 681 682 private Relation<String, String> territoriesToModernCurrencies = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, 683 null); 684 LanguageInfo(Factory cldrFactory)685 public LanguageInfo(Factory cldrFactory) throws IOException { 686 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 687 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 688 String path = it.next(); 689 String fullPath = supp.getFullXPath(path); 690 if (fullPath == null) { 691 supp.getFullXPath(path); 692 } 693 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 694 695 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/> 696 if (path.indexOf("/zoneItem") >= 0) { 697 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 698 String type = attributes.get("type"); 699 //String territory = attributes.get("territory"); 700 String aliasAttributes = attributes.get("aliases"); 701 if (aliasAttributes != null) { 702 String[] aliasesList = aliasAttributes.split("\\s+"); 703 704 for (int i = 0; i < aliasesList.length; ++i) { 705 String alias = aliasesList[i]; 706 aliases.add(new String[] { "timezone", alias, type }); 707 } 708 } 709 // TODO territory, multizone 710 continue; 711 } 712 713 if (path.indexOf("/currencyData") >= 0) { 714 if (path.indexOf("/fractions") >= 0) { 715 // <info iso4217="ADP" digits="0" rounding="0"/> 716 String element = parts.getElement(parts.size() - 1); 717 if (!element.equals("info")) 718 throw new IllegalArgumentException("Unexpected fractions element: " + element); 719 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 720 String iso4217 = attributes.get("iso4217"); 721 String digits = attributes.get("digits"); 722 String rounding = attributes.get("rounding"); 723 digits = digits + (rounding.equals("0") ? "" : " (" + rounding + ")"); 724 if (iso4217.equals("DEFAULT")) 725 defaultDigits = digits; 726 else 727 currency_fractions.put(getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits); 728 continue; 729 } 730 // <region iso3166="AR"> 731 // <currency iso4217="ARS" from="1992-01-01"/> 732 if (path.indexOf("/region") >= 0) { 733 Map<String, String> attributes = parts.getAttributes(parts.size() - 2); 734 String iso3166 = attributes.get("iso3166"); 735 attributes = parts.getAttributes(parts.size() - 1); 736 String iso4217 = attributes.get("iso4217"); 737 String to = attributes.get("to"); 738 if (to == null) 739 to = "\u221E"; 740 String from = attributes.get("from"); 741 if (from == null) 742 from = "-\u221E"; 743 String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false); 744 String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false); 745 Set info = territory_currency.get(countryName); 746 if (info == null) 747 territory_currency.put(countryName, info = new TreeSet(col3)); 748 info.add(new String[] { from, to, currencyName }); 749 info = currency_territory.get(currencyName); 750 if (info == null) 751 currency_territory.put(currencyName, info = new TreeSet(col)); 752 territoriesWithCurrencies.add(iso3166); 753 currenciesWithTerritories.add(iso4217); 754 if (to.equals("\u221E") || to.compareTo("2006") > 0) { 755 territoriesToModernCurrencies.put(iso3166, iso4217); 756 info.add("<b>" + countryName + "</b>"); 757 758 } else { 759 info.add("<i>" + countryName + "</i>"); 760 761 } 762 continue; 763 } 764 } 765 766 if (path.indexOf("/languageData") >= 0) { 767 Map<String, String> attributes = parts.findAttributes("language"); 768 String language = attributes.get("type"); 769 String alt = attributes.get("alt"); 770 addTokens(language, attributes.get("scripts"), " ", language_scripts); 771 // mark the territories 772 if (alt == null) 773 ; // nothing 774 else if ("secondary".equals(alt)) 775 language += "*"; 776 else 777 language += "*" + alt; 778 // <language type="af" scripts="Latn" territories="ZA"/> 779 addTokens(language, attributes.get("territories"), " ", language_territories); 780 continue; 781 } 782 783 if (path.indexOf("/deprecatedItems") >= 0) { 784 deprecatedItems.add(parts.findAttributes("deprecatedItems")); 785 continue; 786 } 787 if (path.indexOf("/calendarData") >= 0) { 788 Map<String, String> attributes = parts.findAttributes("calendar"); 789 if (attributes == null) { 790 System.err.println("Err: on path " + fullPath 791 + " , no attributes on 'calendar'. Probably, this tool is out of date."); 792 } else { 793 String type = attributes.get("type"); 794 String territories = attributes.get("territories"); 795 if (territories == null) { 796 System.err.println("Err: on path " + fullPath 797 + ", missing territories. Probably, this tool is out of date."); 798 } else if (type == null) { 799 System.err.println("Err: on path " + fullPath 800 + ", missing type. Probably, this tool is out of date."); 801 } else { 802 addTerritoryInfo(territories, "calendar", type); 803 } 804 } 805 } 806 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { 807 String element = parts.getElement(parts.size() - 1); 808 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 809 // later, make this a table 810 String key = "count"; 811 String display = "Days in week (min)"; 812 boolean useTerritory = true; 813 switch (element) { 814 case "firstDay": 815 key = "day"; 816 display = "First day of week"; 817 break; 818 case "weekendStart": 819 key = "day"; 820 display = "First day of weekend"; 821 break; 822 case "weekendEnd": 823 key = "day"; 824 display = "Last day of weekend"; 825 break; 826 case "measurementSystem": 827 // <measurementSystem type="metric" territories="001"/> 828 key = "type"; 829 display = "Meas. system"; 830 break; 831 case "paperSize": 832 key = "type"; 833 display = "Paper Size"; 834 break; 835 case "weekOfPreference": 836 useTerritory = false; 837 break; 838 } 839 if (useTerritory) { 840 String type = attributes.get(key); 841 String territories = attributes.get("territories"); 842 addTerritoryInfo(territories, display, type); 843 } 844 } 845 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) 846 continue; 847 System.out.println("Skipped Element: " + path); 848 } 849 850 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 851 for (String language : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) { 852 language_territories.put(language, territory); 853 } 854 } 855 territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); 856 script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); 857 858 // now get some metadata 859 localeAliasInfo.put("language", new TreeMap<String, String>()); 860 localeAliasInfo.put("script", new TreeMap<String, String>()); 861 localeAliasInfo.put("territory", new TreeMap<String, String>()); 862 localeAliasInfo.put("variant", new TreeMap<String, String>()); 863 localeAliasInfo.put("zone", new TreeMap<String, String>()); 864 localeAliasInfo.put("subdivision", new TreeMap<String, String>()); 865 localeAliasInfo.put("unit", new TreeMap<String, String>()); 866 localeAliasInfo.put("usage", new TreeMap<String, String>()); 867 868 //localeAliasInfo.get("language").put("nb", "no"); 869 localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); 870 localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); 871 localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); 872 localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); 873 localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); 874 875 // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); 876 Map<String, Map<String, R2<List<String>, String>>> localeAliasInfo2 = supplementalDataInfo 877 .getLocaleAliasInfo(); 878 for (Entry<String, Map<String, R2<List<String>, String>>> entry1 : localeAliasInfo2.entrySet()) { 879 String element = entry1.getKey(); 880 for (Entry<String, R2<List<String>, String>> entry2 : entry1.getValue().entrySet()) { 881 String type = entry2.getKey(); 882 R2<List<String>, String> replacementReason = entry2.getValue(); 883 List<String> replacementList = replacementReason.get0(); 884 String replacement = replacementList == null ? null : 885 Joiner.on(" ").join(replacementList); 886 String reason = replacementReason.get1(); 887 if (element.equals("timezone")) { 888 element = "zone"; 889 } 890 try { 891 localeAliasInfo.get(element).put(type, replacement == null ? "?" : replacement); 892 } catch (Exception e) { 893 // TODO Auto-generated catch block 894 throw new IllegalArgumentException("Can't find alias data for '" + element + "'", e); 895 } 896 897 String name = ""; 898 if (replacement == null) { 899 name = "(none)"; 900 } else if (element.equals("language")) { 901 name = getName(replacement, false); 902 } else if (element.equals("zone")) { 903 element = "timezone"; 904 name = replacement + "*"; 905 } else { 906 int typeCode = CLDRFile.typeNameToCode(element); 907 if (typeCode >= 0) { 908 name = getName(typeCode, replacement, false); 909 } else { 910 name = "*" + replacement; 911 } 912 } 913 if (element.equals("territory")) { 914 territoryAliases.put(type, name); 915 aliases 916 .add(new String[] { element, getName(CLDRFile.TERRITORY_NAME, type, false), name, reason }); 917 } else { 918 aliases.add(new String[] { element, type, name, reason }); 919 } 920 continue; 921 } 922 } 923 Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); 924 Log.close(); 925 } 926 printLikelySubtags(PrintWriter index)927 public void printLikelySubtags(PrintWriter index) throws IOException { 928 929 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); 930 931 TablePrinter tablePrinter = new TablePrinter() 932 .addColumn("Source Lang", "class='source'", null, "class='source'", true).setSortPriority(1) 933 .setSpanRows(false) 934 .addColumn("Source Script", "class='source'", null, "class='source'", true).setSortPriority(0) 935 .setSpanRows(false).setBreakSpans(true) 936 .addColumn("Source Region", "class='source'", null, "class='source'", true).setSortPriority(2) 937 .setSpanRows(false) 938 .addColumn("Target Lang", "class='target'", null, "class='target'", true).setSortPriority(3) 939 .setBreakSpans(true) 940 .addColumn("Target Script", "class='target'", null, "class='target'", true).setSortPriority(4) 941 .addColumn("Target Region", "class='target'", null, "class='target'", true).setSortPriority(5) 942 .addColumn("Source ID", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 943 .addColumn("Target ID", "class='target'", null, "class='target'", true); 944 Map<String, String> subtags = supplementalDataInfo.getLikelySubtags(); 945 LanguageTagParser sourceParsed = new LanguageTagParser(); 946 LanguageTagParser targetParsed = new LanguageTagParser(); 947 for (String source : subtags.keySet()) { 948 String target = subtags.get(source); 949 sourceParsed.set(source); 950 targetParsed.set(target); 951 tablePrinter.addRow() 952 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage())) 953 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript())) 954 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion())) 955 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage())) 956 .addCell(getName(CLDRFile.SCRIPT_NAME, targetParsed.getScript())) 957 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion())) 958 .addCell(source) 959 .addCell(target) 960 .finishRow(); 961 } 962 pw.println(tablePrinter.toTable()); 963 pw.close(); 964 } 965 966 static class LanguageData extends R4<Double, Double, Double, String> { LanguageData(Double a, Double b, Double c, String d)967 public LanguageData(Double a, Double b, Double c, String d) { 968 super(a, b, c, d); 969 } 970 } 971 getName(final int type, final String value)972 private String getName(final int type, final String value) { 973 if (value == null || value.equals("") || value.equals("und")) { 974 return "\u00A0"; 975 } 976 String result = english.getName(type, value); 977 if (result == null) { 978 result = value; 979 } 980 return result; 981 } 982 983 static final Comparator INVERSE_COMPARABLE = new Comparator() { 984 @Override 985 public int compare(Object o1, Object o2) { 986 return ((Comparable) o2).compareTo(o1); 987 } 988 }; 989 990 // http://www.faqs.org/rfcs/rfc2396.html 991 // delims = "<" | ">" | "#" | "%" | <"> 992 // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 993 // Within a query component, the characters ";", "/", "?", ":", "@", 994 // "&", "=", "+", ",", and "$" are reserved. 995 static final UnicodeSet ESCAPED_URI_QUERY = new UnicodeSet( 996 "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]").freeze(); 997 998 private static final int MINIMAL_BIG_VENDOR = 8; 999 1000 static { System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement())1001 System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement()); 1002 } 1003 urlEncode(String input)1004 private String urlEncode(String input) { 1005 try { 1006 byte[] utf8 = input.getBytes("utf-8"); 1007 StringBuffer output = new StringBuffer(); 1008 for (int i = 0; i < utf8.length; ++i) { 1009 int b = utf8[i] & 0xFF; 1010 if (ESCAPED_URI_QUERY.contains(b)) { 1011 output.append('%'); 1012 if (b < 0x10) output.append('0'); 1013 output.append(Integer.toString(b, 16)); 1014 } else { 1015 output.append((char) b); 1016 } 1017 } 1018 return output.toString(); 1019 } catch (UnsupportedEncodingException e) { 1020 throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); 1021 } 1022 } 1023 addBug(int bugNumber, String text, String from, String subject, String body)1024 private String addBug(int bugNumber, String text, String from, String subject, String body) { 1025 return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL 1026 + "'>" + text + "</a>"; 1027 } 1028 showLanguageCountryInfo(PrintWriter pw)1029 private void showLanguageCountryInfo(PrintWriter pw) throws IOException { 1030 FormattedFileWriter ffw = new FormattedFileWriter(null, "Language-Territory Information", 1031 null 1032 // "<div style='margin:1em'><p>The language data is provided for localization testing, and is under development for CLDR 1.5. " 1033 // + 1034 // "To add a new territory for a language, see the <i>add new</i> links below. " + 1035 // "For more information, see <a href=\"territory_language_information.html\">Territory-Language Information.</a>" 1036 // + 1037 // "<p></div>" 1038 , SUPPLEMENTAL_INDEX_ANCHORS); 1039 PrintWriter pw21 = new PrintWriter(ffw); 1040 PrintWriter pw2 = pw21; 1041 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1042 nf.setGroupingUsed(true); 1043 //NumberFormat percent = new DecimalFormat("000.0%"); 1044 TablePrinter tablePrinter = new TablePrinter() 1045 // tablePrinter.setSortPriorities(0,5) 1046 .addColumn("L", "class='source'", null, "class='source'", true) 1047 .setSortPriority(0) 1048 .setBreakSpans(true) 1049 .setRepeatHeader(true) 1050 .setHidden(true) 1051 .addColumn("Language", "class='source'", null, "class='source'", true) 1052 .setSortPriority(0) 1053 .setBreakSpans(true) 1054 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 1055 // .addColumn("Report Bug", "class='target'", null, "class='target'", false) 1056 .addColumn("Territory", "class='target'", null, "class='target'", true) 1057 .addColumn("Code", "class='target'", "<a href=\"territory_language_information.html#{0}\">{0}</a>", 1058 "class='target'", true) 1059 .addColumn("Language Population", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1060 .setSortPriority(1).setSortAscending(false) 1061 // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", "class='targetRight'", true) 1062 // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true) 1063 // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true) 1064 // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", "class='targetRight'", true) 1065 ; 1066 TreeSet<String> languages = new TreeSet<>(); 1067 Collection<Comparable[]> data = new ArrayList<>(); 1068 String msg = "<br><i>Please click on each country code</i>"; 1069 1070 Collection<Comparable[]> plainData = new ArrayList<>(); 1071 1072 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1073 // PopulationData territoryData = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1074 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1075 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) { 1076 PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1077 languages.add(languageCode); 1078 Comparable[] items = new Comparable[] { 1079 getFirstPrimaryWeight(getLanguageName(languageCode)), 1080 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1081 languageCode, 1082 // bug, 1083 territoryName + getOfficialStatus(territoryCode, languageCode), 1084 territoryCode, 1085 languageData.getPopulation(), 1086 // population, 1087 // languageliteracy, 1088 // territoryLiteracy, 1089 // gdp 1090 }; 1091 Comparable[] plainItems = new Comparable[] { 1092 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1093 languageCode, 1094 territoryName, 1095 territoryCode, 1096 getRawOfficialStatus(territoryCode, languageCode), 1097 languageData.getPopulation(), 1098 languageData.getLiteratePopulation() 1099 }; 1100 1101 data.add(items); 1102 plainData.add(plainItems); 1103 } 1104 } 1105 for (String languageCode : languages) { 1106 Comparable[] items = new Comparable[] { 1107 getFirstPrimaryWeight(getLanguageName(languageCode)), 1108 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1109 languageCode, 1110 // bug, 1111 addBug(1217, "<i>add new</i>", "<email>", "Add territory to " + getLanguageName(languageCode) 1112 + " (" + languageCode + ")", "<territory, speaker population in territory, and references>"), 1113 "", 1114 0.0d, 1115 // 0.0d, 1116 // 0.0d, 1117 // 0.0d, 1118 // gdp 1119 }; 1120 data.add(items); 1121 } 1122 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1123 String value = tablePrinter.addRows(flattened).toTable(); 1124 pw2.println(value); 1125 pw2.close(); 1126 try (PrintWriter pw21plain = FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) { 1127 for (Comparable[] row : plainData) { 1128 pw21plain.println(Joiner.on("\t").join(row)); 1129 } 1130 } 1131 } 1132 getLanguagePluralMessage(String msg, String languageCode)1133 private String getLanguagePluralMessage(String msg, String languageCode) { 1134 String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage(); 1135 String messageWithPlurals = msg + ", on <a href='language_plural_rules.html#" + mainLanguageCode 1136 + "'>plurals</a>" + 1137 ", and on <a href='likely_subtags.html#" + mainLanguageCode + "'>likely-subtags</a>"; 1138 return messageWithPlurals; 1139 } 1140 getLanguageName(String languageCode)1141 private String getLanguageName(String languageCode) { 1142 String result = english.getName(languageCode); 1143 if (!result.equals(languageCode)) return result; 1144 Set<String> names = Iso639Data.getNames(languageCode); 1145 if (names != null && names.size() != 0) { 1146 return names.iterator().next(); 1147 } 1148 return languageCode; 1149 } 1150 showCoverageGoals(PrintWriter pw)1151 private void showCoverageGoals(PrintWriter pw) throws IOException { 1152 PrintWriter pw2 = new PrintWriter(new FormattedFileWriter(null, "Coverage Goals", 1153 null 1154 // "<p>" + 1155 // "The following show default coverage goals for larger organizations. " + 1156 // "<i>[n/a]</i> shows where there is no specific value for a given organization, " + 1157 // "while <i>(...)</i> indicates that the goal is inherited from the parent. " + 1158 // "A * is added if the goal differs from the parent locale's goal. " + 1159 // "For information on what these goals mean (comprehensive, modern, moderate,...), see the LDML specification " 1160 // + 1161 // "<a href='http://www.unicode.org/reports/tr35/#Coverage_Levels'>Appendix M: Coverage Levels</a>. " + 1162 // + 1163 // "</p>" 1164 , null)); 1165 1166 TablePrinter tablePrinter = new TablePrinter() 1167 // tablePrinter.setSortPriorities(0,4) 1168 .addColumn("Language", "class='source'", null, "class='source'", true) 1169 .setSortPriority(0) 1170 .setBreakSpans(true) 1171 .addColumn("Code", "class='source'", 1172 "<a href=\"http://www.unicode.org/cldr/data/common/main/{0}.xml\">{0}</a>", "class='source'", false); 1173 Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes(); 1174 Set<String> locales = new TreeSet<>(); 1175 Set<Organization> vendors = new LinkedHashSet<>(); 1176 Set<Organization> smallVendors = new LinkedHashSet<>(); 1177 1178 for (Entry<Organization, Map<String, Level>> vendorData : vendordata.entrySet()) { 1179 Organization vendor = vendorData.getKey(); 1180 //if (vendor.equals(Organization.java)) continue; 1181 Map<String, Level> data = vendorData.getValue(); 1182 if (data.size() < MINIMAL_BIG_VENDOR) { 1183 smallVendors.add(vendor); 1184 continue; 1185 } 1186 vendors.add(vendor); 1187 tablePrinter.addColumn(vendor.getDisplayName(), "class='target'", null, "class='target'", false) 1188 .setSpanRows(true); 1189 locales.addAll(data.keySet()); 1190 } 1191 1192 Collection<Comparable[]> data = new ArrayList<>(); 1193 List<String> list = new ArrayList<>(); 1194 LanguageTagParser ltp = new LanguageTagParser(); 1195 //String alias2 = getAlias("sh_YU"); 1196 1197 for (String locale : locales) { 1198 list.clear(); 1199 String localeCode = locale.equals("*") ? "und" : locale; 1200 String alias = getAlias(localeCode); 1201 if (!alias.equals(localeCode)) { 1202 System.out.println("Should use canonical form: " + locale + " => " + alias); 1203 } 1204 String baseLang = ltp.set(localeCode).getLanguage(); 1205 String baseLangName = getLanguageName(baseLang); 1206 list.add("und".equals(localeCode) ? "other" : baseLangName); 1207 list.add(locale); 1208 for (Organization vendor : vendors) { 1209 String status = getVendorStatus(locale, vendor, vendordata); 1210 if (!baseLang.equals(locale) && !status.startsWith("<")) { 1211 String langStatus = getVendorStatus(baseLang, vendor, vendordata); 1212 if (!langStatus.equals(status)) { 1213 status += "*"; 1214 } 1215 } 1216 list.add(status); 1217 } 1218 data.add(list.toArray(new String[list.size()])); 1219 } 1220 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1221 String value = tablePrinter.addRows(flattened).toTable(); 1222 pw2.println(value); 1223 pw2.append("<h2>Others</h2><div align='left'><ul>"); 1224 for (Organization vendor2 : smallVendors) { 1225 pw2.append("<li><b>"); 1226 pw2.append(TransliteratorUtilities.toHTML.transform( 1227 vendor2.getDisplayName())).append(": </b>"); 1228 boolean first1 = true; 1229 for (Level level : Level.values()) { 1230 boolean first2 = true; 1231 Level other = null; 1232 for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) { 1233 String key = data2.getKey(); 1234 Level level2 = data2.getValue(); 1235 if (level != level2) { 1236 continue; 1237 } 1238 if (key.equals("*")) { 1239 other = level2; 1240 continue; 1241 } 1242 if (first2) { 1243 if (first1) { 1244 first1 = false; 1245 } else { 1246 pw2.append("; "); 1247 } 1248 pw2.append(level2.toString()).append(": "); 1249 first2 = false; 1250 } else { 1251 pw2.append(", "); 1252 } 1253 pw2.append(TransliteratorUtilities.toHTML.transform(key)); 1254 } 1255 if (other != null) { 1256 if (first2) { 1257 if (first1) { 1258 first1 = false; 1259 } else { 1260 pw2.append("; "); 1261 } 1262 pw2.append(level.toString()).append(": "); 1263 first2 = false; 1264 } else { 1265 pw2.append(", "); 1266 } 1267 pw2.append("<i>other</i>"); 1268 } 1269 } 1270 pw2.append("</li>"); 1271 } 1272 pw2.append("</ul></div>"); 1273 pw2.close(); 1274 } 1275 1276 LanguageTagParser lpt2 = new LanguageTagParser(); 1277 getAlias(String locale)1278 private String getAlias(String locale) { 1279 lpt2.set(locale); 1280 locale = lpt2.toString(); // normalize 1281 //String language = lpt2.getLanguage(); 1282 String script = lpt2.getScript(); 1283 String region = lpt2.getRegion(); 1284 // List variants = lpt2.getVariants(); 1285 String temp; 1286 for (String old : localeAliasInfo.get("language").keySet()) { 1287 if (locale.startsWith(old)) { 1288 // the above is a rough check, and will fail with old=moh and locale=mo 1289 if (!locale.equals(old) && !locale.startsWith(old + "_")) { 1290 continue; 1291 } 1292 temp = localeAliasInfo.get("language").get(old); 1293 lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); 1294 break; 1295 } 1296 } 1297 temp = localeAliasInfo.get("script").get(script); 1298 if (temp != null) { 1299 lpt2.setScript(temp.split("\\s+")[0]); 1300 } 1301 temp = localeAliasInfo.get("territory").get(region); 1302 if (temp != null) { 1303 lpt2.setRegion(temp.split("\\s+")[0]); 1304 } 1305 return lpt2.toString(); 1306 } 1307 getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata)1308 private String getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata) { 1309 Level statusLevel = vendordata.get(vendor).get(locale); 1310 String status = statusLevel == null ? null : statusLevel.toString(); 1311 String curLocale = locale; 1312 while (status == null) { 1313 curLocale = LocaleIDParser.getParent(curLocale); 1314 if ("root".equals(curLocale)) { 1315 status = " "; 1316 break; 1317 } 1318 statusLevel = vendordata.get(vendor).get(curLocale); 1319 if (statusLevel != null) { 1320 status = "<i>(" + statusLevel + ")</i>"; 1321 } 1322 } 1323 return status; 1324 } 1325 showCountryLanguageInfo(PrintWriter pw)1326 private void showCountryLanguageInfo(PrintWriter pw) throws IOException { 1327 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory-Language Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1328 PrintWriter pw2 = pw21; 1329 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1330 nf.setGroupingUsed(true); 1331 //NumberFormat percent = new DecimalFormat("000.0%"); 1332 TablePrinter tablePrinter = new TablePrinter() 1333 // tablePrinter.setSortPriorities(0,4) 1334 .addColumn("T", "class='source'", null, "class='source'", true) 1335 .setSortPriority(0) 1336 .setBreakSpans(true) 1337 .setRepeatHeader(true) 1338 .setHidden(true) 1339 .addColumn("Territory", "class='source'", null, "class='source'", true) 1340 .setSortPriority(0) 1341 .setBreakSpans(true) 1342 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1343 "class='source'", true) 1344 .addColumn("Terr. Literacy", "class='target'", "{0,number,@@}%", "class='targetRight'", true); 1345 1346 tablePrinter 1347 .addColumn("Language", "class='target'", null, "class='target'", false) 1348 .addColumn("Code", "class='target'", "<a href=\"language_territory_information.html#{0}\">{0}</a>", 1349 "class='target'", false) 1350 .addColumn("Lang. Pop.", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1351 .addColumn("Pop.%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1352 .setSortAscending(false).setSortPriority(1) 1353 .addColumn("Literacy%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1354 .addColumn("Written%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1355 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1356 1357 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1358 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1359 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1360 double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); 1361 1362 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) { 1363 PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1364 double languagePopulationPercent = 100 * languageData.getPopulation() / territoryData2.getPopulation(); 1365 double languageliteracy = languageData.getLiteratePopulationPercent(); 1366 double writingFrequency = languageData.getWritingPercent(); 1367 1368 tablePrinter.addRow() 1369 .addCell(getFirstPrimaryWeight(territoryName)) 1370 .addCell(territoryName) 1371 .addCell(territoryCode) 1372 .addCell(territoryLiteracy) 1373 .addCell(getLanguageName(languageCode) + getOfficialStatus(territoryCode, languageCode)) 1374 .addCell(languageCode) 1375 .addCell(languageData.getPopulation()) 1376 .addCell(languagePopulationPercent) 1377 .addCell(languageliteracy) 1378 .addCell(writingFrequency) 1379 .addCell( 1380 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + getLanguageName(languageCode) 1381 + " (" + languageCode + ")" 1382 + " in " + territoryName + " (" + territoryCode + ")", 1383 "<fixed data for territory, plus references>")) 1384 .finishRow(); 1385 } 1386 1387 tablePrinter.addRow() 1388 .addCell(getFirstPrimaryWeight(territoryName)) 1389 .addCell(territoryName) 1390 .addCell(territoryCode) 1391 .addCell(territoryLiteracy) 1392 .addCell( 1393 addBug(1217, "<i>add new</i>", "<email>", "Add language to " + territoryName + "(" 1394 + territoryCode + ")", 1395 "<language, speaker pop. and literacy in territory, plus references>")) 1396 .addCell("") 1397 .addCell(0.0d) 1398 .addCell(0.0d) 1399 .addCell(0.0d) 1400 .addCell(0.0d) 1401 .addCell("") 1402 .finishRow(); 1403 1404 } 1405 String value = tablePrinter.toTable(); 1406 pw2.println(value); 1407 pw2.close(); 1408 } 1409 showCountryInfo(PrintWriter pw)1410 private void showCountryInfo(PrintWriter pw) throws IOException { 1411 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1412 PrintWriter pw2 = pw21; 1413 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1414 nf.setGroupingUsed(true); 1415 //NumberFormat percent = new DecimalFormat("000.0%"); 1416 TablePrinter tablePrinter = new TablePrinter() 1417 // tablePrinter.setSortPriorities(0,4) 1418 .addColumn("T", "class='source'", null, "class='source'", true) 1419 .setSortPriority(0) 1420 .setBreakSpans(true) 1421 .setRepeatHeader(true) 1422 .setHidden(true) 1423 .addColumn("Territory", "class='source'", null, "class='source'", true) 1424 .setSortPriority(0) 1425 .setBreakSpans(true) 1426 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1427 "class='source'", true) 1428 .addColumn("Terr. Pop (M)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1429 .addColumn("Terr. GDP ($M PPP)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1430 .addColumn("Currencies (2006...)", "class='target'", null, "class='target'", true); 1431 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1432 String header = it.next(); 1433 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1434 tablePrinter.addColumn(header).setHeaderAttributes("class='target'") 1435 .setCellAttributes("class='target'").setSpanRows(true); 1436 } 1437 1438 tablePrinter 1439 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1440 1441 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1442 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1443 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1444 double population = territoryData2.getPopulation() / 1000000; 1445 double gdp = territoryData2.getGdp() / 1000000; 1446 1447 Map<String, Set<String>> worldData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false)); 1448 Map<String, Set<String>> countryData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false)); 1449 1450 tablePrinter.addRow() 1451 .addCell(getFirstPrimaryWeight(territoryName)) 1452 .addCell(territoryName) 1453 .addCell(territoryCode) 1454 .addCell(population) 1455 .addCell(gdp) 1456 .addCell(getCurrencyNames(territoryCode)); 1457 1458 addOtherCountryData(tablePrinter, worldData, countryData); 1459 1460 tablePrinter 1461 .addCell( 1462 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + territoryName + " (" + territoryCode + ")", 1463 "<fixed data for territory, plus references>")) 1464 .finishRow(); 1465 1466 } 1467 String value = tablePrinter.toTable(); 1468 pw2.println(value); 1469 pw2.close(); 1470 } 1471 1472 static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); 1473 1474 // Do just an approximation for now 1475 getFirstPrimaryWeight(String territoryName)1476 private String getFirstPrimaryWeight(String territoryName) { 1477 char first = territoryName.charAt(0); 1478 String result = nfd.getDecomposition(first); 1479 if (result == null) { 1480 return UTF16.valueOf(first); 1481 } 1482 return UTF16.valueOf(result.codePointAt(0)); 1483 } 1484 1485 // private String getTerritoryWithLikelyLink(String territoryCode) { 1486 // return "<a href='likely_subtags.html#und_"+ territoryCode + "'>" + territoryCode + "</a>"; 1487 // } 1488 getOfficialStatus(String territoryCode, String languageCode)1489 private String getOfficialStatus(String territoryCode, String languageCode) { 1490 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1491 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1492 return " <span title='" + x.getOfficialStatus().toString().replace('_', ' ') + "'>{" 1493 + x.getOfficialStatus().toShortString() + "}</span>"; 1494 } 1495 getRawOfficialStatus(String territoryCode, String languageCode)1496 private String getRawOfficialStatus(String territoryCode, String languageCode) { 1497 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1498 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1499 return x.getOfficialStatus().toString(); 1500 } 1501 addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData)1502 private void addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData) { 1503 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1504 String type = it2.next(); 1505 Set<String> worldResults = worldData.get(type); 1506 Set<String> territoryResults = null; 1507 if (countryData != null) { 1508 territoryResults = countryData.get(type); 1509 } 1510 if (territoryResults == null) { 1511 territoryResults = worldResults; 1512 } 1513 String out = ""; 1514 if (territoryResults != null) { 1515 out = territoryResults + ""; 1516 out = out.substring(1, out.length() - 1); // remove [ and ] 1517 } 1518 tablePrinter.addCell(out); 1519 } 1520 } 1521 getCurrencyNames(String territoryCode)1522 private String getCurrencyNames(String territoryCode) { 1523 Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode); 1524 if (currencies == null || currencies.size() == 0) return ""; 1525 StringBuilder buffer = new StringBuilder(); 1526 for (String code : currencies) { 1527 if (buffer.length() != 0) buffer.append(",<br>"); 1528 buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false)); 1529 } 1530 return buffer.toString(); 1531 } 1532 addCharSubstitution(String value, String substitute)1533 private void addCharSubstitution(String value, String substitute) { 1534 if (substitute.equals(value)) 1535 return; 1536 LinkedHashSet<String> already = charSubstitutions.get(value); 1537 if (already == null) 1538 charSubstitutions.put(value, already = new LinkedHashSet<>(0)); 1539 already.add(substitute); 1540 Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); 1541 } 1542 1543 /** 1544 * 1545 */ 1546 // public void showTerritoryInfo() { 1547 // Map territory_parent = new TreeMap(); 1548 // gather("001", territory_parent); 1549 // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { 1550 // String territory = (String) it.next(); 1551 // String parent = (String) territory_parent.get(territory); 1552 // System.out.println(territory + "\t" + english.getName(english.TERRITORY_NAME, territory) + "\t" 1553 // + parent + "\t" + english.getName(english.TERRITORY_NAME, parent)); 1554 // } 1555 // } 1556 1557 // private void gather(String item, Map territory_parent) { 1558 // Collection containedByItem = (Collection) group_contains.get(item); 1559 // if (containedByItem == null) 1560 // return; 1561 // for (Iterator it = containedByItem.iterator(); it.hasNext();) { 1562 // String contained = (String) it.next(); 1563 // territory_parent.put(contained, item); 1564 // gather(contained, territory_parent); 1565 // } 1566 // } 1567 addTerritoryInfo(String territoriesList, String type, String info)1568 private void addTerritoryInfo(String territoriesList, String type, String info) { 1569 String[] territories = territoriesList.split("\\s+"); 1570 territoryTypes.add(type); 1571 for (int i = 0; i < territories.length; ++i) { 1572 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false); 1573 Map<String, Set<String>> s = territoryData.get(territory); 1574 if (s == null) { 1575 territoryData.put(territory, s = new TreeMap<>()); 1576 } 1577 Set<String> ss = s.get(type); 1578 if (ss == null) { 1579 s.put(type, ss = new TreeSet<>()); 1580 } 1581 ss.add(info); 1582 } 1583 } 1584 showCalendarData(PrintWriter pw0)1585 public void showCalendarData(PrintWriter pw0) throws IOException { 1586 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Other Territory Data", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1587 pw.println("<table>"); 1588 pw.println("<tr><th class='source'>Territory</th>"); 1589 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1590 String header = it.next(); 1591 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1592 pw.println("<th class='target'>" + header + "</th>"); 1593 } 1594 pw.println("</tr>"); 1595 1596 String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false); 1597 Map<String, Set<String>> worldData = territoryData.get(worldName); 1598 for (Iterator<String> it = territoryData.keySet().iterator(); it.hasNext();) { 1599 String country = it.next(); 1600 if (country.equals(worldName)) 1601 continue; 1602 showCountry(pw, country, country, worldData); 1603 } 1604 showCountry(pw, worldName, "Other", worldData); 1605 pw.println("</table>"); 1606 pw.close(); 1607 } 1608 showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)1609 private void showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData) { 1610 pw.println("<tr><td class='source'>" + countryTitle + "</td>"); 1611 Map<String, Set<String>> data = territoryData.get(country); 1612 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1613 String type = it2.next(); 1614 String target = "target"; 1615 Set<String> results = data.get(type); 1616 Set<String> worldResults = worldData.get(type); 1617 if (results == null) { 1618 results = worldResults; 1619 target = "target2"; 1620 } else if (results.equals(worldResults)) { 1621 target = "target2"; 1622 } 1623 String out = ""; 1624 if (results != null) { 1625 out = results + ""; 1626 out = out.substring(1, out.length() - 1); // remove [ and ] 1627 } 1628 pw.println("<td class='" + target + "'>" + out + "</td>"); 1629 } 1630 pw.println("</tr>"); 1631 } 1632 showCorrespondances()1633 public void showCorrespondances() { 1634 // show correspondances between language and script 1635 Map<String, String> name_script = new TreeMap<>(); 1636 for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext();) { 1637 String script = it.next(); 1638 String name = english.getName(CLDRFile.SCRIPT_NAME, script); 1639 if (name == null) 1640 name = script; 1641 name_script.put(name, script); 1642 /* 1643 * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 1644 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 1645 * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 1646 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 1647 */} 1648 String delimiter = "\\P{L}+"; 1649 Map<String, String> name_language = new TreeMap<>(); 1650 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1651 String language = it.next(); 1652 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1653 if (names == null) 1654 names = language; 1655 name_language.put(names, language); 1656 } 1657 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1658 String language = it.next(); 1659 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1660 if (names == null) 1661 names = language; 1662 String[] words = names.split(delimiter); 1663 if (words.length > 1) { 1664 // System.out.println(names); 1665 } 1666 for (int i = 0; i < words.length; ++i) { 1667 String name = words[i]; 1668 String script = name_script.get(name); 1669 if (script != null) { 1670 Set<String> langSet = (Set<String>) script_languages.asMap().get(script); 1671 if (langSet != null && langSet.contains(language)) 1672 System.out.print("*"); 1673 System.out.println("\t" + name + " [" + language + "]\t=> " + name + " [" + script + "]"); 1674 } else { 1675 String language2 = name_language.get(name); 1676 if (language2 != null && !language.equals(language2)) { 1677 Set<String> langSet = (Set<String>) language_scripts.get(language); 1678 if (langSet != null) 1679 System.out.print("*"); 1680 System.out.print("?\tSame script?\t + " + getName(CLDRFile.LANGUAGE_NAME, language, false) 1681 + "\t & " + getName(CLDRFile.LANGUAGE_NAME, language2, false)); 1682 langSet = (Set<String>) language_scripts.get(language2); 1683 if (langSet != null) 1684 System.out.print("*"); 1685 System.out.println(); 1686 } 1687 } 1688 } 1689 } 1690 } 1691 1692 /** 1693 * @throws IOException 1694 * 1695 */ printCurrency(PrintWriter index)1696 public void printCurrency(PrintWriter index) throws IOException { 1697 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Detailed Territory-Currency Information", 1698 null 1699 // "<p>The following table shows when currencies were in use in different countries. " + 1700 // "See also <a href='#format_info'>Decimal Digits and Rounding</a>. " + 1701 // "To correct any information here, please file a " + 1702 // addBug(1274, "bug", "<email>", "Currency Bug", 1703 // "<currency, country, and references supporting change>") + 1704 // ".</p>" 1705 , SUPPLEMENTAL_INDEX_ANCHORS)); 1706 String section1 = "Territory to Currency"; 1707 String section2 = "Decimal Digits and Rounding"; 1708 showContents(pw, "territory_currency", section1, "format_info", section2); 1709 1710 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("territory_currency", "1. " + section1) + "</h2>"); 1711 1712 // doTitle(pw, "Territory \u2192 Currency"); 1713 pw.println("<table>"); 1714 pw.println("<tr><th class='source'>Territory</th>" + 1715 "<th class='source'>Code</th>" + 1716 "<th class='target'>From</th>" + 1717 "<th class='target'>To</th>" + 1718 "<th class='target'>Currency</th>" + 1719 "<th class='target'>Name</th>" + 1720 "</tr>"); 1721 1722 Relation<String, String> currencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1723 HashSet.class); 1724 Relation<String, String> modernCurrencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1725 HashSet.class); 1726 1727 for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) { 1728 String name = nameCode.getKey(); 1729 String regionCode = nameCode.getValue(); 1730 if (!StandardCodes.isCountry(regionCode)) { 1731 continue; 1732 } 1733 if (sc.isLstregPrivateUse("region", regionCode)) { 1734 continue; 1735 } 1736 Set<CurrencyDateInfo> info = supplementalDataInfo.getCurrencyDateInfo(regionCode); 1737 1738 int infoSize = 1; 1739 if (info != null) { 1740 infoSize = info.size(); 1741 } 1742 pw.println("<tr>" + 1743 "<td class='source' rowSpan='" + infoSize + "'>" + name + "</td>" + 1744 "<td class='source' rowSpan='" + infoSize + "'>" + CldrUtility.getDoubleLinkedText(regionCode) 1745 + "</td>"); 1746 if (info == null) { 1747 pw.println("<td class='target'>" + "<i>na</i>" + "</td>" + 1748 "<td class='target'>" + "<i>na</i>" + "</td>" + 1749 "<td class='target'>" + "<i>na</i>" + "</td>" + 1750 "<td class='target'>" + "<i>na</i>" + "</td>" + 1751 "</tr>"); 1752 continue; 1753 } 1754 boolean first = true; 1755 for (CurrencyDateInfo infoItem : info) { 1756 Date endData = infoItem.getEnd(); 1757 if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { 1758 modernCurrencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1759 } else { 1760 currencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1761 } 1762 if (first) 1763 first = false; 1764 else 1765 pw.println("<tr>"); 1766 pw.println("<td class='target'>" + CurrencyDateInfo.formatDate(infoItem.getStart()) + "</td>" + 1767 "<td class='target'>" + CurrencyDateInfo.formatDate(endData) + "</td>" + 1768 "<td class='target'>" + infoItem.getCurrency() + "</td>" + 1769 "<td class='target'>" + english.getName("currency", infoItem.getCurrency()) + "</td>" + 1770 "</tr>"); 1771 } 1772 } 1773 // doFooter(pw); 1774 // pw.close(); 1775 // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); 1776 pw.write("</table>"); 1777 1778 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) + "</h2>"); 1779 1780 pw.write("<p>This table shows the number of digits used for each currency, " 1781 + " and the countries where it is or was in use. " 1782 + "Countries where the currency is in current use are bolded. " 1783 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " 1784 + "Where the values are different in a cash context, that is shown in a second column." 1785 + "</p>"); 1786 pw.write("<div align='center'><table>"); 1787 1788 // doTitle(pw, "Currency Format Info"); 1789 // <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" cashRounding="0"/> 1790 1791 pw.println("<tr>" + 1792 "<th class='source nowrap'>Name</th>" + 1793 "<th class='source'>Currency</th>" + 1794 "<th class='target'>Digits</th>" + 1795 "<th class='target'>Cash Digits</th>" + 1796 "<th class='target'>Countries</th>" + 1797 "</tr>"); 1798 Set<String> currencyList = new TreeSet<String>(col); 1799 currencyList.addAll(currency_fractions.keySet()); 1800 currencyList.addAll(currency_territory.keySet()); 1801 1802 for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) { 1803 //String name = nameCode.getKey(); 1804 String currency = nameCode.getValue(); 1805 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); 1806 Set<String> territories = currencyToTerritory.get(currency); 1807 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency); 1808 1809 // String fractions = (String) currency_fractions.get(currency); 1810 // if (fractions == null) 1811 // fractions = defaultDigits; 1812 // Set territories = (Set) currency_territory.get(currency); 1813 pw.print("<tr>" + 1814 "<td class='source nowrap'>" 1815 + TransliteratorUtilities.toHTML.transform(english.getName("currency", currency)) + "</td>" + 1816 "<td class='source'>" + CldrUtility.getDoubleLinkedText(currency) + "</td>" + 1817 "<td class='target'>" + 1818 info.getDigits() 1819 + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") 1820 + "</td>" 1821 + "<td class='target'>" 1822 + (info.cashDigits == info.getDigits() && info.cashRounding == info.getRounding() ? "" : (info.cashDigits 1823 + (info.cashRounding == 0 ? "" : " (" + info.cashRounding + ")"))) 1824 + "</td>" + 1825 "<td class='target'>"); 1826 boolean first = true; 1827 boolean needBreak = false; 1828 if (modernTerritories != null) { 1829 needBreak = true; 1830 for (String territory : modernTerritories) { 1831 if (first) 1832 first = false; 1833 else 1834 pw.print(", "); 1835 pw.print("<b>" + territory + "</b>"); 1836 } 1837 } 1838 //boolean haveBreak = true; 1839 if (territories != null) { 1840 for (String territory : territories) { 1841 if (first) 1842 first = false; 1843 else if (!needBreak) 1844 pw.print(", "); 1845 else { 1846 pw.print(",<br>"); 1847 needBreak = false; 1848 } 1849 pw.print(territory); 1850 } 1851 } 1852 pw.println("</td></tr>"); 1853 } 1854 pw.println("</table>"); 1855 pw.close(); 1856 // doFooter(pw); 1857 1858 // if (false) { 1859 // doTitle(pw, "Territories Versus Currencies"); 1860 // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without Territories</th></tr>"); 1861 // pw.println("<tr><td class='target'>"); 1862 // Set territoriesWithoutCurrencies = new TreeSet(); 1863 // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); 1864 // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); 1865 // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); 1866 // boolean first = true; 1867 // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { 1868 // if (first) first = false; 1869 // else pw.print(", "); 1870 // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false)); 1871 // } 1872 // pw.println("</td><td class='target'>"); 1873 // Set currenciesWithoutTerritories = new TreeSet(); 1874 // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); 1875 // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); 1876 // first = true; 1877 // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { 1878 // if (first) first = false; 1879 // else pw.print(", "); 1880 // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false)); 1881 // } 1882 // pw.println("</td></tr>"); 1883 // doFooter(pw); 1884 // } 1885 } 1886 getTerritoryName(String territory)1887 private String getTerritoryName(String territory) { 1888 String name; 1889 name = english.getName("territory", territory); 1890 if (name == null) { 1891 name = sc.getData("territory", territory); 1892 } 1893 if (name != null) { 1894 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; 1895 } else { 1896 return territory; 1897 } 1898 } 1899 1900 /** 1901 * @throws IOException 1902 * 1903 */ printAliases(PrintWriter index)1904 public void printAliases(PrintWriter index) throws IOException { 1905 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1906 1907 // doTitle(pw, "Aliases"); 1908 pw.println("<table>"); 1909 pw.println("<tr><th class='source'>" + "Type" + "</th>" + 1910 "<th class='source'>" + "Code" + "</th>" + 1911 "<th class='target'>" + "Reason" + "</th>" + 1912 "<th class='target'>" + "Substitute (if available)" + "</th></tr>"); 1913 for (Iterator<String[]> it = aliases.iterator(); it.hasNext();) { 1914 String[] items = it.next(); 1915 pw.println("<tr><td class='source'>" + items[0] + "</td>" + 1916 "<td class='source'>" + CldrUtility.getDoubleLinkedText(items[1]) + "</td>" + 1917 "<td class='target'>" + items[3] + "</td>" + 1918 "<td class='target'>" + items[2] + "</td></tr>"); 1919 } 1920 // doFooter(pw); 1921 pw.println("</table>"); 1922 pw.close(); 1923 } 1924 1925 // deprecatedItems 1926 // public void printDeprecatedItems(PrintWriter pw) { 1927 // doTitle(pw, "Deprecated Items"); 1928 // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>"); 1929 // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { 1930 // Map source = (Map)it.next(); 1931 // Object item; 1932 // pw.print("<tr>"); 1933 // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") + "</td>"); 1934 // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : "<i>any</i>") + "</td>"); 1935 // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : "<i>any</i>") + "</td>"); 1936 // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : "<i>any</i>") + "</td>"); 1937 // pw.print("</tr>"); 1938 // } 1939 // doFooter(pw); 1940 // } 1941 printWindows_Tzid(PrintWriter index)1942 public void printWindows_Tzid(PrintWriter index) throws IOException { 1943 Map<String, Map<String, Map<String, String>>> zoneMapping = supplementalDataInfo 1944 .getTypeToZoneToRegionToZone(); 1945 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1946 for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : zoneMapping.entrySet()) { 1947 String type = typeAndZoneToRegionToZone.getKey(); 1948 Map<String, Map<String, String>> zoneToRegionToZone = typeAndZoneToRegionToZone.getValue(); 1949 pw.println("<br><h1>Mapping for: " + type + "</h1><br>"); 1950 // doTitle(pw, "Windows \u2192 Tzid"); 1951 pw.println("<table>"); 1952 pw.println("<tr><th class='source'>" + type + "</th><th class='source'>" + "Region" 1953 + "</th><th class='target'>" + "TZID" + "</th></tr>"); 1954 1955 for (Entry<String, Map<String, String>> zoneAndregionToZone : zoneToRegionToZone.entrySet()) { 1956 String source = zoneAndregionToZone.getKey(); 1957 Map<String, String> regionToZone = zoneAndregionToZone.getValue(); 1958 for (Entry<String, String> regionAndZone : regionToZone.entrySet()) { 1959 String region = regionAndZone.getKey(); 1960 String target = regionAndZone.getValue(); 1961 if (region == null) region = "<i>any</a>"; 1962 pw.println("<tr><td class='source'>" + source + "</td><td class='source'>" + region 1963 + "</td><td class='target'>" + target + "</td></tr>"); 1964 } 1965 } 1966 // doFooter(pw); 1967 pw.println("</table>"); 1968 } 1969 pw.close(); 1970 } 1971 1972 // <info iso4217="ADP" digits="0" rounding="0"/> 1973 printCharacters(PrintWriter index)1974 public void printCharacters(PrintWriter index) throws IOException { 1975 String title = "Character Fallback Substitutions"; 1976 1977 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 1978 // doTitle(pw, title); 1979 pw.println("<table>"); 1980 1981 pw.println( 1982 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>"); 1983 UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); 1984 for (com.ibm.icu.text.UnicodeSetIterator it = new com.ibm.icu.text.UnicodeSetIterator(chars); it.next();) { 1985 String value = it.getString(); 1986 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); 1987 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); 1988 } 1989 int[] counts = new int[4]; 1990 for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext();) { 1991 String value = it.next(); 1992 LinkedHashSet<String> substitutes = charSubstitutions.get(value); 1993 String nfc = Normalizer.normalize(value, Normalizer.NFC); 1994 String nfkc = Normalizer.normalize(value, Normalizer.NFKC); 1995 1996 String sourceTag = "<td class='source'>"; 1997 if (substitutes.size() > 1) { 1998 sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>"; 1999 } 2000 boolean first = true; 2001 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext();) { 2002 String substitute = it2.next(); 2003 String type = "Explicit"; 2004 String targetTag = "<td class='target3'>"; 2005 if (substitute.equals(nfc)) { 2006 type = "NFC"; 2007 targetTag = "<td class='target'>"; 2008 counts[2]++; 2009 } else if (substitute.equals(nfkc)) { 2010 type = "NFKC"; 2011 targetTag = "<td class='target4'>"; 2012 counts[3]++; 2013 } else { 2014 counts[0]++; 2015 } 2016 pw.println("<tr>" 2017 + (!first ? "" : sourceTag + hex(value, ", ") + "</td>" + sourceTag 2018 + TransliteratorUtilities.toHTML.transliterate(value) + "</td>" + sourceTag 2019 + UCharacter.getName(value, ", ") 2020 + "</td>") 2021 + targetTag + type + "</td>" + targetTag + hex(substitute, ", ") + "</td>" 2022 + targetTag + TransliteratorUtilities.toHTML.transliterate(substitute) + "</td>" + targetTag 2023 + UCharacter.getName(substitute, ", ") + "</td></tr>"); 2024 first = false; 2025 } 2026 } 2027 // doFooter(pw); 2028 pw.println("</table>"); 2029 2030 pw.close(); 2031 for (int i = 0; i < counts.length; ++i) { 2032 System.out.println("Count\t" + i + "\t" + counts[i]); 2033 } 2034 } 2035 hex(String s, String separator)2036 public static String hex(String s, String separator) { 2037 StringBuffer result = new StringBuffer(); 2038 int cp; 2039 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 2040 cp = UTF16.charAt(s, i); 2041 if (i != 0) 2042 result.append(separator); 2043 result.append(com.ibm.icu.impl.Utility.hex(cp)); 2044 } 2045 return result.toString(); 2046 } 2047 2048 /** 2049 * 2050 */ 2051 // private PrintWriter doTitle(PrintWriter pw, String title) { 2052 // //String anchor = FileUtilities.anchorize(title); 2053 // pw.println("<div align='center'><table>"); 2054 // //anchors.put(title, anchor); 2055 // //PrintWriter result = null; 2056 // //return result; 2057 // } 2058 2059 // private void doFooter(PrintWriter pw) { 2060 // pw.println("</table></div>"); 2061 // } printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst)2062 public void printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst) { 2063 String name = depth == 4 ? start : getName(CLDRFile.TERRITORY_NAME, start, false); 2064 if (!isFirst) 2065 pw.print(lead); 2066 int count = getTotalContainedItems(start, depth); 2067 pw.print("<td class='z" + depth + "' rowSpan='" + count + "'>" + name + "</td>"); // colSpan='" + (5 - 2068 // depth) + "' 2069 if (depth == 4) 2070 pw.println("</tr>"); 2071 Collection<String> contains = getContainedCollection(start, depth); 2072 if (contains != null) { 2073 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator); 2074 contains2.addAll(contains); 2075 boolean first = true; 2076 for (Iterator<String> it = contains2.iterator(); it.hasNext();) { 2077 String item = it.next(); 2078 printContains2(pw, lead, item, depth + 1, first); // + "<td> </td>" 2079 first = false; 2080 } 2081 } 2082 } 2083 getTotalContainedItems(String start, int depth)2084 private int getTotalContainedItems(String start, int depth) { 2085 Collection<String> c = getContainedCollection(start, depth); 2086 if (c == null) 2087 return 1; 2088 int sum = 0; 2089 for (Iterator<String> it = c.iterator(); it.hasNext();) { 2090 sum += getTotalContainedItems(it.next(), depth + 1); 2091 } 2092 return sum; 2093 } 2094 2095 /** 2096 * 2097 */ getContainedCollection(String start, int depth)2098 private Collection<String> getContainedCollection(String start, int depth) { 2099 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2100 if (contains == null) { 2101 contains = sc.getCountryToZoneSet().get(start); 2102 if (contains == null && depth == 3) { 2103 contains = new TreeSet<>(); 2104 if (start.compareTo("A") >= 0) { 2105 contains.add("<font color='red'>MISSING TZID</font>"); 2106 } else { 2107 contains.add("<font color='red'>Not yet ISO code</font>"); 2108 } 2109 } 2110 } 2111 return contains; 2112 } 2113 2114 /** 2115 * @param table 2116 * TODO 2117 * 2118 */ printMissing(PrintWriter pw, int source, int table)2119 public void printMissing(PrintWriter pw, int source, int table) { 2120 Set<String> missingItems = new HashSet<>(); 2121 String type = null; 2122 if (source == CLDRFile.TERRITORY_NAME) { 2123 type = "territory"; 2124 missingItems.addAll(sc.getAvailableCodes(type)); 2125 missingItems.removeAll(territory_languages.keySet()); 2126 missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet()); 2127 missingItems.remove("200"); // czechoslovakia 2128 } else if (source == CLDRFile.SCRIPT_NAME) { 2129 type = "script"; 2130 missingItems.addAll(sc.getAvailableCodes(type)); 2131 missingItems.removeAll(script_languages.keySet()); 2132 } else if (source == CLDRFile.LANGUAGE_NAME) { 2133 type = "language"; 2134 missingItems.addAll(sc.getAvailableCodes(type)); 2135 if (table == CLDRFile.SCRIPT_NAME) 2136 missingItems.removeAll(language_scripts.keySet()); 2137 if (table == CLDRFile.TERRITORY_NAME) 2138 missingItems.removeAll(language_territories.keySet()); 2139 } else { 2140 throw new IllegalArgumentException("Illegal code"); 2141 } 2142 Set<String> missingItemsNamed = new TreeSet<String>(col); 2143 for (Iterator<String> it = missingItems.iterator(); it.hasNext();) { 2144 String item = it.next(); 2145 List<String> data = sc.getFullData(type, item); 2146 if (data.get(0).equals("PRIVATE USE")) 2147 continue; 2148 if (data.size() < 3) 2149 continue; 2150 if (!"".equals(data.get(2))) 2151 continue; 2152 2153 String itemName = getName(source, item, true); 2154 missingItemsNamed.add(itemName); 2155 } 2156 pw.println("<div align='center'><table>"); 2157 for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext();) { 2158 pw.println("<tr><td class='target'>" + it.next() + "</td></tr>"); 2159 } 2160 pw.println("</table></div>"); 2161 } 2162 2163 // source, eg english.TERRITORY_NAME 2164 // target, eg english.LANGUAGE_NAME print(PrintWriter pw, int source, int target)2165 public void print(PrintWriter pw, int source, int target) { 2166 Multimap<String, String> data = source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 2167 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 2168 : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 2169 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 2170 : null; 2171 // transform into names, and sort 2172 Map<String, Set<String>> territory_languageNames = new TreeMap<String, Set<String>>(col); 2173 for (Iterator<String> it = data.keySet().iterator(); it.hasNext();) { 2174 String territory = it.next(); 2175 String territoryName = getName(source, territory, true); 2176 Set<String> s = territory_languageNames.get(territoryName); 2177 if (s == null) 2178 territory_languageNames.put(territoryName, s = new TreeSet<String>(col)); 2179 for (Iterator<String> it2 = data.get(territory).iterator(); it2.hasNext();) { 2180 String language = it2.next(); 2181 String languageName = getName(target, language, true); 2182 s.add(languageName); 2183 } 2184 } 2185 2186 pw.println("<div align='center'><table>"); 2187 2188 for (Iterator<String> it = territory_languageNames.keySet().iterator(); it.hasNext();) { 2189 String territoryName = it.next(); 2190 pw.println("<tr><td class='source' colspan='2'>" + territoryName + "</td></tr>"); 2191 Set<String> s = territory_languageNames.get(territoryName); 2192 for (Iterator<String> it2 = s.iterator(); it2.hasNext();) { 2193 String languageName = it2.next(); 2194 pw.println("<tr><td> </td><td class='target'>" + languageName + "</td></tr>"); 2195 } 2196 } 2197 pw.println("</table></div>"); 2198 2199 } 2200 2201 /** 2202 * @param codeFirst 2203 * TODO 2204 * 2205 */ getName(int type, String oldcode, boolean codeFirst)2206 private String getName(int type, String oldcode, boolean codeFirst) { 2207 if (oldcode.contains(" ")) { 2208 String[] result = oldcode.split("\\s+"); 2209 for (int i = 0; i < result.length; ++i) { 2210 result[i] = getName(type, result[i], codeFirst); 2211 } 2212 return CldrUtility.join(Arrays.asList(result), ", "); 2213 } else { 2214 int pos = oldcode.indexOf('*'); 2215 String code = pos < 0 ? oldcode : oldcode.substring(0, pos); 2216 String ename = english.getName(type, code); 2217 String nameString = ename == null ? code : ename; 2218 return nameString.equals(oldcode) ? nameString 2219 : codeFirst ? "[" + oldcode + "]" + "\t" + nameString 2220 : nameString + "\t" + "[" + oldcode + "]"; 2221 } 2222 } 2223 2224 private String getName(String locale, boolean codeFirst) { 2225 String ename = getLanguageName(locale); 2226 return codeFirst ? "[" + locale + "]\t" + (ename == null ? locale : ename) : (ename == null ? locale 2227 : ename) + "\t[" + locale + "]"; 2228 } 2229 2230 Comparator territoryNameComparator = new Comparator() { 2231 @Override 2232 public int compare(Object o1, Object o2) { 2233 return col.compare(getName(CLDRFile.TERRITORY_NAME, (String) o1, false), 2234 getName(CLDRFile.TERRITORY_NAME, (String) o2, false)); 2235 } 2236 }; 2237 2238 static String[] stringArrayPattern = new String[0]; 2239 static String[][] string2ArrayPattern = new String[0][]; 2240 2241 public static Map<String, String> territoryAliases = new HashMap<>(); 2242 2243 public void printContains(PrintWriter index) throws IOException { 2244 String title = "Territory Containment (UN M.49)"; 2245 2246 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2247 // doTitle(pw, title); 2248 List<String[]> rows = new ArrayList<>(); 2249 printContains3("001", rows, new ArrayList<String>()); 2250 TablePrinter tablePrinter = new TablePrinter() 2251 .addColumn("World", "class='source'", null, "class='z0'", true).setSortPriority(0) 2252 .addColumn("Continent", "class='source'", null, "class='z1'", true).setSortPriority(1) 2253 .addColumn("Subcontinent", "class='source'", null, "class='z2'", true).setSortPriority(2) 2254 .addColumn("Country (Territory)", "class='source'", null, "class='z3'", true).setSortPriority(3) 2255 .addColumn("Time Zone", "class='source'", null, "class='z4'", true).setSortPriority(4); 2256 String[][] flatData = rows.toArray(string2ArrayPattern); 2257 pw.println(tablePrinter.addRows(flatData).toTable()); 2258 2259 showSubtable(pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); 2260 showSubtable(pw, ContainmentStyle.deprecated, "Deprecated", "Container", "Deprecated Region"); 2261 2262 // Relation<String, String> deprecated = supplementalDataInfo 2263 // .getTerritoryToContained(ContainmentStyle.deprecated); 2264 // 2265 // for (String region : deprecated.keySet()) { 2266 // nameToContainers.add(region); 2267 // } 2268 // pw.println("<h2>Groupings and Deprecated Regions</h2>"); 2269 // for (String region : nameToContainers) { 2270 // String name = getName(CLDRFile.TERRITORY_NAME, region, false); 2271 // Set<String> dep = deprecated.get(region); 2272 // Set<String> gro = grouping.get(region); 2273 // Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : dep).iterator(); 2274 // Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : gro).iterator(); 2275 // while (depIt.hasNext() || groIt.hasNext()) { 2276 // String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, depIt.next(), false) : ""; 2277 // String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, groIt.next(), false) : ""; 2278 // tablePrinter2.addRow() 2279 // .addCell(name) 2280 // .addCell(gro1) 2281 // .addCell(dep1) 2282 // .finishRow(); 2283 // } 2284 // } 2285 // pw.println(tablePrinter2.toTable()); 2286 // pw.println("<h2>Other Groupings</h2>"); 2287 // for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) { 2288 // showContainers(pw, regionContained); 2289 // } 2290 // 2291 // pw.println("<h2>Deprecated Codes</h2>"); 2292 // for (Entry<String, Set<String>> regionContained : deprecated.keyValuesSet()) { 2293 // showContainers(pw, regionContained); 2294 // } 2295 pw.close(); 2296 } 2297 2298 public void showSubtable(PrintWriter pw, ContainmentStyle containmentStyle, String title, String containerTitle, String containeeTitle) { 2299 pw.println("<h2>" + 2300 title + 2301 "</h2>"); 2302 TablePrinter tablePrinter2 = new TablePrinter() 2303 .addColumn(containerTitle, "class='source'", null, "class='z0'", true).setSortPriority(0) 2304 .addColumn(containeeTitle, "class='source'", null, "class='z4'", true).setSortPriority(1); 2305 2306 Relation<String, String> grouping = supplementalDataInfo 2307 .getTerritoryToContained(containmentStyle); 2308 2309 for (Entry<String, String> containerRegion : grouping.keyValueSet()) { 2310 String container = getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false); 2311 String containee = getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false); 2312 tablePrinter2.addRow() 2313 .addCell(container) 2314 .addCell(containee) 2315 .finishRow(); 2316 } 2317 pw.println(tablePrinter2.toTable()); 2318 } 2319 2320 public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) { 2321 String region = regionContained.getKey(); 2322 Set<String> contained = regionContained.getValue(); 2323 pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>"); 2324 for (String sub : contained) { 2325 pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>"); 2326 } 2327 pw.println("</ul></li></ul>"); 2328 } 2329 2330 private void printContains3(String start, List<String[]> rows, ArrayList<String> currentRow) { 2331 int len = currentRow.size(); 2332 if (len > 3) { 2333 return; // skip long items 2334 } 2335 currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false)); 2336 //Collection<String> contains = (Collection<String>) group_contains.get(start); 2337 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2338 if (contains == null) { 2339 contains = sc.getCountryToZoneSet().get(start); 2340 currentRow.add(""); 2341 if (contains == null) { 2342 currentRow.set(len + 1, "???"); 2343 rows.add(currentRow.toArray(stringArrayPattern)); 2344 } else { 2345 for (String item : contains) { 2346 currentRow.set(len + 1, item); 2347 rows.add(currentRow.toArray(stringArrayPattern)); 2348 } 2349 } 2350 currentRow.remove(len + 1); 2351 } else { 2352 for (String item : contains) { 2353 if (territoryAliases.keySet().contains(item)) { 2354 continue; 2355 } 2356 printContains3(item, rows, currentRow); 2357 } 2358 } 2359 currentRow.remove(len); 2360 } 2361 2362 } 2363 2364 /** 2365 * 2366 */ getInverse(Map<String, Set<String>> language_territories)2367 private static Map<String, Set<String>> getInverse(Map<String, Set<String>> language_territories) { 2368 // get inverse relation 2369 Map<String, Set<String>> territory_languages = new TreeMap<>(); 2370 for (Iterator<String> it = language_territories.keySet().iterator(); it.hasNext();) { 2371 String language = it.next(); 2372 Set<String> territories = language_territories.get(language); 2373 for (Iterator<String> it2 = territories.iterator(); it2.hasNext();) { 2374 String territory = it2.next(); 2375 Set<String> languages = territory_languages.get(territory); 2376 if (languages == null) 2377 territory_languages.put(territory, languages = new TreeSet<String>(col)); 2378 languages.add(language); 2379 } 2380 } 2381 return territory_languages; 2382 2383 } 2384 2385 static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region"); 2386 static final Map<String, String> NAME_TO_CURRENCY = getNameToCode(CodeType.currency, "currency"); 2387 getNameToCode(CodeType codeType, String cldrCodeType)2388 private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) { 2389 SortedMap<String, String> temp = new TreeMap<String, String>(col); 2390 for (String territory : StandardCodes.make().getAvailableCodes(codeType)) { 2391 String name = english.getName(cldrCodeType, territory); 2392 temp.put(name == null ? territory : name, territory); 2393 } 2394 temp = Collections.unmodifiableSortedMap(temp); 2395 return temp; 2396 } 2397 2398 /** 2399 * @param value_delimiter 2400 * TODO 2401 * 2402 */ addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value)2403 private static void addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value) { 2404 if (values != null) { 2405 Set<String> s = key_value.get(key); 2406 if (s == null) 2407 key_value.put(key, s = new TreeSet<String>(col)); 2408 s.addAll(Arrays.asList(values.split(value_delimiter))); 2409 } 2410 } 2411 addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value)2412 private static void addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value) { 2413 if (values != null) { 2414 key_value.putAll(key, Arrays.asList(values.split(value_delimiter))); 2415 } 2416 } 2417 showContents(Appendable pw, String... items)2418 public static void showContents(Appendable pw, String... items) { 2419 try { 2420 pw.append("</div>" + System.lineSeparator()); 2421 pw.append("<h3>Contents</h3>" + System.lineSeparator()); 2422 pw.append("<ol>" + System.lineSeparator()); 2423 for (int i = 0; i < items.length; i += 2) { 2424 pw.append("<li><a href='#" + items[i] + "'>" + items[i + 1] + "</a></li>" + System.lineSeparator()); 2425 } 2426 pw.append("</ol><hr>" + System.lineSeparator()); 2427 2428 pw.append("<div align='center'>" + System.lineSeparator()); 2429 } catch (IOException e) { 2430 throw new ICUUncheckedIOException(e); 2431 } 2432 } 2433 2434 } 2435