1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import java.io.IOException; 10 import java.io.PrintWriter; 11 import java.io.StringWriter; 12 import java.io.UnsupportedEncodingException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.Collections; 17 import java.util.Comparator; 18 import java.util.Date; 19 import java.util.EnumSet; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.Iterator; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.SortedMap; 30 import java.util.TreeMap; 31 import java.util.TreeSet; 32 33 import org.unicode.cldr.draft.FileUtilities; 34 import org.unicode.cldr.draft.ScriptMetadata; 35 import org.unicode.cldr.draft.ScriptMetadata.Info; 36 import org.unicode.cldr.util.ArrayComparator; 37 import org.unicode.cldr.util.CLDRConfig; 38 import org.unicode.cldr.util.CLDRFile; 39 import org.unicode.cldr.util.CLDRFile.WinningChoice; 40 import org.unicode.cldr.util.CLDRLocale; 41 import org.unicode.cldr.util.CLDRPaths; 42 import org.unicode.cldr.util.CLDRTool; 43 import org.unicode.cldr.util.CLDRURLS; 44 import org.unicode.cldr.util.CldrUtility; 45 import org.unicode.cldr.util.Factory; 46 import org.unicode.cldr.util.FileCopier; 47 import org.unicode.cldr.util.Iso639Data; 48 import org.unicode.cldr.util.Iso639Data.Scope; 49 import org.unicode.cldr.util.Iso639Data.Type; 50 import org.unicode.cldr.util.LanguageTagParser; 51 import org.unicode.cldr.util.Level; 52 import org.unicode.cldr.util.Log; 53 import org.unicode.cldr.util.Organization; 54 import org.unicode.cldr.util.StandardCodes; 55 import org.unicode.cldr.util.StandardCodes.CodeType; 56 import org.unicode.cldr.util.SupplementalDataInfo; 57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 63 import org.unicode.cldr.util.TransliteratorUtilities; 64 import org.unicode.cldr.util.XPathParts; 65 66 import com.google.common.base.Joiner; 67 import com.google.common.collect.ImmutableMap; 68 import com.google.common.collect.ImmutableSet; 69 import com.google.common.collect.Multimap; 70 import com.google.common.collect.Multimaps; 71 import com.google.common.collect.TreeMultimap; 72 import com.ibm.icu.impl.Relation; 73 import com.ibm.icu.impl.Row.R2; 74 import com.ibm.icu.impl.Row.R4; 75 import com.ibm.icu.lang.UCharacter; 76 import com.ibm.icu.text.Collator; 77 import com.ibm.icu.text.Normalizer; 78 import com.ibm.icu.text.Normalizer2; 79 import com.ibm.icu.text.NumberFormat; 80 import com.ibm.icu.text.UTF16; 81 import com.ibm.icu.text.UnicodeSet; 82 import com.ibm.icu.util.ICUUncheckedIOException; 83 import com.ibm.icu.util.ULocale; 84 85 @CLDRTool(alias = "showlanguages", description = "Generate Language info charts") 86 public class ShowLanguages { 87 private static final boolean SHOW_NATIVE = true; 88 89 static Comparator col = new org.unicode.cldr.util.MultiComparator( 90 Collator.getInstance(new ULocale("en")), 91 new UTF16.StringComparator(true, false, 0)); 92 93 static StandardCodes sc = StandardCodes.make(); 94 95 static Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();//.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 96 static CLDRFile english = CLDRConfig.getInstance().getEnglish(); 97 main(String[] args)98 public static void main(String[] args) throws IOException { 99 System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); 100 FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR); 101 FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR); 102 FormattedFileWriter.copyIncludeHtmls(FormattedFileWriter.CHART_TARGET_DIR); 103 104 StringWriter sw = printLanguageData(cldrFactory, "index.html"); 105 writeSupplementalIndex("index.html", sw); 106 107 // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*"); 108 // printLanguageData(cldrFactory, "language_info2.txt"); 109 System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR); 110 } 111 112 /** 113 * 114 */ 115 public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = new FormattedFileWriter.Anchors(); 116 117 static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo 118 .getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 119 printLanguageData(Factory cldrFactory, String filename)120 private static StringWriter printLanguageData(Factory cldrFactory, String filename) throws IOException { 121 StringWriter sw = new StringWriter(); 122 PrintWriter pw = new PrintWriter(sw); 123 124 LanguageInfo linfo = new LanguageInfo(cldrFactory); 125 linfo.showCoverageGoals(pw); 126 127 128 new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 129 ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS, null); 130 131 new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 132 new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 133 new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 134 new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 135 if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) { 136 new ChartUnitConversions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 137 new ChartUnitPreferences().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 138 } 139 // since we don't want these listed on the supplemental page, use null 140 141 new ShowPlurals().printPlurals(english, null, pw, cldrFactory); 142 143 linfo.printLikelySubtags(pw); 144 145 linfo.showCountryLanguageInfo(pw); 146 147 linfo.showLanguageCountryInfo(pw); 148 149 // linfo.showTerritoryInfo(); 150 // linfo.printCountryData(pw); 151 152 // linfo.printDeprecatedItems(pw); 153 154 // PrintWriter pw1 = new PrintWriter(new FormattedFileWriter(pw, "Languages and Territories", null)); 155 // pw1.println("<tr><th>Language \u2192 Territories"); 156 // pw1.println("</th><th>Territory \u2192 Language"); 157 // pw1.println("</th><th>Territories Not Represented"); 158 // pw1.println("</th><th>Languages Not Represented"); 159 // pw1.println("</th></tr>"); 160 // 161 // pw1.println("<tr><td>"); 162 // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 163 // pw1.println("</td><td>"); 164 // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME); 165 // pw1.println("</td><td>"); 166 // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME); 167 // pw1.println("</td><td>"); 168 // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 169 // pw1.println("</td></tr>"); 170 // 171 // pw1.close(); 172 173 printLanguageScript(linfo, pw); 174 printScriptLanguageTerritory(linfo, pw); 175 176 linfo.showCorrespondances(); 177 178 // linfo.showCalendarData(pw); 179 180 linfo.showCountryInfo(pw); 181 linfo.printCurrency(pw); 182 linfo.printContains(pw); 183 184 linfo.printWindows_Tzid(pw); 185 linfo.printAliases(pw); 186 187 linfo.printCharacters(pw); 188 189 pw.close(); 190 191 return sw; 192 } 193 writeSupplementalIndex(String filename, StringWriter sw)194 private static void writeSupplementalIndex(String filename, StringWriter sw) throws IOException { 195 String[] replacements = { 196 "%date%", CldrUtility.isoFormatDateOnly(new Date()), 197 "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(), 198 "%data%", sw.toString(), 199 "%index%", "../index.html" }; 200 PrintWriter pw2 = org.unicode.cldr.draft.FileUtilities.openUTF8Writer(FormattedFileWriter.CHART_TARGET_DIR, filename); 201 FileUtilities.appendFile(ShowLanguages.class, "supplemental.html", replacements, pw2); 202 pw2.close(); 203 } 204 printLanguageScript(LanguageInfo linfo, PrintWriter pw)205 private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException { 206 PrintWriter pw1; 207 TablePrinter tablePrinter = new TablePrinter() 208 .addColumn("Language", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 209 .setBreakSpans(true) 210 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 211 .setSpanRows(true) 212 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSpanRows(true) 213 .setSortPriority(1) 214 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3) 215 .addColumn("Script", "class='target'", null, "class='target'", true).setSortPriority(3) 216 .addColumn("Code", "class='target'", null, "class='target'", true) 217 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSortPriority(2); 218 219 TablePrinter tablePrinter2 = new TablePrinter() 220 .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 221 .setBreakSpans(true) 222 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 223 .setSpanRows(true) 224 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSpanRows(true) 225 .setSortPriority(1) 226 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(3) 227 .addColumn("Code", "class='target'", null, "class='target'", true) 228 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSortPriority(2) 229 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3); 230 231 // get the codes so we can show the remainder 232 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS); 233 UnicodeSet temp = new UnicodeSet(); 234 for (String script : getScriptsToShow()) { 235 temp.clear(); 236 try { 237 temp.applyPropertyAlias("script", script); 238 } catch (RuntimeException e) { 239 } // fall through 240 if (temp.size() == 0) { 241 remainingScripts.remove(script); 242 System.out.println("Removing: " + script); 243 } else { 244 System.out.println("Keeping: " + script); 245 } 246 } 247 remainingScripts.remove("Brai"); 248 remainingScripts.remove("Hira"); 249 remainingScripts.remove("Qaai"); 250 remainingScripts.remove("Hrkt"); 251 remainingScripts.remove("Zzzz"); 252 remainingScripts.remove("Zyyy"); 253 254 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 255 for (String language : getLanguagesToShow()) { 256 Scope s = Iso639Data.getScope(language); 257 Type t = Iso639Data.getType(language); 258 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 259 remainingLanguages.remove(language); 260 } 261 } 262 263 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 264 for (String language : languages) { 265 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 266 for (BasicLanguageData basicData : basicLanguageData) { 267 String secondary = isOfficial(language) // basicData.getType() == BasicLanguageData.Type.primary 268 ? "\u00A0" 269 : "N"; 270 for (String script : basicData.getScripts()) { 271 addLanguageScriptCells(tablePrinter, tablePrinter2, language, script, secondary); 272 remainingScripts.remove(script); 273 remainingLanguages.remove(language); 274 } 275 } 276 } 277 for (String language : remainingLanguages) { 278 addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?"); 279 } 280 for (String script : remainingScripts) { 281 addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?"); 282 } 283 284 pw1 = new PrintWriter(new FormattedFileWriter(null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS)); 285 pw1.println(tablePrinter.toTable()); 286 pw1.close(); 287 288 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS)); 289 pw1.println(tablePrinter2.toTable()); 290 pw1.close(); 291 292 } 293 294 static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<>(); 295 static { 296 for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) { 297 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 298 if (territories == null) { 299 continue; 300 } 301 int underbar = language.indexOf('_'); 302 String base = underbar < 0 ? null : language.substring(0, underbar); 303 304 for (String territory : territories) { 305 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 306 OfficialStatus status = data.getOfficialStatus(); 307 OfficialStatus old; 308 old = languageToBestStatus.get(language); 309 if (old == null || status.compareTo(old) > 0) { 310 languageToBestStatus.put(language, status); 311 } 312 if (base != null) { 313 old = languageToBestStatus.get(base); 314 if (old == null || status.compareTo(old) > 0) { 315 languageToBestStatus.put(base, status); 316 } 317 } 318 } 319 } 320 } 321 322 private static boolean isOfficial(String language) { 323 OfficialStatus status = languageToBestStatus.get(language); 324 if (status != null && status.isMajor()) { 325 return true; 326 } 327 int underbar = language.indexOf('_'); 328 if (underbar < 0) { 329 return false; 330 } 331 return isOfficial(language.substring(0, underbar)); 332 } 333 334 private static Set<String> getLanguagesToShow() { 335 return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME); 336 } 337 338 private static Set<String> getEnglishTypes(String type, int code) { 339 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 340 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) { 341 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 342 String newType = parts.getAttributeValue(-1, "type"); 343 if (!result.contains(newType)) { 344 result.add(newType); 345 } 346 } 347 return result; 348 } 349 350 private static Set<String> getScriptsToShow() { 351 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME); 352 } 353 354 private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) throws IOException { 355 PrintWriter pw1; 356 TablePrinter tablePrinter2 = new TablePrinter() 357 .addColumn("Sample Char", "class='source'", null, "class='source sample'", true).setSpanRows(true) 358 .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 359 .setBreakSpans(true) 360 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 361 .setSpanRows(true) 362 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(1) 363 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(2) 364 .addColumn("Native", "class='target'", null, "class='target'", true) 365 .addColumn("Code", "class='target'", null, "class='target'", true) 366 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(3) 367 .addColumn("Territory", "class='target'", null, "class='target'", true).setSortPriority(4) 368 .addColumn("Native", "class='target'", null, "class='target'", true) 369 .addColumn("Code", "class='target'", null, "class='target'", true); 370 371 // get the codes so we can show the remainder 372 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); 373 Set<String> remainingTerritories = new TreeSet<>(sc.getGoodAvailableCodes("territory")); 374 UnicodeSet temp = new UnicodeSet(); 375 for (String script : getScriptsToShow()) { 376 temp.clear(); 377 try { 378 temp.applyPropertyAlias("script", script); 379 } catch (RuntimeException e) { 380 } // fall through 381 if (temp.size() == 0) { 382 remainingScripts.remove(script); 383 System.out.println("Removing: " + script); 384 } else { 385 System.out.println("Keeping: " + script); 386 } 387 } 388 remainingScripts.remove("Brai"); 389 remainingScripts.remove("Hira"); 390 remainingScripts.remove("Qaai"); 391 remainingScripts.remove("Hrkt"); 392 remainingScripts.remove("Zzzz"); 393 remainingScripts.remove("Zyyy"); 394 395 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 396 for (String language : getLanguagesToShow()) { 397 Scope s = Iso639Data.getScope(language); 398 Type t = Iso639Data.getType(language); 399 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 400 remainingLanguages.remove(language); 401 } 402 } 403 404 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 405 for (String language : languages) { 406 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 407 for (BasicLanguageData basicData : basicLanguageData) { 408 if (basicData.getType() != BasicLanguageData.Type.primary) { 409 continue; 410 } 411 Set<String> mainTerritories = getTerritories(language); 412 if (mainTerritories.size() == 0) { 413 continue; 414 // mainTerritories.add("ZZ"); 415 } 416 417 TreeSet<String> mainScripts = new TreeSet<>(basicData.getScripts()); 418 if (mainScripts.size() == 0) { 419 continue; 420 } 421 for (String script : mainScripts) { 422 for (String territory : mainTerritories) { 423 addLanguageScriptCells2(tablePrinter2, language, script, territory); 424 remainingTerritories.remove(territory); 425 } 426 remainingScripts.remove(script); 427 } 428 } 429 remainingLanguages.remove(language); 430 } 431 // for (String language : remainingLanguages) { 432 // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ"); 433 // } 434 // for (String script : remainingScripts) { 435 // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ"); 436 // } 437 // for (String territory : remainingTerritories) { 438 // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory); 439 // } 440 441 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts, Languages, and Territories", null, SUPPLEMENTAL_INDEX_ANCHORS)); 442 pw1.println(tablePrinter2.toTable()); 443 pw1.close(); 444 } 445 446 private static Relation<String, String> territoryFix; 447 getTerritories(String language)448 private static Set<String> getTerritories(String language) { 449 if (territoryFix == null) { // set up the data 450 initTerritoryFix(); 451 } 452 Set<String> territories = territoryFix.getAll(language); 453 if (territories == null) { 454 territories = new TreeSet<>(); 455 } 456 return territories; 457 } 458 initTerritoryFix()459 private static void initTerritoryFix() { 460 territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 461 Set<String> languages = supplementalDataInfo.getLanguages(); 462 LanguageTagParser ltp = new LanguageTagParser(); 463 for (String language2 : languages) { 464 if (language2.contains("_")) { 465 ltp.set(language2).getLanguage(); 466 addOfficialTerritory(ltp, language2, ltp.getLanguage()); 467 } else { 468 addOfficialTerritory(ltp, language2, language2); 469 } 470 } 471 } 472 addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage)473 private static void addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage) { 474 // territoryFix.putAll(baseLanguage, supplementalDataInfo.getTerritoriesForPopulationData(language)); 475 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 476 if (territories == null) { 477 return; 478 } 479 for (String territory : territories) { 480 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 481 OfficialStatus status = data.getOfficialStatus(); 482 if (status.isMajor()) { 483 territoryFix.put(baseLanguage, territory); 484 System.out.println("\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language); 485 } 486 } 487 } 488 addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, String territory)489 private static void addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, 490 String territory) { 491 CLDRFile nativeLanguage = null; 492 if (SHOW_NATIVE) { 493 try { 494 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true); 495 } catch (RuntimeException e) { 496 try { 497 nativeLanguage = cldrFactory.make(language + "_" + script, true); 498 } catch (RuntimeException e2) { 499 try { 500 nativeLanguage = cldrFactory.make(language, true); 501 } catch (RuntimeException e3) { 502 } 503 } 504 } 505 // check for overlap 506 if (nativeLanguage != null && !script.equals("Jpan") && !script.equals("Hans") && !script.equals("Hant")) { 507 UnicodeSet scriptSet; 508 try { 509 String tempScript = script.equals("Kore") ? "Hang" : script; 510 scriptSet = new UnicodeSet("[:script=" + tempScript + ":]"); 511 } catch (RuntimeException e) { 512 scriptSet = new UnicodeSet(); 513 } 514 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING); 515 if (scriptSet.containsNone(exemplars)) { 516 System.out.println("Skipping CLDR file -- exemplars differ: " + language + "\t" 517 + nativeLanguage.getLocaleID() + "\t" + scriptSet + "\t" + exemplars); 518 nativeLanguage = null; 519 } 520 } 521 } 522 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 523 if (languageName == null) languageName = "???"; 524 String isLanguageTranslated = ""; 525 String nativeLanguageName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME, 526 language); 527 if (nativeLanguageName == null || nativeLanguageName.equals(language)) { 528 nativeLanguageName = "<i>n/a</i>"; 529 isLanguageTranslated = "n"; 530 } 531 532 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 533 // String nativeScriptName = nativeLanguage == null ? null : 534 // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script); 535 // if (nativeScriptName != null && !nativeScriptName.equals(script)) { 536 // scriptName = nativeScriptName + "[" + scriptName + "]"; 537 // } 538 539 String isTerritoryTranslated = ""; 540 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory); 541 String nativeTerritoryName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.TERRITORY_NAME, 542 territory); 543 if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) { 544 nativeTerritoryName = "<i>n/a</i>"; 545 isTerritoryTranslated = "n"; 546 } 547 548 // Type t = Iso639Data.getType(language); 549 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 550 // // ok 551 // } else if (!language.equals("und")){ 552 // scriptModern = "N"; 553 // } 554 //String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 555 556 Info scriptMetatdata = ScriptMetadata.getInfo(script); 557 tablePrinter2.addRow() 558 .addCell(scriptMetatdata.sampleChar) 559 .addCell(scriptName) 560 .addCell(script) 561 .addCell(isLanguageTranslated) 562 .addCell(languageName) 563 .addCell(nativeLanguageName) 564 .addCell(language) 565 .addCell(isTerritoryTranslated) 566 .addCell(territoryName) 567 .addCell(nativeTerritoryName) 568 .addCell(territory) 569 .finishRow(); 570 } 571 572 static ImmutableMap<String, String> fixScriptGif = ImmutableMap.<String, String>builder() 573 .put("hangul", "hangulsyllables") 574 .put("japanese", "hiragana") 575 .put("unknown or invalid script", "unknown") 576 .put("Hant", "Hant") 577 .put("Hans", "Hans") 578 .build(); 579 getGifName(String script)580 private static String getGifName(String script) { 581 String temp = fixScriptGif.get(script); 582 if (temp != null) { 583 return temp; 584 } 585 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 586 scriptName = scriptName.toLowerCase(Locale.ENGLISH); 587 temp = fixScriptGif.get(scriptName); 588 if (temp != null) { 589 return temp; 590 } 591 return scriptName; 592 } 593 594 private static Set<Type> oldLanguage = Collections.unmodifiableSet(EnumSet.of(Type.Ancient, Type.Extinct, 595 Type.Historical, Type.Constructed)); 596 addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, String script, String secondary)597 private static void addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, 598 String script, String secondary) { 599 try { 600 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 601 if (languageName == null) { 602 languageName = "¿" + language + "?"; 603 System.err.println("No English Language Name for:" + language); 604 } 605 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 606 if (scriptName == null) { 607 scriptName = "¿" + script + "?"; 608 System.err.println("No English Language Name for:" + script); 609 } 610 String scriptModern = StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N"; 611 //Scope s = Iso639Data.getScope(language); 612 Type t = Iso639Data.getType(language); 613 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 614 // // ok 615 // } else if (!language.equals("und")){ 616 // scriptModern = "N"; 617 // } 618 String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 619 620 tablePrinter.addRow() 621 .addCell(languageName) 622 .addCell(language) 623 .addCell(languageModern) 624 .addCell(secondary) 625 .addCell(scriptName) 626 .addCell(script) 627 .addCell(scriptModern) 628 .finishRow(); 629 630 tablePrinter2.addRow() 631 .addCell(scriptName) 632 .addCell(script) 633 .addCell(scriptModern) 634 .addCell(languageName) 635 .addCell(language) 636 .addCell(languageModern) 637 .addCell(secondary) 638 .finishRow(); 639 } catch (RuntimeException e) { 640 throw e; 641 } 642 } 643 644 // TODO This is old code that read supplemental data. Should be replaced by using SupplementalDataInfo. 645 // https://unicode-org.atlassian.net/browse/CLDR-15673 646 647 static class LanguageInfo { 648 private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<>(); 649 650 Multimap<String, String> language_scripts = TreeMultimap.create(); 651 652 Multimap<String, String> language_territories = TreeMultimap.create(); 653 654 List<Map<String, String>> deprecatedItems = new ArrayList<>(); 655 656 Multimap<String, String> territory_languages; 657 658 Multimap<String, String> script_languages; 659 660 //Map group_contains = new TreeMap(); 661 662 Set<String[]> aliases = new TreeSet<String[]>(new ArrayComparator(new Comparator[] { new UTF16.StringComparator(), col })); 663 664 Comparator col3 = new ArrayComparator(new Comparator[] { col, col, col }); 665 666 Map<String, String> currency_fractions = new TreeMap<String, String>(col); 667 668 Map<String, Set> currency_territory = new TreeMap<String, Set>(col); 669 670 Map<String, Set> territory_currency = new TreeMap<String, Set>(col); 671 672 Set<String> territoriesWithCurrencies = new TreeSet<>(); 673 674 Set<String> currenciesWithTerritories = new TreeSet<>(); 675 676 Map<String, Map<String, Set<String>>> territoryData = new TreeMap<>(); 677 678 Set<String> territoryTypes = new TreeSet<>(); 679 680 Map<String, LinkedHashSet<String>> charSubstitutions = new TreeMap<String, LinkedHashSet<String>>(col); 681 682 String defaultDigits = null; 683 684 Map<String, Map<String, Object>> territoryLanguageData = new TreeMap<>(); 685 686 private Relation<String, String> territoriesToModernCurrencies = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, 687 null); 688 LanguageInfo(Factory cldrFactory)689 public LanguageInfo(Factory cldrFactory) throws IOException { 690 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 691 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 692 String path = it.next(); 693 String fullPath = supp.getFullXPath(path); 694 if (fullPath == null) { 695 supp.getFullXPath(path); 696 } 697 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 698 699 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/> 700 if (path.indexOf("/zoneItem") >= 0) { 701 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 702 String type = attributes.get("type"); 703 //String territory = attributes.get("territory"); 704 String aliasAttributes = attributes.get("aliases"); 705 if (aliasAttributes != null) { 706 String[] aliasesList = aliasAttributes.split("\\s+"); 707 708 for (int i = 0; i < aliasesList.length; ++i) { 709 String alias = aliasesList[i]; 710 aliases.add(new String[] { "timezone", alias, type }); 711 } 712 } 713 // TODO territory, multizone 714 continue; 715 } 716 717 if (path.indexOf("/currencyData") >= 0) { 718 if (path.indexOf("/fractions") >= 0) { 719 // <info iso4217="ADP" digits="0" rounding="0"/> 720 String element = parts.getElement(parts.size() - 1); 721 if (!element.equals("info")) 722 throw new IllegalArgumentException("Unexpected fractions element: " + element); 723 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 724 String iso4217 = attributes.get("iso4217"); 725 String digits = attributes.get("digits"); 726 String rounding = attributes.get("rounding"); 727 digits = digits + (rounding.equals("0") ? "" : " (" + rounding + ")"); 728 if (iso4217.equals("DEFAULT")) 729 defaultDigits = digits; 730 else 731 currency_fractions.put(getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits); 732 continue; 733 } 734 // <region iso3166="AR"> 735 // <currency iso4217="ARS" from="1992-01-01"/> 736 if (path.indexOf("/region") >= 0) { 737 Map<String, String> attributes = parts.getAttributes(parts.size() - 2); 738 String iso3166 = attributes.get("iso3166"); 739 attributes = parts.getAttributes(parts.size() - 1); 740 String iso4217 = attributes.get("iso4217"); 741 String to = attributes.get("to"); 742 if (to == null) 743 to = "\u221E"; 744 String from = attributes.get("from"); 745 if (from == null) 746 from = "-\u221E"; 747 String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false); 748 String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false); 749 Set info = territory_currency.get(countryName); 750 if (info == null) 751 territory_currency.put(countryName, info = new TreeSet(col3)); 752 info.add(new String[] { from, to, currencyName }); 753 info = currency_territory.get(currencyName); 754 if (info == null) 755 currency_territory.put(currencyName, info = new TreeSet(col)); 756 territoriesWithCurrencies.add(iso3166); 757 currenciesWithTerritories.add(iso4217); 758 if (to.equals("\u221E") || to.compareTo("2006") > 0) { 759 territoriesToModernCurrencies.put(iso3166, iso4217); 760 info.add("<b>" + countryName + "</b>"); 761 762 } else { 763 info.add("<i>" + countryName + "</i>"); 764 765 } 766 continue; 767 } 768 } 769 770 if (path.indexOf("/languageData") >= 0) { 771 Map<String, String> attributes = parts.findAttributes("language"); 772 String language = attributes.get("type"); 773 String alt = attributes.get("alt"); 774 addTokens(language, attributes.get("scripts"), " ", language_scripts); 775 // mark the territories 776 if (alt == null) 777 ; // nothing 778 else if ("secondary".equals(alt)) 779 language += "*"; 780 else 781 language += "*" + alt; 782 // <language type="af" scripts="Latn" territories="ZA"/> 783 addTokens(language, attributes.get("territories"), " ", language_territories); 784 continue; 785 } 786 787 if (path.indexOf("/deprecatedItems") >= 0) { 788 deprecatedItems.add(parts.findAttributes("deprecatedItems")); 789 continue; 790 } 791 if (path.indexOf("/calendarData") >= 0) { 792 Map<String, String> attributes = parts.findAttributes("calendar"); 793 if (attributes == null) { 794 System.err.println("Err: on path " + fullPath 795 + " , no attributes on 'calendar'. Probably, this tool is out of date."); 796 } else { 797 String type = attributes.get("type"); 798 String territories = attributes.get("territories"); 799 if (territories == null) { 800 System.err.println("Err: on path " + fullPath 801 + ", missing territories. Probably, this tool is out of date."); 802 } else if (type == null) { 803 System.err.println("Err: on path " + fullPath 804 + ", missing type. Probably, this tool is out of date."); 805 } else { 806 addTerritoryInfo(territories, "calendar", type); 807 } 808 } 809 } 810 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { 811 String element = parts.getElement(parts.size() - 1); 812 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 813 // later, make this a table 814 String key = "count"; 815 String display = "Days in week (min)"; 816 boolean useTerritory = true; 817 switch (element) { 818 case "firstDay": 819 key = "day"; 820 display = "First day of week"; 821 break; 822 case "weekendStart": 823 key = "day"; 824 display = "First day of weekend"; 825 break; 826 case "weekendEnd": 827 key = "day"; 828 display = "Last day of weekend"; 829 break; 830 case "measurementSystem": 831 // <measurementSystem type="metric" territories="001"/> 832 key = "type"; 833 display = "Meas. system"; 834 break; 835 case "paperSize": 836 key = "type"; 837 display = "Paper Size"; 838 break; 839 case "weekOfPreference": 840 useTerritory = false; 841 break; 842 } 843 if (useTerritory) { 844 String type = attributes.get(key); 845 String territories = attributes.get("territories"); 846 addTerritoryInfo(territories, display, type); 847 } 848 } 849 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) 850 continue; 851 System.out.println("Skipped Element: " + path); 852 } 853 854 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 855 for (String language : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) { 856 language_territories.put(language, territory); 857 } 858 } 859 territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); 860 script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); 861 862 // now get some metadata 863 localeAliasInfo.put("language", new TreeMap<String, String>()); 864 localeAliasInfo.put("script", new TreeMap<String, String>()); 865 localeAliasInfo.put("territory", new TreeMap<String, String>()); 866 localeAliasInfo.put("variant", new TreeMap<String, String>()); 867 localeAliasInfo.put("zone", new TreeMap<String, String>()); 868 localeAliasInfo.put("subdivision", new TreeMap<String, String>()); 869 localeAliasInfo.put("unit", new TreeMap<String, String>()); 870 localeAliasInfo.put("usage", new TreeMap<String, String>()); 871 872 //localeAliasInfo.get("language").put("nb", "no"); 873 localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); 874 localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); 875 localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); 876 localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); 877 localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); 878 879 // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); 880 Map<String, Map<String, R2<List<String>, String>>> localeAliasInfo2 = supplementalDataInfo 881 .getLocaleAliasInfo(); 882 for (Entry<String, Map<String, R2<List<String>, String>>> entry1 : localeAliasInfo2.entrySet()) { 883 String element = entry1.getKey(); 884 for (Entry<String, R2<List<String>, String>> entry2 : entry1.getValue().entrySet()) { 885 String type = entry2.getKey(); 886 R2<List<String>, String> replacementReason = entry2.getValue(); 887 List<String> replacementList = replacementReason.get0(); 888 String replacement = replacementList == null ? null : 889 Joiner.on(" ").join(replacementList); 890 String reason = replacementReason.get1(); 891 if (element.equals("timezone")) { 892 element = "zone"; 893 } 894 try { 895 localeAliasInfo.get(element).put(type, replacement == null ? "?" : replacement); 896 } catch (Exception e) { 897 // TODO Auto-generated catch block 898 throw new IllegalArgumentException("Can't find alias data for '" + element + "'", e); 899 } 900 901 String name = ""; 902 if (replacement == null) { 903 name = "(none)"; 904 } else if (element.equals("language")) { 905 name = getName(replacement, false); 906 } else if (element.equals("zone")) { 907 element = "timezone"; 908 name = replacement + "*"; 909 } else { 910 int typeCode = CLDRFile.typeNameToCode(element); 911 if (typeCode >= 0) { 912 name = getName(typeCode, replacement, false); 913 } else { 914 name = "*" + replacement; 915 } 916 } 917 if (element.equals("territory")) { 918 territoryAliases.put(type, name); 919 aliases 920 .add(new String[] { element, getName(CLDRFile.TERRITORY_NAME, type, false), name, reason }); 921 } else { 922 aliases.add(new String[] { element, type, name, reason }); 923 } 924 continue; 925 } 926 } 927 Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); 928 Log.close(); 929 } 930 printLikelySubtags(PrintWriter index)931 public void printLikelySubtags(PrintWriter index) throws IOException { 932 933 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); 934 935 TablePrinter tablePrinter = new TablePrinter() 936 .addColumn("Source Lang", "class='source'", null, "class='source'", true).setSortPriority(1) 937 .setSpanRows(false) 938 .addColumn("Source Script", "class='source'", null, "class='source'", true).setSortPriority(0) 939 .setSpanRows(false).setBreakSpans(true) 940 .addColumn("Source Region", "class='source'", null, "class='source'", true).setSortPriority(2) 941 .setSpanRows(false) 942 .addColumn("Target Lang", "class='target'", null, "class='target'", true).setSortPriority(3) 943 .setBreakSpans(true) 944 .addColumn("Target Script", "class='target'", null, "class='target'", true).setSortPriority(4) 945 .addColumn("Target Region", "class='target'", null, "class='target'", true).setSortPriority(5) 946 .addColumn("Source ID", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 947 .addColumn("Target ID", "class='target'", null, "class='target'", true); 948 Map<String, String> subtags = supplementalDataInfo.getLikelySubtags(); 949 LanguageTagParser sourceParsed = new LanguageTagParser(); 950 LanguageTagParser targetParsed = new LanguageTagParser(); 951 for (String source : subtags.keySet()) { 952 String target = subtags.get(source); 953 sourceParsed.set(source); 954 targetParsed.set(target); 955 tablePrinter.addRow() 956 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage())) 957 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript())) 958 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion())) 959 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage())) 960 .addCell(getName(CLDRFile.SCRIPT_NAME, targetParsed.getScript())) 961 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion())) 962 .addCell(source) 963 .addCell(target) 964 .finishRow(); 965 } 966 pw.println(tablePrinter.toTable()); 967 pw.close(); 968 } 969 970 static class LanguageData extends R4<Double, Double, Double, String> { LanguageData(Double a, Double b, Double c, String d)971 public LanguageData(Double a, Double b, Double c, String d) { 972 super(a, b, c, d); 973 } 974 } 975 getName(final int type, final String value)976 private String getName(final int type, final String value) { 977 if (value == null || value.equals("") || value.equals("und")) { 978 return "\u00A0"; 979 } 980 String result = english.getName(type, value); 981 if (result == null) { 982 result = value; 983 } 984 return result; 985 } 986 987 static final Comparator INVERSE_COMPARABLE = new Comparator() { 988 @Override 989 public int compare(Object o1, Object o2) { 990 return ((Comparable) o2).compareTo(o1); 991 } 992 }; 993 994 // http://www.faqs.org/rfcs/rfc2396.html 995 // delims = "<" | ">" | "#" | "%" | <"> 996 // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 997 // Within a query component, the characters ";", "/", "?", ":", "@", 998 // "&", "=", "+", ",", and "$" are reserved. 999 static final UnicodeSet ESCAPED_URI_QUERY = new UnicodeSet( 1000 "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]").freeze(); 1001 1002 private static final int MINIMAL_BIG_VENDOR = 8; 1003 1004 static { System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement())1005 System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement()); 1006 } 1007 urlEncode(String input)1008 private String urlEncode(String input) { 1009 try { 1010 byte[] utf8 = input.getBytes("utf-8"); 1011 StringBuffer output = new StringBuffer(); 1012 for (int i = 0; i < utf8.length; ++i) { 1013 int b = utf8[i] & 0xFF; 1014 if (ESCAPED_URI_QUERY.contains(b)) { 1015 output.append('%'); 1016 if (b < 0x10) output.append('0'); 1017 output.append(Integer.toString(b, 16)); 1018 } else { 1019 output.append((char) b); 1020 } 1021 } 1022 return output.toString(); 1023 } catch (UnsupportedEncodingException e) { 1024 throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); 1025 } 1026 } 1027 addBug(int bugNumber, String text, String from, String subject, String body)1028 private String addBug(int bugNumber, String text, String from, String subject, String body) { 1029 return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL 1030 + "'>" + text + "</a>"; 1031 } 1032 showLanguageCountryInfo(PrintWriter pw)1033 private void showLanguageCountryInfo(PrintWriter pw) throws IOException { 1034 FormattedFileWriter ffw = new FormattedFileWriter(null, "Language-Territory Information", 1035 null 1036 // "<div style='margin:1em'><p>The language data is provided for localization testing, and is under development for CLDR 1.5. " 1037 // + 1038 // "To add a new territory for a language, see the <i>add new</i> links below. " + 1039 // "For more information, see <a href=\"territory_language_information.html\">Territory-Language Information.</a>" 1040 // + 1041 // "<p></div>" 1042 , SUPPLEMENTAL_INDEX_ANCHORS); 1043 PrintWriter pw21 = new PrintWriter(ffw); 1044 PrintWriter pw2 = pw21; 1045 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1046 nf.setGroupingUsed(true); 1047 //NumberFormat percent = new DecimalFormat("000.0%"); 1048 TablePrinter tablePrinter = new TablePrinter() 1049 // tablePrinter.setSortPriorities(0,5) 1050 .addColumn("L", "class='source'", null, "class='source'", true) 1051 .setSortPriority(0) 1052 .setBreakSpans(true) 1053 .setRepeatHeader(true) 1054 .setHidden(true) 1055 .addColumn("Language", "class='source'", null, "class='source'", true) 1056 .setSortPriority(0) 1057 .setBreakSpans(true) 1058 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 1059 // .addColumn("Report Bug", "class='target'", null, "class='target'", false) 1060 .addColumn("Territory", "class='target'", null, "class='target'", true) 1061 .addColumn("Code", "class='target'", "<a href=\"territory_language_information.html#{0}\">{0}</a>", 1062 "class='target'", true) 1063 .addColumn("Language Population", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1064 .setSortPriority(1).setSortAscending(false) 1065 // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", "class='targetRight'", true) 1066 // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true) 1067 // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true) 1068 // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", "class='targetRight'", true) 1069 ; 1070 TreeSet<String> languages = new TreeSet<>(); 1071 Collection<Comparable[]> data = new ArrayList<>(); 1072 String msg = "<br><i>Please click on each country code</i>"; 1073 1074 Collection<Comparable[]> plainData = new ArrayList<>(); 1075 1076 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1077 // PopulationData territoryData = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1078 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1079 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) { 1080 PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1081 languages.add(languageCode); 1082 Comparable[] items = new Comparable[] { 1083 getFirstPrimaryWeight(getLanguageName(languageCode)), 1084 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1085 languageCode, 1086 // bug, 1087 territoryName + getOfficialStatus(territoryCode, languageCode), 1088 territoryCode, 1089 languageData.getPopulation(), 1090 // population, 1091 // languageliteracy, 1092 // territoryLiteracy, 1093 // gdp 1094 }; 1095 Comparable[] plainItems = new Comparable[] { 1096 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1097 languageCode, 1098 territoryName, 1099 territoryCode, 1100 getRawOfficialStatus(territoryCode, languageCode), 1101 languageData.getPopulation(), 1102 languageData.getLiteratePopulation() 1103 }; 1104 1105 data.add(items); 1106 plainData.add(plainItems); 1107 } 1108 } 1109 for (String languageCode : languages) { 1110 Comparable[] items = new Comparable[] { 1111 getFirstPrimaryWeight(getLanguageName(languageCode)), 1112 getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode), 1113 languageCode, 1114 // bug, 1115 addBug(1217, "<i>add new</i>", "<email>", "Add territory to " + getLanguageName(languageCode) 1116 + " (" + languageCode + ")", "<territory, speaker population in territory, and references>"), 1117 "", 1118 0.0d, 1119 // 0.0d, 1120 // 0.0d, 1121 // 0.0d, 1122 // gdp 1123 }; 1124 data.add(items); 1125 } 1126 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1127 String value = tablePrinter.addRows(flattened).toTable(); 1128 pw2.println(value); 1129 pw2.close(); 1130 try (PrintWriter pw21plain = FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) { 1131 for (Comparable[] row : plainData) { 1132 pw21plain.println(Joiner.on("\t").join(row)); 1133 } 1134 } 1135 } 1136 getLanguagePluralMessage(String msg, String languageCode)1137 private String getLanguagePluralMessage(String msg, String languageCode) { 1138 String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage(); 1139 String messageWithPlurals = msg + ", on <a href='language_plural_rules.html#" + mainLanguageCode 1140 + "'>plurals</a>" + 1141 ", and on <a href='likely_subtags.html#" + mainLanguageCode + "'>likely-subtags</a>"; 1142 return messageWithPlurals; 1143 } 1144 getLanguageName(String languageCode)1145 private String getLanguageName(String languageCode) { 1146 String result = english.getName(languageCode); 1147 if (!result.equals(languageCode)) return result; 1148 Set<String> names = Iso639Data.getNames(languageCode); 1149 if (names != null && names.size() != 0) { 1150 return names.iterator().next(); 1151 } 1152 return languageCode; 1153 } 1154 1155 static final Set<Organization> TC_Vendors = ImmutableSet.of( 1156 Organization.apple, Organization.google, Organization.microsoft, Organization.cldr); 1157 showCoverageGoals(PrintWriter pw)1158 private void showCoverageGoals(PrintWriter pw) throws IOException { 1159 PrintWriter pw2 = new PrintWriter(new FormattedFileWriter(null, "Coverage Goals", 1160 null 1161 // "<p>" + 1162 // "The following show default coverage goals for larger organizations. " + 1163 // "<i>[n/a]</i> shows where there is no specific value for a given organization, " + 1164 // "while <i>(...)</i> indicates that the goal is inherited from the parent. " + 1165 // "A * is added if the goal differs from the parent locale's goal. " + 1166 // "For information on what these goals mean (comprehensive, modern, moderate,...), see the LDML specification " 1167 // + 1168 // "<a href='http://www.unicode.org/reports/tr35/#Coverage_Levels'>Appendix M: Coverage Levels</a>. " + 1169 // + 1170 // "</p>" 1171 , null)); 1172 1173 TablePrinter tablePrinter = new TablePrinter() 1174 // tablePrinter.setSortPriorities(0,4) 1175 .addColumn("Language", "class='source'", null, "class='source'", false) 1176 .setSortPriority(0) 1177 .setBreakSpans(false) 1178 .addColumn("Code", "class='source'", 1179 "<a href=\"http://www.unicode.org/cldr/data/common/main/{0}.xml\">{0}</a>", "class='source'", false) 1180 .addColumn("D. Votes", "class='target'", null, "class='target'", false); 1181 1182 Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes(); 1183 Set<String> locales = new TreeSet<>(); 1184 Set<Organization> vendors = new LinkedHashSet<>(); 1185 Set<Organization> smallVendors = new LinkedHashSet<>(); 1186 1187 for (Organization organization : TC_Vendors) { 1188 //if (vendor.equals(Organization.java)) continue; 1189 Map<String, Level> data = vendordata.get(organization); 1190 vendors.add(organization); 1191 tablePrinter.addColumn(organization.getDisplayName(), "class='target'", null, "class='target'", false) 1192 .setSpanRows(false); 1193 locales.addAll(data.keySet()); 1194 } 1195 1196 for (Entry<Organization, Map<String, Level>> vendorData : vendordata.entrySet()) { 1197 Organization vendor = vendorData.getKey(); 1198 if (!TC_Vendors.contains(vendor)) { 1199 smallVendors.add(vendor); 1200 continue; 1201 } 1202 } 1203 1204 Collection<Comparable[]> data = new ArrayList<>(); 1205 List<String> list = new ArrayList<>(); 1206 LanguageTagParser ltp = new LanguageTagParser(); 1207 //String alias2 = getAlias("sh_YU"); 1208 1209 pw2.append("<h2>TC Orgs</h2>"); 1210 1211 for (String locale : locales) { 1212 list.clear(); 1213 String localeCode = locale.equals("*") ? "und" : locale; 1214 String alias = getAlias(localeCode); 1215 if (!alias.equals(localeCode)) { 1216 throw new IllegalArgumentException("Should use canonical form: " + locale + " => " + alias); 1217 } 1218 String baseLang = ltp.set(localeCode).getLanguage(); 1219 String baseLangName = getLanguageName(baseLang); 1220 list.add("und".equals(localeCode) ? "other" : baseLangName); 1221 list.add(locale); 1222 int defaultVotes = supplementalDataInfo.getRequiredVotes(CLDRLocale.getInstance(locale), null); 1223 list.add(String.valueOf(defaultVotes)); 1224 for (Organization vendor : vendors) { 1225 String status = getVendorStatus(locale, vendor, vendordata); 1226 // if (!baseLang.equals(locale) && !status.startsWith("<")) { 1227 // String langStatus = getVendorStatus(baseLang, vendor, vendordata); 1228 // if (!langStatus.equals(status)) { 1229 // status += "*"; 1230 // } 1231 // } 1232 list.add(status); 1233 } 1234 data.add(list.toArray(new String[list.size()])); 1235 } 1236 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1237 String value = tablePrinter.addRows(flattened).toTable(); 1238 pw2.println(value); 1239 1240 pw2.append("<h2>Others</h2><div align='left'><ul>"); 1241 1242 for (Organization vendor2 : smallVendors) { 1243 pw2.append("<li><b>"); 1244 pw2.append(TransliteratorUtilities.toHTML.transform( 1245 vendor2.getDisplayName())).append(": </b>"); 1246 boolean first1 = true; 1247 for (Level level : Level.values()) { 1248 boolean first2 = true; 1249 Level other = null; 1250 for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) { 1251 String key = data2.getKey(); 1252 Level level2 = data2.getValue(); 1253 if (level != level2) { 1254 continue; 1255 } 1256 if (key.equals("*")) { 1257 other = level2; 1258 continue; 1259 } 1260 if (first2) { 1261 if (first1) { 1262 first1 = false; 1263 } else { 1264 pw2.append("; "); 1265 } 1266 pw2.append(level2.toString()).append(": "); 1267 first2 = false; 1268 } else { 1269 pw2.append(", "); 1270 } 1271 pw2.append(TransliteratorUtilities.toHTML.transform(key)); 1272 } 1273 if (other != null) { 1274 if (first2) { 1275 if (first1) { 1276 first1 = false; 1277 } else { 1278 pw2.append("; "); 1279 } 1280 pw2.append(level.toString()).append(": "); 1281 first2 = false; 1282 } else { 1283 pw2.append(", "); 1284 } 1285 pw2.append("<i>other</i>"); 1286 } 1287 } 1288 pw2.append("</li>"); 1289 } 1290 pw2.append("</ul></div>"); 1291 pw2.close(); 1292 } 1293 1294 LanguageTagParser lpt2 = new LanguageTagParser(); 1295 1296 // TODO replace this with standard call. 1297 getAlias(String locale)1298 private String getAlias(String locale) { 1299 lpt2.set(locale); 1300 locale = lpt2.toString(); // normalize 1301 //String language = lpt2.getLanguage(); 1302 String script = lpt2.getScript(); 1303 String region = lpt2.getRegion(); 1304 // List variants = lpt2.getVariants(); 1305 String temp; 1306 for (String old : localeAliasInfo.get("language").keySet()) { 1307 if (locale.startsWith(old)) { 1308 // the above is a rough check, and will fail with old=moh and locale=mo 1309 if (!locale.equals(old) && !locale.startsWith(old + "_")) { 1310 continue; 1311 } 1312 temp = localeAliasInfo.get("language").get(old); 1313 lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); 1314 break; 1315 } 1316 } 1317 temp = localeAliasInfo.get("script").get(script); 1318 if (temp != null) { 1319 lpt2.setScript(temp.split("\\s+")[0]); 1320 } 1321 temp = localeAliasInfo.get("territory").get(region); 1322 if (temp != null) { 1323 lpt2.setRegion(temp.split("\\s+")[0]); 1324 } 1325 return lpt2.toString(); 1326 } 1327 getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata)1328 private String getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata) { 1329 Level statusLevel = vendordata.get(vendor).get(locale); 1330 return statusLevel == null ? "" : statusLevel.toString(); 1331 // String status = statusLevel == null ? null : statusLevel.toString(); 1332 // String curLocale = locale; 1333 // while (status == null) { 1334 // curLocale = LocaleIDParser.getParent(curLocale); 1335 // if ("root".equals(curLocale)) { 1336 // status = " "; 1337 // break; 1338 // } 1339 // statusLevel = vendordata.get(vendor).get(curLocale); 1340 // if (statusLevel != null) { 1341 // status = statusLevel + "†"; 1342 // } 1343 // } 1344 // return status; 1345 } 1346 showCountryLanguageInfo(PrintWriter pw)1347 private void showCountryLanguageInfo(PrintWriter pw) throws IOException { 1348 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory-Language Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1349 PrintWriter pw2 = pw21; 1350 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1351 nf.setGroupingUsed(true); 1352 //NumberFormat percent = new DecimalFormat("000.0%"); 1353 TablePrinter tablePrinter = new TablePrinter() 1354 // tablePrinter.setSortPriorities(0,4) 1355 .addColumn("T", "class='source'", null, "class='source'", true) 1356 .setSortPriority(0) 1357 .setBreakSpans(true) 1358 .setRepeatHeader(true) 1359 .setHidden(true) 1360 .addColumn("Territory", "class='source'", null, "class='source'", true) 1361 .setSortPriority(0) 1362 .setBreakSpans(true) 1363 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1364 "class='source'", true) 1365 .addColumn("Terr. Literacy", "class='target'", "{0,number,@@}%", "class='targetRight'", true); 1366 1367 tablePrinter 1368 .addColumn("Language", "class='target'", null, "class='target'", false) 1369 .addColumn("Code", "class='target'", "<a href=\"language_territory_information.html#{0}\">{0}</a>", 1370 "class='target'", false) 1371 .addColumn("Lang. Pop.", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1372 .addColumn("Pop.%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1373 .setSortAscending(false).setSortPriority(1) 1374 .addColumn("Literacy%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1375 .addColumn("Written%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1376 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1377 1378 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1379 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1380 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1381 double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); 1382 1383 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) { 1384 PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1385 double languagePopulationPercent = 100 * languageData.getPopulation() / territoryData2.getPopulation(); 1386 double languageliteracy = languageData.getLiteratePopulationPercent(); 1387 double writingFrequency = languageData.getWritingPercent(); 1388 1389 tablePrinter.addRow() 1390 .addCell(getFirstPrimaryWeight(territoryName)) 1391 .addCell(territoryName) 1392 .addCell(territoryCode) 1393 .addCell(territoryLiteracy) 1394 .addCell(getLanguageName(languageCode) + getOfficialStatus(territoryCode, languageCode)) 1395 .addCell(languageCode) 1396 .addCell(languageData.getPopulation()) 1397 .addCell(languagePopulationPercent) 1398 .addCell(languageliteracy) 1399 .addCell(writingFrequency) 1400 .addCell( 1401 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + getLanguageName(languageCode) 1402 + " (" + languageCode + ")" 1403 + " in " + territoryName + " (" + territoryCode + ")", 1404 "<fixed data for territory, plus references>")) 1405 .finishRow(); 1406 } 1407 1408 tablePrinter.addRow() 1409 .addCell(getFirstPrimaryWeight(territoryName)) 1410 .addCell(territoryName) 1411 .addCell(territoryCode) 1412 .addCell(territoryLiteracy) 1413 .addCell( 1414 addBug(1217, "<i>add new</i>", "<email>", "Add language to " + territoryName + "(" 1415 + territoryCode + ")", 1416 "<language, speaker pop. and literacy in territory, plus references>")) 1417 .addCell("") 1418 .addCell(0.0d) 1419 .addCell(0.0d) 1420 .addCell(0.0d) 1421 .addCell(0.0d) 1422 .addCell("") 1423 .finishRow(); 1424 1425 } 1426 String value = tablePrinter.toTable(); 1427 pw2.println(value); 1428 pw2.close(); 1429 } 1430 showCountryInfo(PrintWriter pw)1431 private void showCountryInfo(PrintWriter pw) throws IOException { 1432 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1433 PrintWriter pw2 = pw21; 1434 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1435 nf.setGroupingUsed(true); 1436 //NumberFormat percent = new DecimalFormat("000.0%"); 1437 TablePrinter tablePrinter = new TablePrinter() 1438 // tablePrinter.setSortPriorities(0,4) 1439 .addColumn("T", "class='source'", null, "class='source'", true) 1440 .setSortPriority(0) 1441 .setBreakSpans(true) 1442 .setRepeatHeader(true) 1443 .setHidden(true) 1444 .addColumn("Territory", "class='source'", null, "class='source'", true) 1445 .setSortPriority(0) 1446 .setBreakSpans(true) 1447 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1448 "class='source'", true) 1449 .addColumn("Terr. Pop (M)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1450 .addColumn("Terr. GDP ($M PPP)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1451 .addColumn("Currencies (2006...)", "class='target'", null, "class='target'", true); 1452 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1453 String header = it.next(); 1454 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1455 tablePrinter.addColumn(header).setHeaderAttributes("class='target'") 1456 .setCellAttributes("class='target'").setSpanRows(true); 1457 } 1458 1459 tablePrinter 1460 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1461 1462 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1463 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1464 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1465 double population = territoryData2.getPopulation() / 1000000; 1466 double gdp = territoryData2.getGdp() / 1000000; 1467 1468 Map<String, Set<String>> worldData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false)); 1469 Map<String, Set<String>> countryData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false)); 1470 1471 tablePrinter.addRow() 1472 .addCell(getFirstPrimaryWeight(territoryName)) 1473 .addCell(territoryName) 1474 .addCell(territoryCode) 1475 .addCell(population) 1476 .addCell(gdp) 1477 .addCell(getCurrencyNames(territoryCode)); 1478 1479 addOtherCountryData(tablePrinter, worldData, countryData); 1480 1481 tablePrinter 1482 .addCell( 1483 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + territoryName + " (" + territoryCode + ")", 1484 "<fixed data for territory, plus references>")) 1485 .finishRow(); 1486 1487 } 1488 String value = tablePrinter.toTable(); 1489 pw2.println(value); 1490 pw2.close(); 1491 } 1492 1493 static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); 1494 1495 // Do just an approximation for now 1496 getFirstPrimaryWeight(String territoryName)1497 private String getFirstPrimaryWeight(String territoryName) { 1498 char first = territoryName.charAt(0); 1499 String result = nfd.getDecomposition(first); 1500 if (result == null) { 1501 return UTF16.valueOf(first); 1502 } 1503 return UTF16.valueOf(result.codePointAt(0)); 1504 } 1505 1506 // private String getTerritoryWithLikelyLink(String territoryCode) { 1507 // return "<a href='likely_subtags.html#und_"+ territoryCode + "'>" + territoryCode + "</a>"; 1508 // } 1509 getOfficialStatus(String territoryCode, String languageCode)1510 private String getOfficialStatus(String territoryCode, String languageCode) { 1511 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1512 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1513 return " <span title='" + x.getOfficialStatus().toString().replace('_', ' ') + "'>{" 1514 + x.getOfficialStatus().toShortString() + "}</span>"; 1515 } 1516 getRawOfficialStatus(String territoryCode, String languageCode)1517 private String getRawOfficialStatus(String territoryCode, String languageCode) { 1518 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1519 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1520 return x.getOfficialStatus().toString(); 1521 } 1522 addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData)1523 private void addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData) { 1524 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1525 String type = it2.next(); 1526 Set<String> worldResults = worldData.get(type); 1527 Set<String> territoryResults = null; 1528 if (countryData != null) { 1529 territoryResults = countryData.get(type); 1530 } 1531 if (territoryResults == null) { 1532 territoryResults = worldResults; 1533 } 1534 String out = ""; 1535 if (territoryResults != null) { 1536 out = territoryResults + ""; 1537 out = out.substring(1, out.length() - 1); // remove [ and ] 1538 } 1539 tablePrinter.addCell(out); 1540 } 1541 } 1542 getCurrencyNames(String territoryCode)1543 private String getCurrencyNames(String territoryCode) { 1544 Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode); 1545 if (currencies == null || currencies.size() == 0) return ""; 1546 StringBuilder buffer = new StringBuilder(); 1547 for (String code : currencies) { 1548 if (buffer.length() != 0) buffer.append(",<br>"); 1549 buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false)); 1550 } 1551 return buffer.toString(); 1552 } 1553 addCharSubstitution(String value, String substitute)1554 private void addCharSubstitution(String value, String substitute) { 1555 if (substitute.equals(value)) 1556 return; 1557 LinkedHashSet<String> already = charSubstitutions.get(value); 1558 if (already == null) 1559 charSubstitutions.put(value, already = new LinkedHashSet<>(0)); 1560 already.add(substitute); 1561 Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); 1562 } 1563 1564 /** 1565 * 1566 */ 1567 // public void showTerritoryInfo() { 1568 // Map territory_parent = new TreeMap(); 1569 // gather("001", territory_parent); 1570 // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { 1571 // String territory = (String) it.next(); 1572 // String parent = (String) territory_parent.get(territory); 1573 // System.out.println(territory + "\t" + english.getName(english.TERRITORY_NAME, territory) + "\t" 1574 // + parent + "\t" + english.getName(english.TERRITORY_NAME, parent)); 1575 // } 1576 // } 1577 1578 // private void gather(String item, Map territory_parent) { 1579 // Collection containedByItem = (Collection) group_contains.get(item); 1580 // if (containedByItem == null) 1581 // return; 1582 // for (Iterator it = containedByItem.iterator(); it.hasNext();) { 1583 // String contained = (String) it.next(); 1584 // territory_parent.put(contained, item); 1585 // gather(contained, territory_parent); 1586 // } 1587 // } 1588 addTerritoryInfo(String territoriesList, String type, String info)1589 private void addTerritoryInfo(String territoriesList, String type, String info) { 1590 String[] territories = territoriesList.split("\\s+"); 1591 territoryTypes.add(type); 1592 for (int i = 0; i < territories.length; ++i) { 1593 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false); 1594 Map<String, Set<String>> s = territoryData.get(territory); 1595 if (s == null) { 1596 territoryData.put(territory, s = new TreeMap<>()); 1597 } 1598 Set<String> ss = s.get(type); 1599 if (ss == null) { 1600 s.put(type, ss = new TreeSet<>()); 1601 } 1602 ss.add(info); 1603 } 1604 } 1605 showCalendarData(PrintWriter pw0)1606 public void showCalendarData(PrintWriter pw0) throws IOException { 1607 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Other Territory Data", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1608 pw.println("<table>"); 1609 pw.println("<tr><th class='source'>Territory</th>"); 1610 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1611 String header = it.next(); 1612 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1613 pw.println("<th class='target'>" + header + "</th>"); 1614 } 1615 pw.println("</tr>"); 1616 1617 String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false); 1618 Map<String, Set<String>> worldData = territoryData.get(worldName); 1619 for (Iterator<String> it = territoryData.keySet().iterator(); it.hasNext();) { 1620 String country = it.next(); 1621 if (country.equals(worldName)) 1622 continue; 1623 showCountry(pw, country, country, worldData); 1624 } 1625 showCountry(pw, worldName, "Other", worldData); 1626 pw.println("</table>"); 1627 pw.close(); 1628 } 1629 showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)1630 private void showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData) { 1631 pw.println("<tr><td class='source'>" + countryTitle + "</td>"); 1632 Map<String, Set<String>> data = territoryData.get(country); 1633 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1634 String type = it2.next(); 1635 String target = "target"; 1636 Set<String> results = data.get(type); 1637 Set<String> worldResults = worldData.get(type); 1638 if (results == null) { 1639 results = worldResults; 1640 target = "target2"; 1641 } else if (results.equals(worldResults)) { 1642 target = "target2"; 1643 } 1644 String out = ""; 1645 if (results != null) { 1646 out = results + ""; 1647 out = out.substring(1, out.length() - 1); // remove [ and ] 1648 } 1649 pw.println("<td class='" + target + "'>" + out + "</td>"); 1650 } 1651 pw.println("</tr>"); 1652 } 1653 showCorrespondances()1654 public void showCorrespondances() { 1655 // show correspondances between language and script 1656 Map<String, String> name_script = new TreeMap<>(); 1657 for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext();) { 1658 String script = it.next(); 1659 String name = english.getName(CLDRFile.SCRIPT_NAME, script); 1660 if (name == null) 1661 name = script; 1662 name_script.put(name, script); 1663 /* 1664 * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 1665 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 1666 * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 1667 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 1668 */} 1669 String delimiter = "\\P{L}+"; 1670 Map<String, String> name_language = new TreeMap<>(); 1671 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1672 String language = it.next(); 1673 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1674 if (names == null) 1675 names = language; 1676 name_language.put(names, language); 1677 } 1678 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1679 String language = it.next(); 1680 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1681 if (names == null) 1682 names = language; 1683 String[] words = names.split(delimiter); 1684 if (words.length > 1) { 1685 // System.out.println(names); 1686 } 1687 for (int i = 0; i < words.length; ++i) { 1688 String name = words[i]; 1689 String script = name_script.get(name); 1690 if (script != null) { 1691 Set<String> langSet = (Set<String>) script_languages.asMap().get(script); 1692 if (langSet != null && langSet.contains(language)) 1693 System.out.print("*"); 1694 System.out.println("\t" + name + " [" + language + "]\t=> " + name + " [" + script + "]"); 1695 } else { 1696 String language2 = name_language.get(name); 1697 if (language2 != null && !language.equals(language2)) { 1698 Set<String> langSet = (Set<String>) language_scripts.get(language); 1699 if (langSet != null) 1700 System.out.print("*"); 1701 System.out.print("?\tSame script?\t + " + getName(CLDRFile.LANGUAGE_NAME, language, false) 1702 + "\t & " + getName(CLDRFile.LANGUAGE_NAME, language2, false)); 1703 langSet = (Set<String>) language_scripts.get(language2); 1704 if (langSet != null) 1705 System.out.print("*"); 1706 System.out.println(); 1707 } 1708 } 1709 } 1710 } 1711 } 1712 1713 /** 1714 * @throws IOException 1715 * 1716 */ printCurrency(PrintWriter index)1717 public void printCurrency(PrintWriter index) throws IOException { 1718 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Detailed Territory-Currency Information", 1719 null 1720 // "<p>The following table shows when currencies were in use in different countries. " + 1721 // "See also <a href='#format_info'>Decimal Digits and Rounding</a>. " + 1722 // "To correct any information here, please file a " + 1723 // addBug(1274, "bug", "<email>", "Currency Bug", 1724 // "<currency, country, and references supporting change>") + 1725 // ".</p>" 1726 , SUPPLEMENTAL_INDEX_ANCHORS)); 1727 String section1 = "Territory to Currency"; 1728 String section2 = "Decimal Digits and Rounding"; 1729 showContents(pw, "territory_currency", section1, "format_info", section2); 1730 1731 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("territory_currency", "1. " + section1) + "</h2>"); 1732 1733 // doTitle(pw, "Territory \u2192 Currency"); 1734 pw.println("<table>"); 1735 pw.println("<tr><th class='source'>Territory</th>" + 1736 "<th class='source'>Code</th>" + 1737 "<th class='target'>From</th>" + 1738 "<th class='target'>To</th>" + 1739 "<th class='target'>Currency</th>" + 1740 "<th class='target'>Name</th>" + 1741 "</tr>"); 1742 1743 Relation<String, String> currencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1744 HashSet.class); 1745 Relation<String, String> modernCurrencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1746 HashSet.class); 1747 1748 for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) { 1749 String name = nameCode.getKey(); 1750 String regionCode = nameCode.getValue(); 1751 if (!StandardCodes.isCountry(regionCode)) { 1752 continue; 1753 } 1754 if (sc.isLstregPrivateUse("region", regionCode)) { 1755 continue; 1756 } 1757 Set<CurrencyDateInfo> info = supplementalDataInfo.getCurrencyDateInfo(regionCode); 1758 1759 int infoSize = 1; 1760 if (info != null) { 1761 infoSize = info.size(); 1762 } 1763 pw.println("<tr>" + 1764 "<td class='source' rowSpan='" + infoSize + "'>" + name + "</td>" + 1765 "<td class='source' rowSpan='" + infoSize + "'>" + CldrUtility.getDoubleLinkedText(regionCode) 1766 + "</td>"); 1767 if (info == null) { 1768 pw.println("<td class='target'>" + "<i>na</i>" + "</td>" + 1769 "<td class='target'>" + "<i>na</i>" + "</td>" + 1770 "<td class='target'>" + "<i>na</i>" + "</td>" + 1771 "<td class='target'>" + "<i>na</i>" + "</td>" + 1772 "</tr>"); 1773 continue; 1774 } 1775 boolean first = true; 1776 for (CurrencyDateInfo infoItem : info) { 1777 Date endData = infoItem.getEnd(); 1778 if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { 1779 modernCurrencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1780 } else { 1781 currencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1782 } 1783 if (first) 1784 first = false; 1785 else 1786 pw.println("<tr>"); 1787 pw.println("<td class='target'>" + CurrencyDateInfo.formatDate(infoItem.getStart()) + "</td>" + 1788 "<td class='target'>" + CurrencyDateInfo.formatDate(endData) + "</td>" + 1789 "<td class='target'>" + infoItem.getCurrency() + "</td>" + 1790 "<td class='target'>" + english.getName("currency", infoItem.getCurrency()) + "</td>" + 1791 "</tr>"); 1792 } 1793 } 1794 // doFooter(pw); 1795 // pw.close(); 1796 // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); 1797 pw.write("</table>"); 1798 1799 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) + "</h2>"); 1800 1801 pw.write("<p>This table shows the number of digits used for each currency, " 1802 + " and the countries where it is or was in use. " 1803 + "Countries where the currency is in current use are bolded. " 1804 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " 1805 + "Where the values are different in a cash context, that is shown in a second column." 1806 + "</p>"); 1807 pw.write("<div align='center'><table>"); 1808 1809 // doTitle(pw, "Currency Format Info"); 1810 // <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" cashRounding="0"/> 1811 1812 pw.println("<tr>" + 1813 "<th class='source nowrap'>Name</th>" + 1814 "<th class='source'>Currency</th>" + 1815 "<th class='target'>Digits</th>" + 1816 "<th class='target'>Cash Digits</th>" + 1817 "<th class='target'>Countries</th>" + 1818 "</tr>"); 1819 Set<String> currencyList = new TreeSet<String>(col); 1820 currencyList.addAll(currency_fractions.keySet()); 1821 currencyList.addAll(currency_territory.keySet()); 1822 1823 for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) { 1824 //String name = nameCode.getKey(); 1825 String currency = nameCode.getValue(); 1826 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); 1827 Set<String> territories = currencyToTerritory.get(currency); 1828 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency); 1829 1830 // String fractions = (String) currency_fractions.get(currency); 1831 // if (fractions == null) 1832 // fractions = defaultDigits; 1833 // Set territories = (Set) currency_territory.get(currency); 1834 pw.print("<tr>" + 1835 "<td class='source nowrap'>" 1836 + TransliteratorUtilities.toHTML.transform(english.getName("currency", currency)) + "</td>" + 1837 "<td class='source'>" + CldrUtility.getDoubleLinkedText(currency) + "</td>" + 1838 "<td class='target'>" + 1839 info.getDigits() 1840 + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") 1841 + "</td>" 1842 + "<td class='target'>" 1843 + (info.cashDigits == info.getDigits() && info.cashRounding == info.getRounding() ? "" : (info.cashDigits 1844 + (info.cashRounding == 0 ? "" : " (" + info.cashRounding + ")"))) 1845 + "</td>" + 1846 "<td class='target'>"); 1847 boolean first = true; 1848 boolean needBreak = false; 1849 if (modernTerritories != null) { 1850 needBreak = true; 1851 for (String territory : modernTerritories) { 1852 if (first) 1853 first = false; 1854 else 1855 pw.print(", "); 1856 pw.print("<b>" + territory + "</b>"); 1857 } 1858 } 1859 //boolean haveBreak = true; 1860 if (territories != null) { 1861 for (String territory : territories) { 1862 if (first) 1863 first = false; 1864 else if (!needBreak) 1865 pw.print(", "); 1866 else { 1867 pw.print(",<br>"); 1868 needBreak = false; 1869 } 1870 pw.print(territory); 1871 } 1872 } 1873 pw.println("</td></tr>"); 1874 } 1875 pw.println("</table>"); 1876 pw.close(); 1877 // doFooter(pw); 1878 1879 // if (false) { 1880 // doTitle(pw, "Territories Versus Currencies"); 1881 // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without Territories</th></tr>"); 1882 // pw.println("<tr><td class='target'>"); 1883 // Set territoriesWithoutCurrencies = new TreeSet(); 1884 // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); 1885 // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); 1886 // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); 1887 // boolean first = true; 1888 // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { 1889 // if (first) first = false; 1890 // else pw.print(", "); 1891 // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false)); 1892 // } 1893 // pw.println("</td><td class='target'>"); 1894 // Set currenciesWithoutTerritories = new TreeSet(); 1895 // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); 1896 // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); 1897 // first = true; 1898 // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { 1899 // if (first) first = false; 1900 // else pw.print(", "); 1901 // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false)); 1902 // } 1903 // pw.println("</td></tr>"); 1904 // doFooter(pw); 1905 // } 1906 } 1907 getTerritoryName(String territory)1908 private String getTerritoryName(String territory) { 1909 String name; 1910 name = english.getName("territory", territory); 1911 if (name == null) { 1912 name = sc.getData("territory", territory); 1913 } 1914 if (name != null) { 1915 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; 1916 } else { 1917 return territory; 1918 } 1919 } 1920 1921 /** 1922 * @throws IOException 1923 * 1924 */ printAliases(PrintWriter index)1925 public void printAliases(PrintWriter index) throws IOException { 1926 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1927 1928 // doTitle(pw, "Aliases"); 1929 pw.println("<table>"); 1930 pw.println("<tr><th class='source'>" + "Type" + "</th>" + 1931 "<th class='source'>" + "Code" + "</th>" + 1932 "<th class='target'>" + "Reason" + "</th>" + 1933 "<th class='target'>" + "Substitute (if available)" + "</th></tr>"); 1934 for (Iterator<String[]> it = aliases.iterator(); it.hasNext();) { 1935 String[] items = it.next(); 1936 pw.println("<tr><td class='source'>" + items[0] + "</td>" + 1937 "<td class='source'>" + CldrUtility.getDoubleLinkedText(items[1]) + "</td>" + 1938 "<td class='target'>" + items[3] + "</td>" + 1939 "<td class='target'>" + items[2] + "</td></tr>"); 1940 } 1941 // doFooter(pw); 1942 pw.println("</table>"); 1943 pw.close(); 1944 } 1945 1946 // deprecatedItems 1947 // public void printDeprecatedItems(PrintWriter pw) { 1948 // doTitle(pw, "Deprecated Items"); 1949 // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>"); 1950 // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { 1951 // Map source = (Map)it.next(); 1952 // Object item; 1953 // pw.print("<tr>"); 1954 // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") + "</td>"); 1955 // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : "<i>any</i>") + "</td>"); 1956 // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : "<i>any</i>") + "</td>"); 1957 // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : "<i>any</i>") + "</td>"); 1958 // pw.print("</tr>"); 1959 // } 1960 // doFooter(pw); 1961 // } 1962 printWindows_Tzid(PrintWriter index)1963 public void printWindows_Tzid(PrintWriter index) throws IOException { 1964 Map<String, Map<String, Map<String, String>>> zoneMapping = supplementalDataInfo 1965 .getTypeToZoneToRegionToZone(); 1966 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1967 for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : zoneMapping.entrySet()) { 1968 String type = typeAndZoneToRegionToZone.getKey(); 1969 Map<String, Map<String, String>> zoneToRegionToZone = typeAndZoneToRegionToZone.getValue(); 1970 pw.println("<br><h1>Mapping for: " + type + "</h1><br>"); 1971 // doTitle(pw, "Windows \u2192 Tzid"); 1972 pw.println("<table>"); 1973 pw.println("<tr><th class='source'>" + type + "</th><th class='source'>" + "Region" 1974 + "</th><th class='target'>" + "TZID" + "</th></tr>"); 1975 1976 for (Entry<String, Map<String, String>> zoneAndregionToZone : zoneToRegionToZone.entrySet()) { 1977 String source = zoneAndregionToZone.getKey(); 1978 Map<String, String> regionToZone = zoneAndregionToZone.getValue(); 1979 for (Entry<String, String> regionAndZone : regionToZone.entrySet()) { 1980 String region = regionAndZone.getKey(); 1981 String target = regionAndZone.getValue(); 1982 if (region == null) region = "<i>any</a>"; 1983 pw.println("<tr><td class='source'>" + source + "</td><td class='source'>" + region 1984 + "</td><td class='target'>" + target + "</td></tr>"); 1985 } 1986 } 1987 // doFooter(pw); 1988 pw.println("</table>"); 1989 } 1990 pw.close(); 1991 } 1992 1993 // <info iso4217="ADP" digits="0" rounding="0"/> 1994 printCharacters(PrintWriter index)1995 public void printCharacters(PrintWriter index) throws IOException { 1996 String title = "Character Fallback Substitutions"; 1997 1998 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 1999 // doTitle(pw, title); 2000 pw.println("<table>"); 2001 2002 pw.println( 2003 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>"); 2004 UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); 2005 for (com.ibm.icu.text.UnicodeSetIterator it = new com.ibm.icu.text.UnicodeSetIterator(chars); it.next();) { 2006 String value = it.getString(); 2007 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); 2008 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); 2009 } 2010 int[] counts = new int[4]; 2011 for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext();) { 2012 String value = it.next(); 2013 LinkedHashSet<String> substitutes = charSubstitutions.get(value); 2014 String nfc = Normalizer.normalize(value, Normalizer.NFC); 2015 String nfkc = Normalizer.normalize(value, Normalizer.NFKC); 2016 2017 String sourceTag = "<td class='source'>"; 2018 if (substitutes.size() > 1) { 2019 sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>"; 2020 } 2021 boolean first = true; 2022 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext();) { 2023 String substitute = it2.next(); 2024 String type = "Explicit"; 2025 String targetTag = "<td class='target3'>"; 2026 if (substitute.equals(nfc)) { 2027 type = "NFC"; 2028 targetTag = "<td class='target'>"; 2029 counts[2]++; 2030 } else if (substitute.equals(nfkc)) { 2031 type = "NFKC"; 2032 targetTag = "<td class='target4'>"; 2033 counts[3]++; 2034 } else { 2035 counts[0]++; 2036 } 2037 pw.println("<tr>" 2038 + (!first ? "" : sourceTag + hex(value, ", ") + "</td>" + sourceTag 2039 + TransliteratorUtilities.toHTML.transliterate(value) + "</td>" + sourceTag 2040 + UCharacter.getName(value, ", ") 2041 + "</td>") 2042 + targetTag + type + "</td>" + targetTag + hex(substitute, ", ") + "</td>" 2043 + targetTag + TransliteratorUtilities.toHTML.transliterate(substitute) + "</td>" + targetTag 2044 + UCharacter.getName(substitute, ", ") + "</td></tr>"); 2045 first = false; 2046 } 2047 } 2048 // doFooter(pw); 2049 pw.println("</table>"); 2050 2051 pw.close(); 2052 for (int i = 0; i < counts.length; ++i) { 2053 System.out.println("Count\t" + i + "\t" + counts[i]); 2054 } 2055 } 2056 hex(String s, String separator)2057 public static String hex(String s, String separator) { 2058 StringBuffer result = new StringBuffer(); 2059 int cp; 2060 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 2061 cp = UTF16.charAt(s, i); 2062 if (i != 0) 2063 result.append(separator); 2064 result.append(com.ibm.icu.impl.Utility.hex(cp)); 2065 } 2066 return result.toString(); 2067 } 2068 2069 /** 2070 * 2071 */ 2072 // private PrintWriter doTitle(PrintWriter pw, String title) { 2073 // //String anchor = FileUtilities.anchorize(title); 2074 // pw.println("<div align='center'><table>"); 2075 // //anchors.put(title, anchor); 2076 // //PrintWriter result = null; 2077 // //return result; 2078 // } 2079 2080 // private void doFooter(PrintWriter pw) { 2081 // pw.println("</table></div>"); 2082 // } printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst)2083 public void printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst) { 2084 String name = depth == 4 ? start : getName(CLDRFile.TERRITORY_NAME, start, false); 2085 if (!isFirst) 2086 pw.print(lead); 2087 int count = getTotalContainedItems(start, depth); 2088 pw.print("<td class='z" + depth + "' rowSpan='" + count + "'>" + name + "</td>"); // colSpan='" + (5 - 2089 // depth) + "' 2090 if (depth == 4) 2091 pw.println("</tr>"); 2092 Collection<String> contains = getContainedCollection(start, depth); 2093 if (contains != null) { 2094 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator); 2095 contains2.addAll(contains); 2096 boolean first = true; 2097 for (Iterator<String> it = contains2.iterator(); it.hasNext();) { 2098 String item = it.next(); 2099 printContains2(pw, lead, item, depth + 1, first); // + "<td> </td>" 2100 first = false; 2101 } 2102 } 2103 } 2104 getTotalContainedItems(String start, int depth)2105 private int getTotalContainedItems(String start, int depth) { 2106 Collection<String> c = getContainedCollection(start, depth); 2107 if (c == null) 2108 return 1; 2109 int sum = 0; 2110 for (Iterator<String> it = c.iterator(); it.hasNext();) { 2111 sum += getTotalContainedItems(it.next(), depth + 1); 2112 } 2113 return sum; 2114 } 2115 2116 /** 2117 * 2118 */ getContainedCollection(String start, int depth)2119 private Collection<String> getContainedCollection(String start, int depth) { 2120 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2121 if (contains == null) { 2122 contains = sc.getCountryToZoneSet().get(start); 2123 if (contains == null && depth == 3) { 2124 contains = new TreeSet<>(); 2125 if (start.compareTo("A") >= 0) { 2126 contains.add("<font color='red'>MISSING TZID</font>"); 2127 } else { 2128 contains.add("<font color='red'>Not yet ISO code</font>"); 2129 } 2130 } 2131 } 2132 return contains; 2133 } 2134 2135 /** 2136 * @param table 2137 * TODO 2138 * 2139 */ printMissing(PrintWriter pw, int source, int table)2140 public void printMissing(PrintWriter pw, int source, int table) { 2141 Set<String> missingItems = new HashSet<>(); 2142 String type = null; 2143 if (source == CLDRFile.TERRITORY_NAME) { 2144 type = "territory"; 2145 missingItems.addAll(sc.getAvailableCodes(type)); 2146 missingItems.removeAll(territory_languages.keySet()); 2147 missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet()); 2148 missingItems.remove("200"); // czechoslovakia 2149 } else if (source == CLDRFile.SCRIPT_NAME) { 2150 type = "script"; 2151 missingItems.addAll(sc.getAvailableCodes(type)); 2152 missingItems.removeAll(script_languages.keySet()); 2153 } else if (source == CLDRFile.LANGUAGE_NAME) { 2154 type = "language"; 2155 missingItems.addAll(sc.getAvailableCodes(type)); 2156 if (table == CLDRFile.SCRIPT_NAME) 2157 missingItems.removeAll(language_scripts.keySet()); 2158 if (table == CLDRFile.TERRITORY_NAME) 2159 missingItems.removeAll(language_territories.keySet()); 2160 } else { 2161 throw new IllegalArgumentException("Illegal code"); 2162 } 2163 Set<String> missingItemsNamed = new TreeSet<String>(col); 2164 for (Iterator<String> it = missingItems.iterator(); it.hasNext();) { 2165 String item = it.next(); 2166 List<String> data = sc.getFullData(type, item); 2167 if (data.get(0).equals("PRIVATE USE")) 2168 continue; 2169 if (data.size() < 3) 2170 continue; 2171 if (!"".equals(data.get(2))) 2172 continue; 2173 2174 String itemName = getName(source, item, true); 2175 missingItemsNamed.add(itemName); 2176 } 2177 pw.println("<div align='center'><table>"); 2178 for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext();) { 2179 pw.println("<tr><td class='target'>" + it.next() + "</td></tr>"); 2180 } 2181 pw.println("</table></div>"); 2182 } 2183 2184 // source, eg english.TERRITORY_NAME 2185 // target, eg english.LANGUAGE_NAME print(PrintWriter pw, int source, int target)2186 public void print(PrintWriter pw, int source, int target) { 2187 Multimap<String, String> data = source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 2188 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 2189 : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 2190 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 2191 : null; 2192 // transform into names, and sort 2193 Map<String, Set<String>> territory_languageNames = new TreeMap<String, Set<String>>(col); 2194 for (Iterator<String> it = data.keySet().iterator(); it.hasNext();) { 2195 String territory = it.next(); 2196 String territoryName = getName(source, territory, true); 2197 Set<String> s = territory_languageNames.get(territoryName); 2198 if (s == null) 2199 territory_languageNames.put(territoryName, s = new TreeSet<String>(col)); 2200 for (Iterator<String> it2 = data.get(territory).iterator(); it2.hasNext();) { 2201 String language = it2.next(); 2202 String languageName = getName(target, language, true); 2203 s.add(languageName); 2204 } 2205 } 2206 2207 pw.println("<div align='center'><table>"); 2208 2209 for (Iterator<String> it = territory_languageNames.keySet().iterator(); it.hasNext();) { 2210 String territoryName = it.next(); 2211 pw.println("<tr><td class='source' colspan='2'>" + territoryName + "</td></tr>"); 2212 Set<String> s = territory_languageNames.get(territoryName); 2213 for (Iterator<String> it2 = s.iterator(); it2.hasNext();) { 2214 String languageName = it2.next(); 2215 pw.println("<tr><td> </td><td class='target'>" + languageName + "</td></tr>"); 2216 } 2217 } 2218 pw.println("</table></div>"); 2219 2220 } 2221 2222 /** 2223 * @param codeFirst 2224 * TODO 2225 * 2226 */ getName(int type, String oldcode, boolean codeFirst)2227 private String getName(int type, String oldcode, boolean codeFirst) { 2228 if (oldcode.contains(" ")) { 2229 String[] result = oldcode.split("\\s+"); 2230 for (int i = 0; i < result.length; ++i) { 2231 result[i] = getName(type, result[i], codeFirst); 2232 } 2233 return CldrUtility.join(Arrays.asList(result), ", "); 2234 } else { 2235 int pos = oldcode.indexOf('*'); 2236 String code = pos < 0 ? oldcode : oldcode.substring(0, pos); 2237 String ename = english.getName(type, code); 2238 String nameString = ename == null ? code : ename; 2239 return nameString.equals(oldcode) ? nameString 2240 : codeFirst ? "[" + oldcode + "]" + "\t" + nameString 2241 : nameString + "\t" + "[" + oldcode + "]"; 2242 } 2243 } 2244 2245 private String getName(String locale, boolean codeFirst) { 2246 String ename = getLanguageName(locale); 2247 return codeFirst ? "[" + locale + "]\t" + (ename == null ? locale : ename) : (ename == null ? locale 2248 : ename) + "\t[" + locale + "]"; 2249 } 2250 2251 Comparator territoryNameComparator = new Comparator() { 2252 @Override 2253 public int compare(Object o1, Object o2) { 2254 return col.compare(getName(CLDRFile.TERRITORY_NAME, (String) o1, false), 2255 getName(CLDRFile.TERRITORY_NAME, (String) o2, false)); 2256 } 2257 }; 2258 2259 static String[] stringArrayPattern = new String[0]; 2260 static String[][] string2ArrayPattern = new String[0][]; 2261 2262 public static Map<String, String> territoryAliases = new HashMap<>(); 2263 2264 public void printContains(PrintWriter index) throws IOException { 2265 String title = "Territory Containment (UN M.49)"; 2266 2267 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2268 // doTitle(pw, title); 2269 List<String[]> rows = new ArrayList<>(); 2270 printContains3("001", rows, new ArrayList<String>()); 2271 TablePrinter tablePrinter = new TablePrinter() 2272 .addColumn("World", "class='source'", null, "class='z0'", true).setSortPriority(0) 2273 .addColumn("Continent", "class='source'", null, "class='z1'", true).setSortPriority(1) 2274 .addColumn("Subcontinent", "class='source'", null, "class='z2'", true).setSortPriority(2) 2275 .addColumn("Country (Territory)", "class='source'", null, "class='z3'", true).setSortPriority(3) 2276 .addColumn("Time Zone", "class='source'", null, "class='z4'", true).setSortPriority(4); 2277 String[][] flatData = rows.toArray(string2ArrayPattern); 2278 pw.println(tablePrinter.addRows(flatData).toTable()); 2279 2280 showSubtable(pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); 2281 showSubtable(pw, ContainmentStyle.deprecated, "Deprecated", "Container", "Deprecated Region"); 2282 2283 // Relation<String, String> deprecated = supplementalDataInfo 2284 // .getTerritoryToContained(ContainmentStyle.deprecated); 2285 // 2286 // for (String region : deprecated.keySet()) { 2287 // nameToContainers.add(region); 2288 // } 2289 // pw.println("<h2>Groupings and Deprecated Regions</h2>"); 2290 // for (String region : nameToContainers) { 2291 // String name = getName(CLDRFile.TERRITORY_NAME, region, false); 2292 // Set<String> dep = deprecated.get(region); 2293 // Set<String> gro = grouping.get(region); 2294 // Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : dep).iterator(); 2295 // Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : gro).iterator(); 2296 // while (depIt.hasNext() || groIt.hasNext()) { 2297 // String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, depIt.next(), false) : ""; 2298 // String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, groIt.next(), false) : ""; 2299 // tablePrinter2.addRow() 2300 // .addCell(name) 2301 // .addCell(gro1) 2302 // .addCell(dep1) 2303 // .finishRow(); 2304 // } 2305 // } 2306 // pw.println(tablePrinter2.toTable()); 2307 // pw.println("<h2>Other Groupings</h2>"); 2308 // for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) { 2309 // showContainers(pw, regionContained); 2310 // } 2311 // 2312 // pw.println("<h2>Deprecated Codes</h2>"); 2313 // for (Entry<String, Set<String>> regionContained : deprecated.keyValuesSet()) { 2314 // showContainers(pw, regionContained); 2315 // } 2316 pw.close(); 2317 } 2318 2319 public void showSubtable(PrintWriter pw, ContainmentStyle containmentStyle, String title, String containerTitle, String containeeTitle) { 2320 pw.println("<h2>" + 2321 title + 2322 "</h2>"); 2323 TablePrinter tablePrinter2 = new TablePrinter() 2324 .addColumn(containerTitle, "class='source'", null, "class='z0'", true).setSortPriority(0) 2325 .addColumn(containeeTitle, "class='source'", null, "class='z4'", true).setSortPriority(1); 2326 2327 Relation<String, String> grouping = supplementalDataInfo 2328 .getTerritoryToContained(containmentStyle); 2329 2330 for (Entry<String, String> containerRegion : grouping.keyValueSet()) { 2331 String container = getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false); 2332 String containee = getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false); 2333 tablePrinter2.addRow() 2334 .addCell(container) 2335 .addCell(containee) 2336 .finishRow(); 2337 } 2338 pw.println(tablePrinter2.toTable()); 2339 } 2340 2341 public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) { 2342 String region = regionContained.getKey(); 2343 Set<String> contained = regionContained.getValue(); 2344 pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>"); 2345 for (String sub : contained) { 2346 pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>"); 2347 } 2348 pw.println("</ul></li></ul>"); 2349 } 2350 2351 private void printContains3(String start, List<String[]> rows, ArrayList<String> currentRow) { 2352 int len = currentRow.size(); 2353 if (len > 3) { 2354 return; // skip long items 2355 } 2356 currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false)); 2357 //Collection<String> contains = (Collection<String>) group_contains.get(start); 2358 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2359 if (contains == null) { 2360 contains = sc.getCountryToZoneSet().get(start); 2361 currentRow.add(""); 2362 if (contains == null) { 2363 currentRow.set(len + 1, "???"); 2364 rows.add(currentRow.toArray(stringArrayPattern)); 2365 } else { 2366 for (String item : contains) { 2367 currentRow.set(len + 1, item); 2368 rows.add(currentRow.toArray(stringArrayPattern)); 2369 } 2370 } 2371 currentRow.remove(len + 1); 2372 } else { 2373 for (String item : contains) { 2374 if (territoryAliases.keySet().contains(item)) { 2375 continue; 2376 } 2377 printContains3(item, rows, currentRow); 2378 } 2379 } 2380 currentRow.remove(len); 2381 } 2382 2383 } 2384 2385 /** 2386 * 2387 */ getInverse(Map<String, Set<String>> language_territories)2388 private static Map<String, Set<String>> getInverse(Map<String, Set<String>> language_territories) { 2389 // get inverse relation 2390 Map<String, Set<String>> territory_languages = new TreeMap<>(); 2391 for (Iterator<String> it = language_territories.keySet().iterator(); it.hasNext();) { 2392 String language = it.next(); 2393 Set<String> territories = language_territories.get(language); 2394 for (Iterator<String> it2 = territories.iterator(); it2.hasNext();) { 2395 String territory = it2.next(); 2396 Set<String> languages = territory_languages.get(territory); 2397 if (languages == null) 2398 territory_languages.put(territory, languages = new TreeSet<String>(col)); 2399 languages.add(language); 2400 } 2401 } 2402 return territory_languages; 2403 2404 } 2405 2406 static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region"); 2407 static final Map<String, String> NAME_TO_CURRENCY = getNameToCode(CodeType.currency, "currency"); 2408 getNameToCode(CodeType codeType, String cldrCodeType)2409 private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) { 2410 SortedMap<String, String> temp = new TreeMap<String, String>(col); 2411 for (String territory : StandardCodes.make().getAvailableCodes(codeType)) { 2412 String name = english.getName(cldrCodeType, territory); 2413 temp.put(name == null ? territory : name, territory); 2414 } 2415 temp = Collections.unmodifiableSortedMap(temp); 2416 return temp; 2417 } 2418 2419 /** 2420 * @param value_delimiter 2421 * TODO 2422 * 2423 */ addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value)2424 private static void addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value) { 2425 if (values != null) { 2426 Set<String> s = key_value.get(key); 2427 if (s == null) 2428 key_value.put(key, s = new TreeSet<String>(col)); 2429 s.addAll(Arrays.asList(values.split(value_delimiter))); 2430 } 2431 } 2432 addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value)2433 private static void addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value) { 2434 if (values != null) { 2435 key_value.putAll(key, Arrays.asList(values.split(value_delimiter))); 2436 } 2437 } 2438 showContents(Appendable pw, String... items)2439 public static void showContents(Appendable pw, String... items) { 2440 try { 2441 pw.append("</div>" + System.lineSeparator()); 2442 pw.append("<h3>Contents</h3>" + System.lineSeparator()); 2443 pw.append("<ol>" + System.lineSeparator()); 2444 for (int i = 0; i < items.length; i += 2) { 2445 pw.append("<li><a href='#" + items[i] + "'>" + items[i + 1] + "</a></li>" + System.lineSeparator()); 2446 } 2447 pw.append("</ol><hr>" + System.lineSeparator()); 2448 2449 pw.append("<div align='center'>" + System.lineSeparator()); 2450 } catch (IOException e) { 2451 throw new ICUUncheckedIOException(e); 2452 } 2453 } 2454 2455 } 2456