1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import java.io.IOException; 10 import java.io.PrintWriter; 11 import java.io.StringWriter; 12 import java.io.UnsupportedEncodingException; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.Collections; 17 import java.util.Comparator; 18 import java.util.Date; 19 import java.util.EnumSet; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.Iterator; 23 import java.util.LinkedHashSet; 24 import java.util.List; 25 import java.util.Locale; 26 import java.util.Map; 27 import java.util.Map.Entry; 28 import java.util.Set; 29 import java.util.SortedMap; 30 import java.util.TreeMap; 31 import java.util.TreeSet; 32 33 import org.unicode.cldr.draft.FileUtilities; 34 import org.unicode.cldr.draft.ScriptMetadata; 35 import org.unicode.cldr.draft.ScriptMetadata.Info; 36 import org.unicode.cldr.util.ArrayComparator; 37 import org.unicode.cldr.util.CLDRConfig; 38 import org.unicode.cldr.util.CLDRFile; 39 import org.unicode.cldr.util.CLDRFile.WinningChoice; 40 import org.unicode.cldr.util.CLDRPaths; 41 import org.unicode.cldr.util.CLDRTool; 42 import org.unicode.cldr.util.CLDRURLS; 43 import org.unicode.cldr.util.CldrUtility; 44 import org.unicode.cldr.util.Factory; 45 import org.unicode.cldr.util.FileCopier; 46 import org.unicode.cldr.util.Iso639Data; 47 import org.unicode.cldr.util.Iso639Data.Scope; 48 import org.unicode.cldr.util.Iso639Data.Type; 49 import org.unicode.cldr.util.LanguageTagParser; 50 import org.unicode.cldr.util.Level; 51 import org.unicode.cldr.util.LocaleIDParser; 52 import org.unicode.cldr.util.Log; 53 import org.unicode.cldr.util.Organization; 54 import 
org.unicode.cldr.util.StandardCodes; 55 import org.unicode.cldr.util.StandardCodes.CodeType; 56 import org.unicode.cldr.util.SupplementalDataInfo; 57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 61 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 63 import org.unicode.cldr.util.TransliteratorUtilities; 64 import org.unicode.cldr.util.XPathParts; 65 66 import com.google.common.collect.Multimap; 67 import com.google.common.collect.Multimaps; 68 import com.google.common.collect.TreeMultimap; 69 import com.ibm.icu.dev.util.CollectionUtilities; 70 import com.ibm.icu.impl.Relation; 71 import com.ibm.icu.impl.Row.R2; 72 import com.ibm.icu.impl.Row.R4; 73 import com.ibm.icu.lang.UCharacter; 74 import com.ibm.icu.text.Collator; 75 import com.ibm.icu.text.Normalizer; 76 import com.ibm.icu.text.Normalizer2; 77 import com.ibm.icu.text.NumberFormat; 78 import com.ibm.icu.text.UTF16; 79 import com.ibm.icu.text.UnicodeSet; 80 import com.ibm.icu.util.ICUUncheckedIOException; 81 import com.ibm.icu.util.ULocale; 82 83 @CLDRTool(alias = "showlanguages", description = "Generate Lanugage info charts") 84 public class ShowLanguages { 85 private static final boolean SHOW_NATIVE = true; 86 87 static Comparator col = new org.unicode.cldr.util.MultiComparator( 88 Collator.getInstance(new ULocale("en")), 89 new UTF16.StringComparator(true, false, 0)); 90 91 static StandardCodes sc = StandardCodes.make(); 92 93 static Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();//.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 94 static CLDRFile english = CLDRConfig.getInstance().getEnglish(); 95 main(String[] args)96 public static void main(String[] args) throws IOException { 97 
System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); 98 FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR); 99 FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR); 100 printLanguageData(cldrFactory, "index.html"); 101 // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*"); 102 // printLanguageData(cldrFactory, "language_info2.txt"); 103 System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR); 104 } 105 106 /** 107 * 108 */ 109 public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = new FormattedFileWriter.Anchors(); 110 111 static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo 112 .getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 113 printLanguageData(Factory cldrFactory, String filename)114 private static void printLanguageData(Factory cldrFactory, String filename) throws IOException { 115 116 new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 117 ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS); 118 119 new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 120 new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 121 new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 122 new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 123 124 // since we don't want these listed on the supplemental page, use null 125 126 StringWriter sw = new StringWriter(); 127 PrintWriter pw = new PrintWriter(sw); 128 129 new ShowPlurals().printPlurals(english, null, pw, cldrFactory); 130 131 LanguageInfo linfo = new LanguageInfo(cldrFactory); 132 133 linfo.showCoverageGoals(pw); 134 135 linfo.printLikelySubtags(pw); 136 137 linfo.showCountryLanguageInfo(pw); 138 139 linfo.showLanguageCountryInfo(pw); 140 141 // linfo.showTerritoryInfo(); 142 // linfo.printCountryData(pw); 143 144 // linfo.printDeprecatedItems(pw); 145 146 // PrintWriter pw1 = new 
PrintWriter(new FormattedFileWriter(pw, "Languages and Territories", null)); 147 // pw1.println("<tr><th>Language \u2192 Territories"); 148 // pw1.println("</th><th>Territory \u2192 Language"); 149 // pw1.println("</th><th>Territories Not Represented"); 150 // pw1.println("</th><th>Languages Not Represented"); 151 // pw1.println("</th></tr>"); 152 // 153 // pw1.println("<tr><td>"); 154 // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 155 // pw1.println("</td><td>"); 156 // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME); 157 // pw1.println("</td><td>"); 158 // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME); 159 // pw1.println("</td><td>"); 160 // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 161 // pw1.println("</td></tr>"); 162 // 163 // pw1.close(); 164 165 printLanguageScript(linfo, pw); 166 printScriptLanguageTerritory(linfo, pw); 167 168 linfo.showCorrespondances(); 169 170 // linfo.showCalendarData(pw); 171 172 linfo.showCountryInfo(pw); 173 linfo.printCurrency(pw); 174 linfo.printContains(pw); 175 176 linfo.printWindows_Tzid(pw); 177 linfo.printAliases(pw); 178 179 linfo.printCharacters(pw); 180 181 pw.close(); 182 183 String[] replacements = { "%date%", CldrUtility.isoFormatDateOnly(new Date()), "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(), "%data%", 184 sw.toString() }; 185 PrintWriter pw2 = org.unicode.cldr.draft.FileUtilities.openUTF8Writer(FormattedFileWriter.CHART_TARGET_DIR, filename); 186 FileUtilities.appendFile(CLDRPaths.BASE_DIRECTORY + java.io.File.separatorChar 187 + "tools/java/org/unicode/cldr/tool/supplemental.html", "utf-8", pw2, replacements); 188 pw2.close(); 189 } 190 printLanguageScript(LanguageInfo linfo, PrintWriter pw)191 private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException { 192 PrintWriter pw1; 193 TablePrinter tablePrinter = new TablePrinter() 194 .addColumn("Language", "class='source'", 
null, "class='source'", true).setSpanRows(true).setSortPriority(0) 195 .setBreakSpans(true) 196 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 197 .setSpanRows(true) 198 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSpanRows(true) 199 .setSortPriority(1) 200 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3) 201 .addColumn("Script", "class='target'", null, "class='target'", true).setSortPriority(3) 202 .addColumn("Code", "class='target'", null, "class='target'", true) 203 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSortPriority(2); 204 205 TablePrinter tablePrinter2 = new TablePrinter() 206 .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 207 .setBreakSpans(true) 208 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 209 .setSpanRows(true) 210 .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSpanRows(true) 211 .setSortPriority(1) 212 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(3) 213 .addColumn("Code", "class='target'", null, "class='target'", true) 214 .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSortPriority(2) 215 .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3); 216 217 // get the codes so we can show the remainder 218 Set<String> remainingScripts = new TreeSet<String>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS); 219 UnicodeSet temp = new UnicodeSet(); 220 for (String script : getScriptsToShow()) { 221 temp.clear(); 222 try { 223 temp.applyPropertyAlias("script", script); 224 } catch (RuntimeException e) { 225 } // fall through 226 if (temp.size() == 0) { 227 remainingScripts.remove(script); 
228 System.out.println("Removing: " + script); 229 } else { 230 System.out.println("Keeping: " + script); 231 } 232 } 233 remainingScripts.remove("Brai"); 234 remainingScripts.remove("Hira"); 235 remainingScripts.remove("Qaai"); 236 remainingScripts.remove("Hrkt"); 237 remainingScripts.remove("Zzzz"); 238 remainingScripts.remove("Zyyy"); 239 240 Set<String> remainingLanguages = new TreeSet<String>(getLanguagesToShow()); 241 for (String language : getLanguagesToShow()) { 242 Scope s = Iso639Data.getScope(language); 243 Type t = Iso639Data.getType(language); 244 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 245 remainingLanguages.remove(language); 246 } 247 } 248 249 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 250 for (String language : languages) { 251 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 252 for (BasicLanguageData basicData : basicLanguageData) { 253 String secondary = isOfficial(language) // basicData.getType() == BasicLanguageData.Type.primary 254 ? 
"\u00A0" 255 : "N"; 256 for (String script : basicData.getScripts()) { 257 addLanguageScriptCells(tablePrinter, tablePrinter2, language, script, secondary); 258 remainingScripts.remove(script); 259 remainingLanguages.remove(language); 260 } 261 } 262 } 263 for (String language : remainingLanguages) { 264 addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?"); 265 } 266 for (String script : remainingScripts) { 267 addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?"); 268 } 269 270 pw1 = new PrintWriter(new FormattedFileWriter(null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS)); 271 pw1.println(tablePrinter.toTable()); 272 pw1.close(); 273 274 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS)); 275 pw1.println(tablePrinter2.toTable()); 276 pw1.close(); 277 278 } 279 280 static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<String, OfficialStatus>(); 281 static { 282 for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) { 283 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 284 if (territories == null) { 285 continue; 286 } 287 int underbar = language.indexOf('_'); 288 String base = underbar < 0 ? 
null : language.substring(0, underbar); 289 290 for (String territory : territories) { 291 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 292 OfficialStatus status = data.getOfficialStatus(); 293 OfficialStatus old; 294 old = languageToBestStatus.get(language); 295 if (old == null || status.compareTo(old) > 0) { 296 languageToBestStatus.put(language, status); 297 } 298 if (base != null) { 299 old = languageToBestStatus.get(base); 300 if (old == null || status.compareTo(old) > 0) { 301 languageToBestStatus.put(base, status); 302 } 303 } 304 } 305 } 306 } 307 308 private static boolean isOfficial(String language) { 309 OfficialStatus status = languageToBestStatus.get(language); 310 if (status != null && status.isMajor()) { 311 return true; 312 } 313 int underbar = language.indexOf('_'); 314 if (underbar < 0) { 315 return false; 316 } 317 return isOfficial(language.substring(0, underbar)); 318 } 319 320 private static Set<String> getLanguagesToShow() { 321 return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME); 322 } 323 324 private static Set<String> getEnglishTypes(String type, int code) { 325 Set<String> result = new HashSet<String>(sc.getSurveyToolDisplayCodes(type)); 326 XPathParts parts = new XPathParts(); 327 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) { 328 parts.set(it.next()); 329 String newType = parts.getAttributeValue(-1, "type"); 330 if (!result.contains(newType)) { 331 result.add(newType); 332 } 333 } 334 return result; 335 } 336 337 private static Set<String> getScriptsToShow() { 338 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME); 339 } 340 341 private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) throws IOException { 342 PrintWriter pw1; 343 TablePrinter tablePrinter2 = new TablePrinter() 344 .addColumn("Sample Char", "class='source'", null, "class='source sample'", true).setSpanRows(true) 345 .addColumn("Script", 
"class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0) 346 .setBreakSpans(true) 347 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 348 .setSpanRows(true) 349 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(1) 350 .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(2) 351 .addColumn("Native", "class='target'", null, "class='target'", true) 352 .addColumn("Code", "class='target'", null, "class='target'", true) 353 .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(3) 354 .addColumn("Territory", "class='target'", null, "class='target'", true).setSortPriority(4) 355 .addColumn("Native", "class='target'", null, "class='target'", true) 356 .addColumn("Code", "class='target'", null, "class='target'", true); 357 358 // get the codes so we can show the remainder 359 Set<String> remainingScripts = new TreeSet<String>(getScriptsToShow()); 360 Set<String> remainingTerritories = new TreeSet<String>(sc.getGoodAvailableCodes("territory")); 361 UnicodeSet temp = new UnicodeSet(); 362 for (String script : getScriptsToShow()) { 363 temp.clear(); 364 try { 365 temp.applyPropertyAlias("script", script); 366 } catch (RuntimeException e) { 367 } // fall through 368 if (temp.size() == 0) { 369 remainingScripts.remove(script); 370 System.out.println("Removing: " + script); 371 } else { 372 System.out.println("Keeping: " + script); 373 } 374 } 375 remainingScripts.remove("Brai"); 376 remainingScripts.remove("Hira"); 377 remainingScripts.remove("Qaai"); 378 remainingScripts.remove("Hrkt"); 379 remainingScripts.remove("Zzzz"); 380 remainingScripts.remove("Zyyy"); 381 382 Set<String> remainingLanguages = new TreeSet<String>(getLanguagesToShow()); 383 for (String language : getLanguagesToShow()) { 384 Scope s = Iso639Data.getScope(language); 385 Type t = Iso639Data.getType(language); 386 if (s != Scope.Individual && s != 
Scope.Macrolanguage || t != Type.Living) { 387 remainingLanguages.remove(language); 388 } 389 } 390 391 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 392 for (String language : languages) { 393 Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language); 394 for (BasicLanguageData basicData : basicLanguageData) { 395 if (basicData.getType() != BasicLanguageData.Type.primary) { 396 continue; 397 } 398 Set<String> mainTerritories = getTerritories(language); 399 if (mainTerritories.size() == 0) { 400 continue; 401 // mainTerritories.add("ZZ"); 402 } 403 404 TreeSet<String> mainScripts = new TreeSet<String>(basicData.getScripts()); 405 if (mainScripts.size() == 0) { 406 continue; 407 } 408 for (String script : mainScripts) { 409 for (String territory : mainTerritories) { 410 addLanguageScriptCells2(tablePrinter2, language, script, territory); 411 remainingTerritories.remove(territory); 412 } 413 remainingScripts.remove(script); 414 } 415 } 416 remainingLanguages.remove(language); 417 } 418 // for (String language : remainingLanguages) { 419 // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ"); 420 // } 421 // for (String script : remainingScripts) { 422 // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ"); 423 // } 424 // for (String territory : remainingTerritories) { 425 // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory); 426 // } 427 428 pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts, Languages, and Territories", null, SUPPLEMENTAL_INDEX_ANCHORS)); 429 pw1.println(tablePrinter2.toTable()); 430 pw1.close(); 431 } 432 433 private static Relation<String, String> territoryFix; 434 getTerritories(String language)435 private static Set<String> getTerritories(String language) { 436 if (territoryFix == null) { // set up the data 437 initTerritoryFix(); 438 } 439 Set<String> territories = territoryFix.getAll(language); 440 if (territories == null) { 441 
territories = new TreeSet<String>(); 442 } 443 return territories; 444 } 445 initTerritoryFix()446 private static void initTerritoryFix() { 447 territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 448 Set<String> languages = supplementalDataInfo.getLanguages(); 449 LanguageTagParser ltp = new LanguageTagParser(); 450 for (String language2 : languages) { 451 if (language2.contains("_")) { 452 ltp.set(language2).getLanguage(); 453 addOfficialTerritory(ltp, language2, ltp.getLanguage()); 454 } else { 455 addOfficialTerritory(ltp, language2, language2); 456 } 457 } 458 } 459 addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage)460 private static void addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage) { 461 // territoryFix.putAll(baseLanguage, supplementalDataInfo.getTerritoriesForPopulationData(language)); 462 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 463 if (territories == null) { 464 return; 465 } 466 for (String territory : territories) { 467 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 468 OfficialStatus status = data.getOfficialStatus(); 469 if (status.isMajor()) { 470 territoryFix.put(baseLanguage, territory); 471 System.out.println("\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language); 472 } 473 } 474 } 475 addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, String territory)476 private static void addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, 477 String territory) { 478 CLDRFile nativeLanguage = null; 479 if (SHOW_NATIVE) { 480 try { 481 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true); 482 } catch (RuntimeException e) { 483 try { 484 nativeLanguage = cldrFactory.make(language + "_" + script, true); 485 } catch (RuntimeException e2) { 486 try { 487 
nativeLanguage = cldrFactory.make(language, true); 488 } catch (RuntimeException e3) { 489 } 490 } 491 } 492 // check for overlap 493 if (nativeLanguage != null && !script.equals("Jpan") && !script.equals("Hans") && !script.equals("Hant")) { 494 UnicodeSet scriptSet; 495 try { 496 String tempScript = script.equals("Kore") ? "Hang" : script; 497 scriptSet = new UnicodeSet("[:script=" + tempScript + ":]"); 498 } catch (RuntimeException e) { 499 scriptSet = new UnicodeSet(); 500 } 501 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING); 502 if (scriptSet.containsNone(exemplars)) { 503 System.out.println("Skipping CLDR file -- exemplars differ: " + language + "\t" 504 + nativeLanguage.getLocaleID() + "\t" + scriptSet + "\t" + exemplars); 505 nativeLanguage = null; 506 } 507 } 508 } 509 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 510 if (languageName == null) languageName = "???"; 511 String isLanguageTranslated = ""; 512 String nativeLanguageName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME, 513 language); 514 if (nativeLanguageName == null || nativeLanguageName.equals(language)) { 515 nativeLanguageName = "<i>n/a</i>"; 516 isLanguageTranslated = "n"; 517 } 518 519 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 520 // String nativeScriptName = nativeLanguage == null ? null : 521 // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script); 522 // if (nativeScriptName != null && !nativeScriptName.equals(script)) { 523 // scriptName = nativeScriptName + "[" + scriptName + "]"; 524 // } 525 526 String isTerritoryTranslated = ""; 527 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory); 528 String nativeTerritoryName = nativeLanguage == null ? 
null : nativeLanguage.getName(CLDRFile.TERRITORY_NAME, 529 territory); 530 if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) { 531 nativeTerritoryName = "<i>n/a</i>"; 532 isTerritoryTranslated = "n"; 533 } 534 535 // Type t = Iso639Data.getType(language); 536 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 537 // // ok 538 // } else if (!language.equals("und")){ 539 // scriptModern = "N"; 540 // } 541 //String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 542 543 Info scriptMetatdata = ScriptMetadata.getInfo(script); 544 tablePrinter2.addRow() 545 .addCell(scriptMetatdata.sampleChar) 546 .addCell(scriptName) 547 .addCell(script) 548 .addCell(isLanguageTranslated) 549 .addCell(languageName) 550 .addCell(nativeLanguageName) 551 .addCell(language) 552 .addCell(isTerritoryTranslated) 553 .addCell(territoryName) 554 .addCell(nativeTerritoryName) 555 .addCell(territory) 556 .finishRow(); 557 } 558 559 static Map<String, String> fixScriptGif = CollectionUtilities.asMap(new String[][] { 560 { "hangul", "hangulsyllables" }, 561 { "japanese", "hiragana" }, 562 { "unknown or invalid script", "unknown" }, 563 { "Hant", "Hant" }, 564 { "Hans", "Hans" }, 565 }); 566 getGifName(String script)567 private static String getGifName(String script) { 568 String temp = fixScriptGif.get(script); 569 if (temp != null) { 570 return temp; 571 } 572 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 573 scriptName = scriptName.toLowerCase(Locale.ENGLISH); 574 temp = fixScriptGif.get(scriptName); 575 if (temp != null) { 576 return temp; 577 } 578 return scriptName; 579 } 580 581 private static Set<Type> oldLanguage = Collections.unmodifiableSet(EnumSet.of(Type.Ancient, Type.Extinct, 582 Type.Historical, Type.Constructed)); 583 addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, String script, String secondary)584 
private static void addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, 585 String script, String secondary) { 586 try { 587 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 588 if (languageName == null) { 589 languageName = "¿" + language + "?"; 590 System.err.println("No English Language Name for:" + language); 591 } 592 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 593 if (scriptName == null) { 594 scriptName = "¿" + script + "?"; 595 System.err.println("No English Language Name for:" + script); 596 } 597 String scriptModern = StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N"; 598 //Scope s = Iso639Data.getScope(language); 599 Type t = Iso639Data.getType(language); 600 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) { 601 // // ok 602 // } else if (!language.equals("und")){ 603 // scriptModern = "N"; 604 // } 605 String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" 
: ""; 606 607 tablePrinter.addRow() 608 .addCell(languageName) 609 .addCell(language) 610 .addCell(languageModern) 611 .addCell(secondary) 612 .addCell(scriptName) 613 .addCell(script) 614 .addCell(scriptModern) 615 .finishRow(); 616 617 tablePrinter2.addRow() 618 .addCell(scriptName) 619 .addCell(script) 620 .addCell(scriptModern) 621 .addCell(languageName) 622 .addCell(language) 623 .addCell(languageModern) 624 .addCell(secondary) 625 .finishRow(); 626 } catch (RuntimeException e) { 627 throw e; 628 } 629 } 630 631 static class LanguageInfo { 632 private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<String, Map<String, String>>(); 633 634 Multimap<String, String> language_scripts = TreeMultimap.create(); 635 636 Multimap<String, String> language_territories = TreeMultimap.create(); 637 638 List<Map<String, String>> deprecatedItems = new ArrayList<Map<String, String>>(); 639 640 Multimap<String, String> territory_languages; 641 642 Multimap<String, String> script_languages; 643 644 //Map group_contains = new TreeMap(); 645 646 Set<String[]> aliases = new TreeSet<String[]>(new ArrayComparator(new Comparator[] { new UTF16.StringComparator(), col })); 647 648 Comparator col3 = new ArrayComparator(new Comparator[] { col, col, col }); 649 650 Map<String, String> currency_fractions = new TreeMap<String, String>(col); 651 652 Map<String, Set> currency_territory = new TreeMap<String, Set>(col); 653 654 Map<String, Set> territory_currency = new TreeMap<String, Set>(col); 655 656 Set<String> territoriesWithCurrencies = new TreeSet<String>(); 657 658 Set<String> currenciesWithTerritories = new TreeSet<String>(); 659 660 Map<String, Map<String, Set<String>>> territoryData = new TreeMap<String, Map<String, Set<String>>>(); 661 662 Set<String> territoryTypes = new TreeSet<String>(); 663 664 Map<String, LinkedHashSet<String>> charSubstitutions = new TreeMap<String, LinkedHashSet<String>>(col); 665 666 String defaultDigits = null; 667 668 Map<String, 
Map<String, Object>> territoryLanguageData = new TreeMap<String, Map<String, Object>>(); 669 670 private Relation<String, String> territoriesToModernCurrencies = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, 671 null); 672 LanguageInfo(Factory cldrFactory)673 public LanguageInfo(Factory cldrFactory) throws IOException { 674 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 675 XPathParts parts = new XPathParts(new UTF16.StringComparator(), null); 676 for (Iterator<String> it = supp.iterator(); it.hasNext();) { 677 String path = it.next(); 678 String fullPath = supp.getFullXPath(path); 679 if (fullPath == null) { 680 supp.getFullXPath(path); 681 } 682 parts.set(fullPath); 683 684 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/> 685 if (path.indexOf("/zoneItem") >= 0) { 686 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 687 String type = attributes.get("type"); 688 //String territory = attributes.get("territory"); 689 String aliasAttributes = attributes.get("aliases"); 690 if (aliasAttributes != null) { 691 String[] aliasesList = aliasAttributes.split("\\s+"); 692 693 for (int i = 0; i < aliasesList.length; ++i) { 694 String alias = aliasesList[i]; 695 aliases.add(new String[] { "timezone", alias, type }); 696 } 697 } 698 // TODO territory, multizone 699 continue; 700 } 701 702 if (path.indexOf("/currencyData") >= 0) { 703 if (path.indexOf("/fractions") >= 0) { 704 // <info iso4217="ADP" digits="0" rounding="0"/> 705 String element = parts.getElement(parts.size() - 1); 706 if (!element.equals("info")) 707 throw new IllegalArgumentException("Unexpected fractions element: " + element); 708 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 709 String iso4217 = attributes.get("iso4217"); 710 String digits = attributes.get("digits"); 711 String rounding = attributes.get("rounding"); 712 digits = digits + (rounding.equals("0") ? 
"" : " (" + rounding + ")"); 713 if (iso4217.equals("DEFAULT")) 714 defaultDigits = digits; 715 else 716 currency_fractions.put(getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits); 717 continue; 718 } 719 // <region iso3166="AR"> 720 // <currency iso4217="ARS" from="1992-01-01"/> 721 if (path.indexOf("/region") >= 0) { 722 Map<String, String> attributes = parts.getAttributes(parts.size() - 2); 723 String iso3166 = attributes.get("iso3166"); 724 attributes = parts.getAttributes(parts.size() - 1); 725 String iso4217 = attributes.get("iso4217"); 726 String to = attributes.get("to"); 727 if (to == null) 728 to = "\u221E"; 729 String from = attributes.get("from"); 730 if (from == null) 731 from = "-\u221E"; 732 String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false); 733 String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false); 734 Set info = territory_currency.get(countryName); 735 if (info == null) 736 territory_currency.put(countryName, info = new TreeSet(col3)); 737 info.add(new String[] { from, to, currencyName }); 738 info = currency_territory.get(currencyName); 739 if (info == null) 740 currency_territory.put(currencyName, info = new TreeSet(col)); 741 territoriesWithCurrencies.add(iso3166); 742 currenciesWithTerritories.add(iso4217); 743 if (to.equals("\u221E") || to.compareTo("2006") > 0) { 744 territoriesToModernCurrencies.put(iso3166, iso4217); 745 info.add("<b>" + countryName + "</b>"); 746 747 } else { 748 info.add("<i>" + countryName + "</i>"); 749 750 } 751 continue; 752 } 753 } 754 755 if (path.indexOf("/languageData") >= 0) { 756 Map<String, String> attributes = parts.findAttributes("language"); 757 String language = attributes.get("type"); 758 String alt = attributes.get("alt"); 759 addTokens(language, attributes.get("scripts"), " ", language_scripts); 760 // mark the territories 761 if (alt == null) 762 ; // nothing 763 else if ("secondary".equals(alt)) 764 language += "*"; 765 else 766 language += "*" + alt; 767 // 
<language type="af" scripts="Latn" territories="ZA"/> 768 addTokens(language, attributes.get("territories"), " ", language_territories); 769 continue; 770 } 771 772 if (path.indexOf("/deprecatedItems") >= 0) { 773 deprecatedItems.add(parts.findAttributes("deprecatedItems")); 774 continue; 775 } 776 if (path.indexOf("/calendarData") >= 0) { 777 Map<String, String> attributes = parts.findAttributes("calendar"); 778 if (attributes == null) { 779 System.err.println("Err: on path " + fullPath 780 + " , no attributes on 'calendar'. Probably, this tool is out of date."); 781 } else { 782 String type = attributes.get("type"); 783 String territories = attributes.get("territories"); 784 if (territories == null) { 785 System.err.println("Err: on path " + fullPath 786 + ", missing territories. Probably, this tool is out of date."); 787 } else if (type == null) { 788 System.err.println("Err: on path " + fullPath 789 + ", missing type. Probably, this tool is out of date."); 790 } else { 791 addTerritoryInfo(territories, "calendar", type); 792 } 793 } 794 } 795 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { 796 String element = parts.getElement(parts.size() - 1); 797 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 798 // later, make this a table 799 String key = "count"; 800 String display = "Days in week (min)"; 801 boolean useTerritory = true; 802 switch (element) { 803 case "firstDay": 804 key = "day"; 805 display = "First day of week"; 806 break; 807 case "weekendStart": 808 key = "day"; 809 display = "First day of weekend"; 810 break; 811 case "weekendEnd": 812 key = "day"; 813 display = "Last day of weekend"; 814 break; 815 case "measurementSystem": 816 // <measurementSystem type="metric" territories="001"/> 817 key = "type"; 818 display = "Meas. 
system"; 819 break; 820 case "paperSize": 821 key = "type"; 822 display = "Paper Size"; 823 break; 824 case "weekOfPreference": 825 useTerritory = false; 826 break; 827 } 828 if (useTerritory) { 829 String type = attributes.get(key); 830 String territories = attributes.get("territories"); 831 addTerritoryInfo(territories, display, type); 832 } 833 } 834 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) 835 continue; 836 System.out.println("Skipped Element: " + path); 837 } 838 839 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 840 for (String language : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) { 841 language_territories.put(language, territory); 842 } 843 } 844 territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); 845 script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); 846 847 // now get some metadata 848 localeAliasInfo.put("language", new TreeMap<String, String>()); 849 localeAliasInfo.put("script", new TreeMap<String, String>()); 850 localeAliasInfo.put("territory", new TreeMap<String, String>()); 851 localeAliasInfo.put("variant", new TreeMap<String, String>()); 852 localeAliasInfo.put("zone", new TreeMap<String, String>()); 853 localeAliasInfo.put("subdivision", new TreeMap<String, String>()); 854 855 localeAliasInfo.get("language").put("no", "nb"); 856 localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); 857 localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); 858 localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); 859 localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); 860 localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); 861 862 // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); 863 Map<String, Map<String, R2<List<String>, String>>> localeAliasInfo2 = supplementalDataInfo 864 .getLocaleAliasInfo(); 865 for (Entry<String, Map<String, 
R2<List<String>, String>>> entry1 : localeAliasInfo2.entrySet()) { 866 String element = entry1.getKey(); 867 for (Entry<String, R2<List<String>, String>> entry2 : entry1.getValue().entrySet()) { 868 String type = entry2.getKey(); 869 R2<List<String>, String> replacementReason = entry2.getValue(); 870 List<String> replacementList = replacementReason.get0(); 871 String replacement = replacementList == null ? null : CollectionUtilities 872 .join(replacementList, " "); 873 String reason = replacementReason.get1(); 874 875 // for (Iterator it = supp2.iterator(); it.hasNext();) { 876 // String path = (String) it.next(); 877 // parts.set(supp2.getFullXPath(path)); 878 // if (path.indexOf("/alias") >= 0) { 879 // String element = parts.getElement(parts.size() - 1); 880 // Map attributes = parts.getAttributes(parts.size() - 1); 881 // String type = (String) attributes.get("type"); 882 // if (!element.endsWith("Alias")) 883 // throw new IllegalArgumentException("Unexpected alias element: " + element); 884 // element = element.substring(0, element.length() - 5); 885 // String replacement = (String) attributes.get("replacement"); 886 if (element.equals("timezone")) { 887 element = "zone"; 888 } 889 localeAliasInfo.get(element).put(type, replacement == null ? "?" 
: replacement); 890 891 String name = ""; 892 if (replacement == null) { 893 name = "(none)"; 894 } else if (element.equals("language")) { 895 name = getName(replacement, false); 896 } else if (element.equals("zone")) { 897 element = "timezone"; 898 name = replacement + "*"; 899 } else { 900 int typeCode = CLDRFile.typeNameToCode(element); 901 if (typeCode >= 0) { 902 name = getName(typeCode, replacement, false); 903 } else { 904 name = "*" + replacement; 905 } 906 } 907 if (element.equals("territory")) { 908 territoryAliases.put(type, name); 909 aliases 910 .add(new String[] { element, getName(CLDRFile.TERRITORY_NAME, type, false), name, reason }); 911 } else { 912 aliases.add(new String[] { element, type, name, reason }); 913 } 914 continue; 915 // } 916 // if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) 917 // continue; 918 // System.out.println("Skipped Element: " + path); 919 } 920 } 921 Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); 922 // CLDRFile chars = cldrFactory.make("characters", false); 923 // int count = 0; 924 // for (Iterator it = chars.iterator("", CLDRFile.getLdmlComparator()); it.hasNext();) { 925 // String path = (String) it.next(); 926 // parts.set(chars.getFullXPath(path)); 927 // if (parts.getElement(1).equals("version")) 928 // continue; 929 // if (parts.getElement(1).equals("generation")) 930 // continue; 931 // String value = parts.getAttributeValue(-2, "value"); 932 // String substitute = chars.getStringValue(path, true); 933 // addCharSubstitution(value, substitute); 934 // } 935 // if (count != 0) 936 // System.out.println("Skipped NFKC/NFC items: " + count); 937 Log.close(); 938 } 939 printLikelySubtags(PrintWriter index)940 public void printLikelySubtags(PrintWriter index) throws IOException { 941 942 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); 943 944 TablePrinter tablePrinter = new TablePrinter() 945 
.addColumn("Source Lang", "class='source'", null, "class='source'", true).setSortPriority(1) 946 .setSpanRows(false) 947 .addColumn("Source Script", "class='source'", null, "class='source'", true).setSortPriority(0) 948 .setSpanRows(false).setBreakSpans(true) 949 .addColumn("Source Region", "class='source'", null, "class='source'", true).setSortPriority(2) 950 .setSpanRows(false) 951 .addColumn("Target Lang", "class='target'", null, "class='target'", true).setSortPriority(3) 952 .setBreakSpans(true) 953 .addColumn("Target Script", "class='target'", null, "class='target'", true).setSortPriority(4) 954 .addColumn("Target Region", "class='target'", null, "class='target'", true).setSortPriority(5) 955 .addColumn("Source ID", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 956 .addColumn("Target ID", "class='target'", null, "class='target'", true); 957 Map<String, String> subtags = supplementalDataInfo.getLikelySubtags(); 958 LanguageTagParser sourceParsed = new LanguageTagParser(); 959 LanguageTagParser targetParsed = new LanguageTagParser(); 960 for (String source : subtags.keySet()) { 961 String target = subtags.get(source); 962 sourceParsed.set(source); 963 targetParsed.set(target); 964 tablePrinter.addRow() 965 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage())) 966 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript())) 967 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion())) 968 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage())) 969 .addCell(getName(CLDRFile.SCRIPT_NAME, targetParsed.getScript())) 970 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion())) 971 .addCell(source) 972 .addCell(target) 973 .finishRow(); 974 } 975 pw.println(tablePrinter.toTable()); 976 pw.close(); 977 } 978 979 static class LanguageData extends R4<Double, Double, Double, String> { LanguageData(Double a, Double b, Double c, String d)980 public LanguageData(Double a, Double b, Double c, 
String d) { 981 super(a, b, c, d); 982 } 983 } 984 getName(final int type, final String value)985 private String getName(final int type, final String value) { 986 if (value == null || value.equals("") || value.equals("und")) { 987 return "\u00A0"; 988 } 989 String result = english.getName(type, value); 990 if (result == null) { 991 result = value; 992 } 993 return result; 994 } 995 996 static final Comparator INVERSE_COMPARABLE = new Comparator() { 997 public int compare(Object o1, Object o2) { 998 return ((Comparable) o2).compareTo(o1); 999 } 1000 }; 1001 1002 // public void printCountryData(PrintWriter pw) throws IOException { 1003 // NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1004 // nf.setGroupingUsed(true); 1005 // NumberFormat pf = NumberFormat.getPercentInstance(ULocale.ENGLISH); 1006 // pf.setMinimumFractionDigits(1); 1007 // pf.setMaximumFractionDigits(1); 1008 // PrintWriter pw2 = showCountryDataHeader(pw, "Territory-Language Information"); 1009 // pw2.println("<tr>" + 1010 // "<th class='source'>Territory</th>" + 1011 // "<th class='source'>Code</th>" + 1012 // "<th class='target'>Population</th>" + 1013 // "<th class='target'>Literacy</th>" + 1014 // "<th class='target'>GDP (PPP)</th>" + 1015 // 1016 // "<th class='target'>Language</th>" + 1017 // "<th class='target'>Code</th>" + 1018 // "<th class='target'>Population</th>" + 1019 // "<th class='target'>Literacy</th>" + 1020 // "</tr>"); 1021 // for (String territoryName : territoryLanguageData.keySet()) { 1022 // Map<String,Object>results = territoryLanguageData.get(territoryName); 1023 // Set<Pair<Double,Pair<Double,String>>> language = 1024 // (Set<Pair<Double,Pair<Double,String>>>)results.get("language"); 1025 // int span = language == null ? 0 : language.size(); 1026 // String spanString = span == 0 ? 
"" : " rowSpan='"+span+"'"; 1027 // double population = Double.parseDouble((String)results.get("population")); 1028 // double gdp = Double.parseDouble((String)results.get("gdp")); 1029 // pw2.println("<tr>" + 1030 // "<td class='source'" + spanString + ">" + territoryName + "</td>" + 1031 // "<td class='source'" + spanString + ">" + results.get("code") + "</td>" + 1032 // "<td class='targetRight'" + spanString + ">" + (population <= 1 ? "<i>na</i>" : nf.format(population)) + 1033 // "</td>" + 1034 // "<td class='targetRight'" + spanString + ">" + 1035 // pf.format(Double.parseDouble((String)results.get("literacyPercent"))/100) + "</td>" + 1036 // "<td class='targetRight'" + spanString + ">" + (gdp <= 1 ? "<i>na</i>" : nf.format(gdp)) + "</td>"); 1037 // if (span == 0) { 1038 // pw2.println("<td class='source'><i>na</i></td>" + 1039 // "<td class='source'><i>na</i></td>" 1040 // + "<td class='targetRight'><i>na</i></td>" 1041 // + "<td class='targetRight'><i>na</i></td>" 1042 // + "</tr>"); 1043 // } else { 1044 // boolean first = true; 1045 // for (Pair<Double,Pair<Double,String>> languageCodePair : language) { 1046 // double languagePopulation = languageCodePair.first; 1047 // double languageliteracy = languageCodePair.second.first; 1048 // String languageCode = languageCodePair.second.second; 1049 // if (first) { 1050 // first = false; 1051 // } else { 1052 // pw2.println("<tr>"); 1053 // } 1054 // double proportion = languagePopulation/population; 1055 // if (proportion > 1) { 1056 // System.out.println("Warning - proportion > 100:" + territoryName + ", " + english.getName(languageCode, 1057 // false)); 1058 // proportion = 1; 1059 // } 1060 // pw2.println( 1061 // "<td class='source'>" + english.getName(languageCode, false) + "</td>" + 1062 // "<td class='source'>" + languageCode + "</td>" 1063 // + "<td class='targetRight'>" + pf.format(languagePopulation/100) + "</td>" 1064 // + "<td class='targetRight'>" + (Double.isNaN(languageliteracy) ? 
"<i>na</i>" : 1065 // pf.format(languageliteracy/100)) + "</td>" 1066 // + "</tr>"); 1067 // } 1068 // } 1069 // } 1070 // pw2.close(); 1071 // /* 1072 // * Map languageData = (Map) territoryLanguageData.get(type); 1073 // if (languageData == null) territoryLanguageData.put(type, languageData = new TreeMap()); 1074 // languageData.put("gdp", attributes.get("gdp")); 1075 // languageData.put("literacy", attributes.get("literacy")); 1076 // languageData.put("population", attributes.get("population")); 1077 // attributes = parts.getAttributes(3); 1078 // if (attributes != null) { 1079 // Map languageData2 = (Map) languageData.get("language"); 1080 // if (languageData2 == null) territoryLanguageData.put(type, languageData2 = new LinkedHashMap()); 1081 // languageData.put(attributes.get("type"), attributes.get("functionallyLiterate")); 1082 // } 1083 // 1084 // */ 1085 // //pw.println("<tr><th class='source'>Territory</th><th class='target'>From</th><th class='target'>To</th><th class='target'>Currency</th></tr>"); 1086 // //for (Iterator it = territory_currency.keySet().iterator(); it.hasNext();) { 1087 // //String territory = (String) it.next(); 1088 // //Set info = (Set) territory_currency.get(territory); 1089 // //pw.println("<tr><td class='source' rowSpan='" + info.size() + "'>" + territory + "</td>"); 1090 // //boolean first = true; 1091 // //for (Iterator it2 = info.iterator(); it2.hasNext();) { 1092 // //String[] items = (String[]) it2.next(); 1093 // //if (first) 1094 // //first = false; 1095 // //else 1096 // //pw.println("<tr>"); 1097 // //pw.println("<td class='target'>" + items[0] + "</td><td class='target'>" + items[1] + 1098 // "</td><td class='target'>" + items[2] + "</td></tr>"); 1099 // //} 1100 // //} 1101 // ////doFooter(pw); 1102 // //pw.close(); 1103 // //pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info")); 1104 // // 1105 // ////doTitle(pw, "Currency Format Info"); 1106 // //pw.println("<tr><th 
class='source'>Currency</th><th class='target'>Digits</th><th class='target'>Countries</th></tr>"); 1107 // //Set currencyList = new TreeSet(col); 1108 // //currencyList.addAll(currency_fractions.keySet()); 1109 // //currencyList.addAll(currency_territory.keySet()); 1110 // // 1111 // //for (Iterator it = currencyList.iterator(); it.hasNext();) { 1112 // //String currency = (String) it.next(); 1113 // //String fractions = (String) currency_fractions.get(currency); 1114 // //if (fractions == null) 1115 // //fractions = defaultDigits; 1116 // //Set territories = (Set) currency_territory.get(currency); 1117 // //pw.print("<tr><td class='source'>" + currency + "</td><td class='target'>" + fractions + 1118 // "</td><td class='target'>"); 1119 // //if (territories != null) { 1120 // //boolean first = true; 1121 // //for (Iterator it2 = territories.iterator(); it2.hasNext();) { 1122 // //if (first) 1123 // //first = false; 1124 // //else 1125 // //pw.print(", "); 1126 // //pw.print(it2.next()); 1127 // //} 1128 // //} 1129 // //pw.println("</td></tr>"); 1130 // //} 1131 // //pw.close(); 1132 // ////doFooter(pw); 1133 // // 1134 // } 1135 1136 // http://www.faqs.org/rfcs/rfc2396.html 1137 // delims = "<" | ">" | "#" | "%" | <"> 1138 // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 1139 // Within a query component, the characters ";", "/", "?", ":", "@", 1140 // "&", "=", "+", ",", and "$" are reserved. 
/**
 * Code points that {@link #urlEncode(String)} percent-escapes: controls, space, DEL,
 * the delimiter and query-reserved characters listed in the RFC 2396 notes above,
 * and everything from U+0080 up. Note that the apostrophe is NOT in this set.
 */
static final UnicodeSet ESCAPED_URI_QUERY = new UnicodeSet(
    "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]").freeze();

/** Vendors with fewer locale entries than this are listed under "Others" in the coverage chart. */
private static final int MINIMAL_BIG_VENDOR = 8;

// Prints the set of characters that are left unescaped; runs once at class initialization.
static {
    System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement());
}

/**
 * Percent-encodes a string for use inside a URI query component.
 * The input is converted to UTF-8; every byte contained in {@link #ESCAPED_URI_QUERY}
 * is written as {@code %xx} (two lowercase hex digits), every other byte is written
 * as the corresponding ASCII character.
 *
 * @param input the text to encode; must not be null
 * @return the encoded text
 */
private String urlEncode(String input) {
    // StandardCharsets avoids the checked exception that the charset-name overload declares.
    byte[] utf8 = input.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    StringBuilder output = new StringBuilder(utf8.length);
    for (byte octet : utf8) {
        int b = octet & 0xFF;
        if (ESCAPED_URI_QUERY.contains(b)) {
            output.append('%');
            if (b < 0x10) {
                output.append('0');
            }
            output.append(Integer.toString(b, 16));
        } else {
            output.append((char) b);
        }
    }
    return output.toString();
}

// http://trac.edgewall.org/wiki/TracTickets#PresetValuesforNewTickets
// http://unicode.org/cldr/trac/newticket?summary=Fix_XXX
/**
 * Builds an HTML link that opens a new-ticket page with the description and summary
 * pre-filled.
 *
 * @param bugNumber not used by this method
 * @param text      HTML shown as the link text (inserted as-is)
 * @param from      not used by this method
 * @param subject   ticket summary; skipped when null or empty
 * @param body      ticket description; skipped when null or empty
 * @return an {@code <a target='_blank' href='...'>} element
 */
private String addBug(int bugNumber, String text, String from, String subject, String body) {
    StringBuilder query = new StringBuilder();
    if (body != null && body.length() != 0) {
        query.append("description=").append(urlEncode(body));
    }
    if (subject != null && subject.length() != 0) {
        if (query.length() != 0) {
            query.append('&');
        }
        query.append("summary=").append(urlEncode(subject));
    }
    String parameters = "";
    if (query.length() != 0) {
        // urlEncode leaves the apostrophe untouched, but the href below is delimited by
        // apostrophes, so a name such as "Cote d'Ivoire" would cut the attribute short.
        parameters = "?" + query.toString().replace("'", "%27");
    }
    return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL
        + parameters + "'>" + text + "</a>";
}

/**
 * Writes the "Language-Territory Information" chart plus a tab-separated {@code .txt}
 * companion file. The chart has one row per (language, territory) pair that has
 * population data, and one extra "add new" row per language that links to a
 * pre-filled ticket.
 *
 * @param pw not used by this method
 * @throws IOException declared for the writers that are opened here
 */
private void showLanguageCountryInfo(PrintWriter pw) throws IOException {
    FormattedFileWriter ffw = new FormattedFileWriter(null, "Language-Territory Information",
        null
        // "<div style='margin:1em'><p>The language data is provided for localization testing, and is under development for CLDR 1.5. "
        // +
        // "To add a new territory for a language, see the <i>add new</i> links below. " +
        // "For more information, see <a href=\"territory_language_information.html\">Territory-Language Information.</a>"
        // +
        // "<p></div>"
        , SUPPLEMENTAL_INDEX_ANCHORS);
    Collection<Comparable[]> plainData = new ArrayList<Comparable[]>();

    // try-with-resources: the chart writer is closed even if building the table throws.
    try (PrintWriter pw2 = new PrintWriter(ffw)) {
        TablePrinter tablePrinter = new TablePrinter()
            .addColumn("L", "class='source'", null, "class='source'", true)
            .setSortPriority(0)
            .setBreakSpans(true)
            .setRepeatHeader(true)
            .setHidden(true)
            .addColumn("Language", "class='source'", null, "class='source'", true)
            .setSortPriority(0)
            .setBreakSpans(true)
            .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
            // .addColumn("Report Bug", "class='target'", null, "class='target'", false)
            .addColumn("Territory", "class='target'", null, "class='target'", true)
            .addColumn("Code", "class='target'", "<a href=\"territory_language_information.html#{0}\">{0}</a>",
                "class='target'", true)
            .addColumn("Language Population", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true)
            .setSortPriority(1).setSortAscending(false)
            // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", "class='targetRight'", true)
            // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true)
            // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true)
            // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", "class='targetRight'", true)
            ;
        TreeSet<String> languages = new TreeSet<String>();
        Collection<Comparable[]> data = new ArrayList<Comparable[]>();

        for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
            String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
            for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) {
                PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode);
                languages.add(languageCode);
                String languageName = getLanguageName(languageCode);
                // Chart row: cells line up with the six columns declared above.
                data.add(new Comparable[] {
                    getFirstPrimaryWeight(languageName),
                    languageName,
                    languageCode,
                    territoryName + getOfficialStatus(territoryCode, languageCode),
                    territoryCode,
                    languageData.getPopulation(),
                });
                // Plain-text row for the .txt companion file.
                plainData.add(new Comparable[] {
                    languageName,
                    languageCode,
                    territoryName,
                    territoryCode,
                    getRawOfficialStatus(territoryCode, languageCode),
                    languageData.getPopulation(),
                    languageData.getLiteratePopulation()
                });
            }
        }
        // One "add new" row per language, linking to a pre-filled ticket.
        for (String languageCode : languages) {
            String languageName = getLanguageName(languageCode);
            data.add(new Comparable[] {
                getFirstPrimaryWeight(languageName),
                languageName,
                languageCode,
                addBug(1217, "<i>add new</i>", "<email>", "Add territory to " + languageName
                    + " (" + languageCode + ")", "<territory, speaker population in territory, and references>"),
                "",
                0.0d,
            });
        }
        Comparable[][] flattened = data.toArray(new Comparable[data.size()][]);
        pw2.println(tablePrinter.addRows(flattened).toTable());
    }

    // Opened only after the chart writer is closed, as in the original statement order.
    try (PrintWriter pw21plain = FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) {
        for (Comparable[] row : plainData) {
            pw21plain.println(CollectionUtilities.join(row, "\t"));
        }
    }
}

/**
 * Appends links to the plural-rules and likely-subtags charts to a message.
 *
 * @param msg          text placed before the links
 * @param languageCode locale ID; only its language subtag is used as the link anchor
 * @return {@code msg} followed by the two links
 */
private String getLanguagePluralMessage(String msg, String languageCode) {
    String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage();
    return msg + ", on <a href='language_plural_rules.html#" + mainLanguageCode
        + "'>plurals</a>" +
        ", and on <a href='likely_subtags.html#" + mainLanguageCode + "'>likely-subtags</a>";
}

/**
 * Returns a display name for a language code: the English name when it differs from
 * the code, otherwise the first name listed by {@code Iso639Data.getNames}, otherwise
 * the code itself.
 *
 * @param languageCode the language code to name
 * @return a display name, or {@code languageCode} when none is available
 */
private String getLanguageName(String languageCode) {
    String result = english.getName(languageCode);
    // Null guard: fall through to the ISO 639 names instead of failing on a missing name.
    if (result != null && !result.equals(languageCode)) {
        return result;
    }
    Set<String> names = Iso639Data.getNames(languageCode);
    if (names != null && names.size() != 0) {
        return names.iterator().next();
    }
    return languageCode;
}

private
void showCoverageGoals(PrintWriter pw) throws IOException { 1311 PrintWriter pw2 = new PrintWriter(new FormattedFileWriter(null, "Coverage Goals", 1312 null 1313 // "<p>" + 1314 // "The following show default coverage goals for larger organizations. " + 1315 // "<i>[n/a]</i> shows where there is no specific value for a given organization, " + 1316 // "while <i>(...)</i> indicates that the goal is inherited from the parent. " + 1317 // "A * is added if the goal differs from the parent locale's goal. " + 1318 // "For information on what these goals mean (comprehensive, modern, moderate,...), see the LDML specification " 1319 // + 1320 // "<a href='http://www.unicode.org/reports/tr35/#Coverage_Levels'>Appendix M: Coverage Levels</a>. " + 1321 // + 1322 // "</p>" 1323 , null)); 1324 1325 TablePrinter tablePrinter = new TablePrinter() 1326 // tablePrinter.setSortPriorities(0,4) 1327 .addColumn("Language", "class='source'", null, "class='source'", true) 1328 .setSortPriority(0) 1329 .setBreakSpans(true) 1330 .addColumn("Code", "class='source'", 1331 "<a href=\"http://www.unicode.org/cldr/data/common/main/{0}.xml\">{0}</a>", "class='source'", false); 1332 Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes(); 1333 Set<String> locales = new TreeSet<String>(); 1334 Set<Organization> vendors = new LinkedHashSet<>(); 1335 Set<Organization> smallVendors = new LinkedHashSet<>(); 1336 1337 for (Entry<Organization, Map<String, Level>> vendorData : vendordata.entrySet()) { 1338 Organization vendor = vendorData.getKey(); 1339 //if (vendor.equals(Organization.java)) continue; 1340 Map<String, Level> data = vendorData.getValue(); 1341 if (data.size() < MINIMAL_BIG_VENDOR) { 1342 smallVendors.add(vendor); 1343 continue; 1344 } 1345 vendors.add(vendor); 1346 tablePrinter.addColumn(vendor.getDisplayName(), "class='target'", null, "class='target'", false) 1347 .setSpanRows(true); 1348 locales.addAll(data.keySet()); 1349 } 1350 1351 Collection<Comparable[]> data = new 
ArrayList<Comparable[]>(); 1352 List<String> list = new ArrayList<String>(); 1353 LanguageTagParser ltp = new LanguageTagParser(); 1354 //String alias2 = getAlias("sh_YU"); 1355 1356 for (String locale : locales) { 1357 list.clear(); 1358 String localeCode = locale.equals("*") ? "und" : locale; 1359 String alias = getAlias(localeCode); 1360 if (!alias.equals(localeCode)) { 1361 System.out.println("Should use canonical form: " + locale + " => " + alias); 1362 } 1363 String baseLang = ltp.set(localeCode).getLanguage(); 1364 String baseLangName = getLanguageName(baseLang); 1365 list.add("und".equals(localeCode) ? "other" : baseLangName); 1366 list.add(locale); 1367 for (Organization vendor : vendors) { 1368 String status = getVendorStatus(locale, vendor, vendordata); 1369 if (!baseLang.equals(locale) && !status.startsWith("<")) { 1370 String langStatus = getVendorStatus(baseLang, vendor, vendordata); 1371 if (!langStatus.equals(status)) { 1372 status += "*"; 1373 } 1374 } 1375 list.add(status); 1376 } 1377 data.add(list.toArray(new String[list.size()])); 1378 } 1379 Comparable[][] flattened = data.toArray(new Comparable[data.size()][]); 1380 String value = tablePrinter.addRows(flattened).toTable(); 1381 pw2.println(value); 1382 pw2.append("<h2>Others</h2><div align='left'><ul>"); 1383 for (Organization vendor2 : smallVendors) { 1384 pw2.append("<li><b>"); 1385 pw2.append(TransliteratorUtilities.toHTML.transform( 1386 vendor2.getDisplayName())).append(": </b>"); 1387 boolean first1 = true; 1388 for (Level level : Level.values()) { 1389 boolean first2 = true; 1390 Level other = null; 1391 for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) { 1392 String key = data2.getKey(); 1393 Level level2 = data2.getValue(); 1394 if (level != level2) { 1395 continue; 1396 } 1397 if (key.equals("*")) { 1398 other = level2; 1399 continue; 1400 } 1401 if (first2) { 1402 if (first1) { 1403 first1 = false; 1404 } else { 1405 pw2.append("; "); 1406 } 1407 
pw2.append(level2.toString()).append(": "); 1408 first2 = false; 1409 } else { 1410 pw2.append(", "); 1411 } 1412 pw2.append(TransliteratorUtilities.toHTML.transform(key)); 1413 } 1414 if (other != null) { 1415 if (first2) { 1416 if (first1) { 1417 first1 = false; 1418 } else { 1419 pw2.append("; "); 1420 } 1421 pw2.append(level.toString()).append(": "); 1422 first2 = false; 1423 } else { 1424 pw2.append(", "); 1425 } 1426 pw2.append("<i>other</i>"); 1427 } 1428 } 1429 pw2.append("</li>"); 1430 } 1431 pw2.append("</ul></div>"); 1432 pw2.close(); 1433 } 1434 1435 LanguageTagParser lpt2 = new LanguageTagParser(); 1436 getAlias(String locale)1437 private String getAlias(String locale) { 1438 lpt2.set(locale); 1439 locale = lpt2.toString(); // normalize 1440 //String language = lpt2.getLanguage(); 1441 String script = lpt2.getScript(); 1442 String region = lpt2.getRegion(); 1443 // List variants = lpt2.getVariants(); 1444 String temp; 1445 for (String old : localeAliasInfo.get("language").keySet()) { 1446 if (locale.startsWith(old)) { 1447 // the above is a rough check, and will fail with old=moh and locale=mo 1448 if (!locale.equals(old) && !locale.startsWith(old + "_")) { 1449 continue; 1450 } 1451 temp = localeAliasInfo.get("language").get(old); 1452 lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length())); 1453 break; 1454 } 1455 } 1456 temp = localeAliasInfo.get("script").get(script); 1457 if (temp != null) { 1458 lpt2.setScript(temp.split("\\s+")[0]); 1459 } 1460 temp = localeAliasInfo.get("territory").get(region); 1461 if (temp != null) { 1462 lpt2.setRegion(temp.split("\\s+")[0]); 1463 } 1464 return lpt2.toString(); 1465 } 1466 getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata)1467 private String getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata) { 1468 Level statusLevel = vendordata.get(vendor).get(locale); 1469 String status = statusLevel == 
null ? null : statusLevel.toString(); 1470 String curLocale = locale; 1471 while (status == null) { 1472 curLocale = LocaleIDParser.getParent(curLocale); 1473 if (curLocale.equals("root")) { 1474 status = " "; 1475 break; 1476 } 1477 statusLevel = vendordata.get(vendor).get(curLocale); 1478 if (statusLevel != null) { 1479 status = "<i>(" + statusLevel + ")</i>"; 1480 } 1481 } 1482 return status; 1483 } 1484 showCountryLanguageInfo(PrintWriter pw)1485 private void showCountryLanguageInfo(PrintWriter pw) throws IOException { 1486 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory-Language Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1487 PrintWriter pw2 = pw21; 1488 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1489 nf.setGroupingUsed(true); 1490 //NumberFormat percent = new DecimalFormat("000.0%"); 1491 TablePrinter tablePrinter = new TablePrinter() 1492 // tablePrinter.setSortPriorities(0,4) 1493 .addColumn("T", "class='source'", null, "class='source'", true) 1494 .setSortPriority(0) 1495 .setBreakSpans(true) 1496 .setRepeatHeader(true) 1497 .setHidden(true) 1498 .addColumn("Territory", "class='source'", null, "class='source'", true) 1499 .setSortPriority(0) 1500 .setBreakSpans(true) 1501 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1502 "class='source'", true) 1503 .addColumn("Terr. Literacy", "class='target'", "{0,number,@@}%", "class='targetRight'", true); 1504 1505 tablePrinter 1506 .addColumn("Language", "class='target'", null, "class='target'", false) 1507 .addColumn("Code", "class='target'", "<a href=\"language_territory_information.html#{0}\">{0}</a>", 1508 "class='target'", false) 1509 .addColumn("Lang. 
Pop.", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1510 .addColumn("Pop.%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1511 .setSortAscending(false).setSortPriority(1) 1512 .addColumn("Literacy%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1513 .addColumn("Written%", "class='target'", "{0,number,@@}%", "class='targetRight'", true) 1514 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1515 1516 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1517 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1518 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1519 double territoryLiteracy = territoryData2.getLiteratePopulationPercent(); 1520 1521 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) { 1522 PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1523 double languagePopulationPercent = 100 * languageData.getPopulation() / territoryData2.getPopulation(); 1524 double languageliteracy = languageData.getLiteratePopulationPercent(); 1525 double writingFrequency = languageData.getWritingPercent(); 1526 1527 tablePrinter.addRow() 1528 .addCell(getFirstPrimaryWeight(territoryName)) 1529 .addCell(territoryName) 1530 .addCell(territoryCode) 1531 .addCell(territoryLiteracy) 1532 .addCell(getLanguageName(languageCode) + getOfficialStatus(territoryCode, languageCode)) 1533 .addCell(languageCode) 1534 .addCell(languageData.getPopulation()) 1535 .addCell(languagePopulationPercent) 1536 .addCell(languageliteracy) 1537 .addCell(writingFrequency) 1538 .addCell( 1539 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + getLanguageName(languageCode) 1540 + " (" + languageCode + ")" 1541 + " in " + territoryName + " (" + territoryCode + ")", 1542 "<fixed 
data for territory, plus references>")) 1543 .finishRow(); 1544 } 1545 1546 tablePrinter.addRow() 1547 .addCell(getFirstPrimaryWeight(territoryName)) 1548 .addCell(territoryName) 1549 .addCell(territoryCode) 1550 .addCell(territoryLiteracy) 1551 .addCell( 1552 addBug(1217, "<i>add new</i>", "<email>", "Add language to " + territoryName + "(" 1553 + territoryCode + ")", 1554 "<language, speaker pop. and literacy in territory, plus references>")) 1555 .addCell("") 1556 .addCell(0.0d) 1557 .addCell(0.0d) 1558 .addCell(0.0d) 1559 .addCell(0.0d) 1560 .addCell("") 1561 .finishRow(); 1562 1563 } 1564 String value = tablePrinter.toTable(); 1565 pw2.println(value); 1566 pw2.close(); 1567 } 1568 showCountryInfo(PrintWriter pw)1569 private void showCountryInfo(PrintWriter pw) throws IOException { 1570 PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1571 PrintWriter pw2 = pw21; 1572 NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); 1573 nf.setGroupingUsed(true); 1574 //NumberFormat percent = new DecimalFormat("000.0%"); 1575 TablePrinter tablePrinter = new TablePrinter() 1576 // tablePrinter.setSortPriorities(0,4) 1577 .addColumn("T", "class='source'", null, "class='source'", true) 1578 .setSortPriority(0) 1579 .setBreakSpans(true) 1580 .setRepeatHeader(true) 1581 .setHidden(true) 1582 .addColumn("Territory", "class='source'", null, "class='source'", true) 1583 .setSortPriority(0) 1584 .setBreakSpans(true) 1585 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), 1586 "class='source'", true) 1587 .addColumn("Terr. Pop (M)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1588 .addColumn("Terr. 
GDP ($M PPP)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true) 1589 .addColumn("Currencies (2006...)", "class='target'", null, "class='target'", true); 1590 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1591 String header = it.next(); 1592 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1593 tablePrinter.addColumn(header).setHeaderAttributes("class='target'") 1594 .setCellAttributes("class='target'").setSpanRows(true); 1595 } 1596 1597 tablePrinter 1598 .addColumn("Report Bug", "class='target'", null, "class='target'", false); 1599 1600 for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) { 1601 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode); 1602 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode); 1603 double population = territoryData2.getPopulation() / 1000000; 1604 double gdp = territoryData2.getGdp() / 1000000; 1605 1606 Map<String, Set<String>> worldData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false)); 1607 Map<String, Set<String>> countryData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false)); 1608 1609 tablePrinter.addRow() 1610 .addCell(getFirstPrimaryWeight(territoryName)) 1611 .addCell(territoryName) 1612 .addCell(territoryCode) 1613 .addCell(population) 1614 .addCell(gdp) 1615 .addCell(getCurrencyNames(territoryCode)); 1616 1617 addOtherCountryData(tablePrinter, worldData, countryData); 1618 1619 tablePrinter 1620 .addCell( 1621 addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + territoryName + " (" + territoryCode + ")", 1622 "<fixed data for territory, plus references>")) 1623 .finishRow(); 1624 1625 } 1626 String value = tablePrinter.toTable(); 1627 pw2.println(value); 1628 pw2.close(); 1629 } 1630 1631 static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE); 1632 1633 // Do just an approximation 
for now 1634 getFirstPrimaryWeight(String territoryName)1635 private String getFirstPrimaryWeight(String territoryName) { 1636 char first = territoryName.charAt(0); 1637 String result = nfd.getDecomposition(first); 1638 if (result == null) { 1639 return UTF16.valueOf(first); 1640 } 1641 return UTF16.valueOf(result.codePointAt(0)); 1642 } 1643 1644 // private String getTerritoryWithLikelyLink(String territoryCode) { 1645 // return "<a href='likely_subtags.html#und_"+ territoryCode + "'>" + territoryCode + "</a>"; 1646 // } 1647 getOfficialStatus(String territoryCode, String languageCode)1648 private String getOfficialStatus(String territoryCode, String languageCode) { 1649 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1650 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1651 return " <span title='" + x.getOfficialStatus().toString().replace('_', ' ') + "'>{" 1652 + x.getOfficialStatus().toShortString() + "}</span>"; 1653 } 1654 getRawOfficialStatus(String territoryCode, String languageCode)1655 private String getRawOfficialStatus(String territoryCode, String languageCode) { 1656 PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode); 1657 if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return ""; 1658 return x.getOfficialStatus().toString(); 1659 } 1660 addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData)1661 private void addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData) { 1662 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1663 String type = it2.next(); 1664 Set<String> worldResults = worldData.get(type); 1665 Set<String> territoryResults = null; 1666 if (countryData != null) { 1667 territoryResults = countryData.get(type); 1668 } 1669 if 
(territoryResults == null) { 1670 territoryResults = worldResults; 1671 } 1672 String out = ""; 1673 if (territoryResults != null) { 1674 out = territoryResults + ""; 1675 out = out.substring(1, out.length() - 1); // remove [ and ] 1676 } 1677 tablePrinter.addCell(out); 1678 } 1679 } 1680 getCurrencyNames(String territoryCode)1681 private String getCurrencyNames(String territoryCode) { 1682 Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode); 1683 if (currencies == null || currencies.size() == 0) return ""; 1684 StringBuilder buffer = new StringBuilder(); 1685 for (String code : currencies) { 1686 if (buffer.length() != 0) buffer.append(",<br>"); 1687 buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false)); 1688 } 1689 return buffer.toString(); 1690 } 1691 addCharSubstitution(String value, String substitute)1692 private void addCharSubstitution(String value, String substitute) { 1693 if (substitute.equals(value)) 1694 return; 1695 LinkedHashSet<String> already = charSubstitutions.get(value); 1696 if (already == null) 1697 charSubstitutions.put(value, already = new LinkedHashSet<String>(0)); 1698 already.add(substitute); 1699 Log.logln(hex(value, " ") + "; " + hex(substitute, " ")); 1700 } 1701 1702 /** 1703 * 1704 */ 1705 // public void showTerritoryInfo() { 1706 // Map territory_parent = new TreeMap(); 1707 // gather("001", territory_parent); 1708 // for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) { 1709 // String territory = (String) it.next(); 1710 // String parent = (String) territory_parent.get(territory); 1711 // System.out.println(territory + "\t" + english.getName(english.TERRITORY_NAME, territory) + "\t" 1712 // + parent + "\t" + english.getName(english.TERRITORY_NAME, parent)); 1713 // } 1714 // } 1715 1716 // private void gather(String item, Map territory_parent) { 1717 // Collection containedByItem = (Collection) group_contains.get(item); 1718 // if (containedByItem == null) 1719 // return; 1720 // 
for (Iterator it = containedByItem.iterator(); it.hasNext();) { 1721 // String contained = (String) it.next(); 1722 // territory_parent.put(contained, item); 1723 // gather(contained, territory_parent); 1724 // } 1725 // } 1726 addTerritoryInfo(String territoriesList, String type, String info)1727 private void addTerritoryInfo(String territoriesList, String type, String info) { 1728 String[] territories = territoriesList.split("\\s+"); 1729 territoryTypes.add(type); 1730 for (int i = 0; i < territories.length; ++i) { 1731 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false); 1732 Map<String, Set<String>> s = territoryData.get(territory); 1733 if (s == null) { 1734 territoryData.put(territory, s = new TreeMap<String, Set<String>>()); 1735 } 1736 Set<String> ss = s.get(type); 1737 if (ss == null) { 1738 s.put(type, ss = new TreeSet<String>()); 1739 } 1740 ss.add(info); 1741 } 1742 } 1743 showCalendarData(PrintWriter pw0)1744 public void showCalendarData(PrintWriter pw0) throws IOException { 1745 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Other Territory Data", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1746 pw.println("<table>"); 1747 pw.println("<tr><th class='source'>Territory</th>"); 1748 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) { 1749 String header = it.next(); 1750 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1751 pw.println("<th class='target'>" + header + "</th>"); 1752 } 1753 pw.println("</tr>"); 1754 1755 String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false); 1756 Map<String, Set<String>> worldData = territoryData.get(worldName); 1757 for (Iterator<String> it = territoryData.keySet().iterator(); it.hasNext();) { 1758 String country = it.next(); 1759 if (country.equals(worldName)) 1760 continue; 1761 showCountry(pw, country, country, worldData); 1762 } 1763 showCountry(pw, worldName, "Other", worldData); 1764 pw.println("</table>"); 1765 pw.close(); 1766 } 1767 
showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)1768 private void showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData) { 1769 pw.println("<tr><td class='source'>" + countryTitle + "</td>"); 1770 Map<String, Set<String>> data = territoryData.get(country); 1771 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) { 1772 String type = it2.next(); 1773 String target = "target"; 1774 Set<String> results = data.get(type); 1775 Set<String> worldResults = worldData.get(type); 1776 if (results == null) { 1777 results = worldResults; 1778 target = "target2"; 1779 } else if (results.equals(worldResults)) { 1780 target = "target2"; 1781 } 1782 String out = ""; 1783 if (results != null) { 1784 out = results + ""; 1785 out = out.substring(1, out.length() - 1); // remove [ and ] 1786 } 1787 pw.println("<td class='" + target + "'>" + out + "</td>"); 1788 } 1789 pw.println("</tr>"); 1790 } 1791 showCorrespondances()1792 public void showCorrespondances() { 1793 // show correspondances between language and script 1794 Map<String, String> name_script = new TreeMap<String, String>(); 1795 for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext();) { 1796 String script = it.next(); 1797 String name = english.getName(CLDRFile.SCRIPT_NAME, script); 1798 if (name == null) 1799 name = script; 1800 name_script.put(name, script); 1801 /* 1802 * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 1803 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories 1804 * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 1805 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? 
language_scripts 1806 */} 1807 String delimiter = "\\P{L}+"; 1808 Map<String, String> name_language = new TreeMap<String, String>(); 1809 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1810 String language = it.next(); 1811 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1812 if (names == null) 1813 names = language; 1814 name_language.put(names, language); 1815 } 1816 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) { 1817 String language = it.next(); 1818 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 1819 if (names == null) 1820 names = language; 1821 String[] words = names.split(delimiter); 1822 if (words.length > 1) { 1823 // System.out.println(names); 1824 } 1825 for (int i = 0; i < words.length; ++i) { 1826 String name = words[i]; 1827 String script = name_script.get(name); 1828 if (script != null) { 1829 Set<String> langSet = (Set<String>) script_languages.asMap().get(script); 1830 if (langSet != null && langSet.contains(language)) 1831 System.out.print("*"); 1832 System.out.println("\t" + name + " [" + language + "]\t=> " + name + " [" + script + "]"); 1833 } else { 1834 String language2 = name_language.get(name); 1835 if (language2 != null && !language.equals(language2)) { 1836 Set<String> langSet = (Set<String>) language_scripts.get(language); 1837 if (langSet != null) 1838 System.out.print("*"); 1839 System.out.print("?\tSame script?\t + " + getName(CLDRFile.LANGUAGE_NAME, language, false) 1840 + "\t & " + getName(CLDRFile.LANGUAGE_NAME, language2, false)); 1841 langSet = (Set<String>) language_scripts.get(language2); 1842 if (langSet != null) 1843 System.out.print("*"); 1844 System.out.println(); 1845 } 1846 } 1847 } 1848 } 1849 } 1850 1851 /** 1852 * @throws IOException 1853 * 1854 */ printCurrency(PrintWriter index)1855 public void printCurrency(PrintWriter index) throws IOException { 1856 PrintWriter pw = new PrintWriter(new 
FormattedFileWriter(null, "Detailed Territory-Currency Information", 1857 null 1858 // "<p>The following table shows when currencies were in use in different countries. " + 1859 // "See also <a href='#format_info'>Decimal Digits and Rounding</a>. " + 1860 // "To correct any information here, please file a " + 1861 // addBug(1274, "bug", "<email>", "Currency Bug", 1862 // "<currency, country, and references supporting change>") + 1863 // ".</p>" 1864 , SUPPLEMENTAL_INDEX_ANCHORS)); 1865 String section1 = "Territory to Currency"; 1866 String section2 = "Decimal Digits and Rounding"; 1867 showContents(pw, "territory_currency", section1, "format_info", section2); 1868 1869 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("territory_currency", "1. " + section1) + "</h2>"); 1870 1871 // doTitle(pw, "Territory \u2192 Currency"); 1872 pw.println("<table>"); 1873 pw.println("<tr><th class='source'>Territory</th>" + 1874 "<th class='source'>Code</th>" + 1875 "<th class='target'>From</th>" + 1876 "<th class='target'>To</th>" + 1877 "<th class='target'>Currency</th>" + 1878 "<th class='target'>Name</th>" + 1879 "</tr>"); 1880 1881 Relation<String, String> currencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1882 HashSet.class); 1883 Relation<String, String> modernCurrencyToTerritory = Relation.of(new HashMap<String, Set<String>>(), 1884 HashSet.class); 1885 1886 for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) { 1887 String name = nameCode.getKey(); 1888 String regionCode = nameCode.getValue(); 1889 if (!StandardCodes.isCountry(regionCode)) { 1890 continue; 1891 } 1892 if (sc.isLstregPrivateUse("region", regionCode)) { 1893 continue; 1894 } 1895 Set<CurrencyDateInfo> info = supplementalDataInfo.getCurrencyDateInfo(regionCode); 1896 1897 int infoSize = 1; 1898 if (info != null) { 1899 infoSize = info.size(); 1900 } 1901 pw.println("<tr>" + 1902 "<td class='source' rowSpan='" + infoSize + "'>" + name + "</td>" + 1903 "<td class='source' 
rowSpan='" + infoSize + "'>" + CldrUtility.getDoubleLinkedText(regionCode) 1904 + "</td>"); 1905 if (info == null) { 1906 pw.println("<td class='target'>" + "<i>na</i>" + "</td>" + 1907 "<td class='target'>" + "<i>na</i>" + "</td>" + 1908 "<td class='target'>" + "<i>na</i>" + "</td>" + 1909 "<td class='target'>" + "<i>na</i>" + "</td>" + 1910 "</tr>"); 1911 continue; 1912 } 1913 boolean first = true; 1914 for (CurrencyDateInfo infoItem : info) { 1915 Date endData = infoItem.getEnd(); 1916 if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { 1917 modernCurrencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1918 } else { 1919 currencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode)); 1920 } 1921 if (first) 1922 first = false; 1923 else 1924 pw.println("<tr>"); 1925 pw.println("<td class='target'>" + CurrencyDateInfo.formatDate(infoItem.getStart()) + "</td>" + 1926 "<td class='target'>" + CurrencyDateInfo.formatDate(endData) + "</td>" + 1927 "<td class='target'>" + infoItem.getCurrency() + "</td>" + 1928 "<td class='target'>" + english.getName("currency", infoItem.getCurrency()) + "</td>" + 1929 "</tr>"); 1930 } 1931 } 1932 // doFooter(pw); 1933 // pw.close(); 1934 // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); 1935 pw.write("</table>"); 1936 1937 pw.println("<h2>" + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) + "</h2>"); 1938 1939 pw.write("<p>This table shows the number of digits used for each currency, " 1940 + " and the countries where it is or was in use. " 1941 + "Countries where the currency is in current use are bolded. " 1942 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " 1943 + "Where the values are different in a cash context, that is shown in a second column." 
1944 + "</p>"); 1945 pw.write("<div align='center'><table>"); 1946 1947 // doTitle(pw, "Currency Format Info"); 1948 // <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" cashRounding="0"/> 1949 1950 pw.println("<tr>" + 1951 "<th class='source nowrap'>Name</th>" + 1952 "<th class='source'>Currency</th>" + 1953 "<th class='target'>Digits</th>" + 1954 "<th class='target'>Cash Digits</th>" + 1955 "<th class='target'>Countries</th>" + 1956 "</tr>"); 1957 Set<String> currencyList = new TreeSet<String>(col); 1958 currencyList.addAll(currency_fractions.keySet()); 1959 currencyList.addAll(currency_territory.keySet()); 1960 1961 for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) { 1962 //String name = nameCode.getKey(); 1963 String currency = nameCode.getValue(); 1964 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); 1965 Set<String> territories = currencyToTerritory.get(currency); 1966 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency); 1967 1968 // String fractions = (String) currency_fractions.get(currency); 1969 // if (fractions == null) 1970 // fractions = defaultDigits; 1971 // Set territories = (Set) currency_territory.get(currency); 1972 pw.print("<tr>" + 1973 "<td class='source nowrap'>" 1974 + TransliteratorUtilities.toHTML.transform(english.getName("currency", currency)) + "</td>" + 1975 "<td class='source'>" + CldrUtility.getDoubleLinkedText(currency) + "</td>" + 1976 "<td class='target'>" + 1977 info.getDigits() 1978 + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") 1979 + "</td>" 1980 + "<td class='target'>" 1981 + (info.cashDigits == info.getDigits() && info.cashRounding == info.getRounding() ? "" : (info.cashDigits 1982 + (info.cashRounding == 0 ? 
"" : " (" + info.cashRounding + ")"))) 1983 + "</td>" + 1984 "<td class='target'>"); 1985 boolean first = true; 1986 boolean needBreak = false; 1987 if (modernTerritories != null) { 1988 needBreak = true; 1989 for (String territory : modernTerritories) { 1990 if (first) 1991 first = false; 1992 else 1993 pw.print(", "); 1994 pw.print("<b>" + territory + "</b>"); 1995 } 1996 } 1997 //boolean haveBreak = true; 1998 if (territories != null) { 1999 for (String territory : territories) { 2000 if (first) 2001 first = false; 2002 else if (!needBreak) 2003 pw.print(", "); 2004 else { 2005 pw.print(",<br>"); 2006 needBreak = false; 2007 } 2008 pw.print(territory); 2009 } 2010 } 2011 pw.println("</td></tr>"); 2012 } 2013 pw.println("</table>"); 2014 pw.close(); 2015 // doFooter(pw); 2016 2017 // if (false) { 2018 // doTitle(pw, "Territories Versus Currencies"); 2019 // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without Territories</th></tr>"); 2020 // pw.println("<tr><td class='target'>"); 2021 // Set territoriesWithoutCurrencies = new TreeSet(); 2022 // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); 2023 // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); 2024 // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); 2025 // boolean first = true; 2026 // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { 2027 // if (first) first = false; 2028 // else pw.print(", "); 2029 // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false)); 2030 // } 2031 // pw.println("</td><td class='target'>"); 2032 // Set currenciesWithoutTerritories = new TreeSet(); 2033 // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); 2034 // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); 2035 // first = true; 2036 // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { 2037 // if (first) first = false; 2038 
// else pw.print(", "); 2039 // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false)); 2040 // } 2041 // pw.println("</td></tr>"); 2042 // doFooter(pw); 2043 // } 2044 } 2045 getTerritoryName(String territory)2046 private String getTerritoryName(String territory) { 2047 String name; 2048 name = english.getName("territory", territory); 2049 if (name == null) { 2050 name = sc.getData("territory", territory); 2051 } 2052 if (name != null) { 2053 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; 2054 } else { 2055 return territory; 2056 } 2057 } 2058 2059 /** 2060 * @throws IOException 2061 * 2062 */ printAliases(PrintWriter index)2063 public void printAliases(PrintWriter index) throws IOException { 2064 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2065 2066 // doTitle(pw, "Aliases"); 2067 pw.println("<table>"); 2068 pw.println("<tr><th class='source'>" + "Type" + "</th>" + 2069 "<th class='source'>" + "Code" + "</th>" + 2070 "<th class='target'>" + "Reason" + "</th>" + 2071 "<th class='target'>" + "Substitute (if available)" + "</th></tr>"); 2072 for (Iterator<String[]> it = aliases.iterator(); it.hasNext();) { 2073 String[] items = it.next(); 2074 pw.println("<tr><td class='source'>" + items[0] + "</td>" + 2075 "<td class='source'>" + CldrUtility.getDoubleLinkedText(items[1]) + "</td>" + 2076 "<td class='target'>" + items[3] + "</td>" + 2077 "<td class='target'>" + items[2] + "</td></tr>"); 2078 } 2079 // doFooter(pw); 2080 pw.println("</table>"); 2081 pw.close(); 2082 } 2083 2084 // deprecatedItems 2085 // public void printDeprecatedItems(PrintWriter pw) { 2086 // doTitle(pw, "Deprecated Items"); 2087 // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>"); 2088 // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { 2089 // Map source = 
(Map)it.next(); 2090 // Object item; 2091 // pw.print("<tr>"); 2092 // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") + "</td>"); 2093 // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : "<i>any</i>") + "</td>"); 2094 // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : "<i>any</i>") + "</td>"); 2095 // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : "<i>any</i>") + "</td>"); 2096 // pw.print("</tr>"); 2097 // } 2098 // doFooter(pw); 2099 // } 2100 printWindows_Tzid(PrintWriter index)2101 public void printWindows_Tzid(PrintWriter index) throws IOException { 2102 Map<String, Map<String, Map<String, String>>> zoneMapping = supplementalDataInfo 2103 .getTypeToZoneToRegionToZone(); 2104 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2105 for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : zoneMapping.entrySet()) { 2106 String type = typeAndZoneToRegionToZone.getKey(); 2107 Map<String, Map<String, String>> zoneToRegionToZone = typeAndZoneToRegionToZone.getValue(); 2108 pw.println("<br><h1>Mapping for: " + type + "</h1><br>"); 2109 // doTitle(pw, "Windows \u2192 Tzid"); 2110 pw.println("<table>"); 2111 pw.println("<tr><th class='source'>" + type + "</th><th class='source'>" + "Region" 2112 + "</th><th class='target'>" + "TZID" + "</th></tr>"); 2113 2114 for (Entry<String, Map<String, String>> zoneAndregionToZone : zoneToRegionToZone.entrySet()) { 2115 String source = zoneAndregionToZone.getKey(); 2116 Map<String, String> regionToZone = zoneAndregionToZone.getValue(); 2117 for (Entry<String, String> regionAndZone : regionToZone.entrySet()) { 2118 String region = regionAndZone.getKey(); 2119 String target = regionAndZone.getValue(); 2120 if (region == null) region = "<i>any</a>"; 2121 pw.println("<tr><td class='source'>" + source + "</td><td 
class='source'>" + region 2122 + "</td><td class='target'>" + target + "</td></tr>"); 2123 } 2124 } 2125 // doFooter(pw); 2126 pw.println("</table>"); 2127 } 2128 pw.close(); 2129 } 2130 2131 // <info iso4217="ADP" digits="0" rounding="0"/> 2132 printCharacters(PrintWriter index)2133 public void printCharacters(PrintWriter index) throws IOException { 2134 String title = "Character Fallback Substitutions"; 2135 2136 PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2137 // doTitle(pw, title); 2138 pw.println("<table>"); 2139 2140 pw.println( 2141 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>"); 2142 UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); 2143 for (com.ibm.icu.text.UnicodeSetIterator it = new com.ibm.icu.text.UnicodeSetIterator(chars); it.next();) { 2144 String value = it.getString(); 2145 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); 2146 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); 2147 } 2148 int[] counts = new int[4]; 2149 for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext();) { 2150 String value = it.next(); 2151 LinkedHashSet<String> substitutes = charSubstitutions.get(value); 2152 String nfc = Normalizer.normalize(value, Normalizer.NFC); 2153 String nfkc = Normalizer.normalize(value, Normalizer.NFKC); 2154 2155 String sourceTag = "<td class='source'>"; 2156 if (substitutes.size() > 1) { 2157 sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>"; 2158 } 2159 boolean first = true; 2160 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext();) { 2161 String substitute = it2.next(); 2162 String type = "Explicit"; 2163 String targetTag = "<td class='target3'>"; 2164 if (substitute.equals(nfc)) { 2165 type = "NFC"; 2166 targetTag = "<td class='target'>"; 
2167 counts[2]++; 2168 } else if (substitute.equals(nfkc)) { 2169 type = "NFKC"; 2170 targetTag = "<td class='target4'>"; 2171 counts[3]++; 2172 } else { 2173 counts[0]++; 2174 } 2175 pw.println("<tr>" 2176 + (!first ? "" : sourceTag + hex(value, ", ") + "</td>" + sourceTag 2177 + TransliteratorUtilities.toHTML.transliterate(value) + "</td>" + sourceTag 2178 + UCharacter.getName(value, ", ") 2179 + "</td>") 2180 + targetTag + type + "</td>" + targetTag + hex(substitute, ", ") + "</td>" 2181 + targetTag + TransliteratorUtilities.toHTML.transliterate(substitute) + "</td>" + targetTag 2182 + UCharacter.getName(substitute, ", ") + "</td></tr>"); 2183 first = false; 2184 } 2185 } 2186 // doFooter(pw); 2187 pw.println("</table>"); 2188 2189 pw.close(); 2190 for (int i = 0; i < counts.length; ++i) { 2191 System.out.println("Count\t" + i + "\t" + counts[i]); 2192 } 2193 } 2194 hex(String s, String separator)2195 public static String hex(String s, String separator) { 2196 StringBuffer result = new StringBuffer(); 2197 int cp; 2198 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 2199 cp = UTF16.charAt(s, i); 2200 if (i != 0) 2201 result.append(separator); 2202 result.append(com.ibm.icu.impl.Utility.hex(cp)); 2203 } 2204 return result.toString(); 2205 } 2206 2207 /** 2208 * 2209 */ 2210 // private PrintWriter doTitle(PrintWriter pw, String title) { 2211 // //String anchor = FileUtilities.anchorize(title); 2212 // pw.println("<div align='center'><table>"); 2213 // //anchors.put(title, anchor); 2214 // //PrintWriter result = null; 2215 // //return result; 2216 // } 2217 2218 // private void doFooter(PrintWriter pw) { 2219 // pw.println("</table></div>"); 2220 // } printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst)2221 public void printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst) { 2222 String name = depth == 4 ? 
start : getName(CLDRFile.TERRITORY_NAME, start, false); 2223 if (!isFirst) 2224 pw.print(lead); 2225 int count = getTotalContainedItems(start, depth); 2226 pw.print("<td class='z" + depth + "' rowSpan='" + count + "'>" + name + "</td>"); // colSpan='" + (5 - 2227 // depth) + "' 2228 if (depth == 4) 2229 pw.println("</tr>"); 2230 Collection<String> contains = getContainedCollection(start, depth); 2231 if (contains != null) { 2232 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator); 2233 contains2.addAll(contains); 2234 boolean first = true; 2235 for (Iterator<String> it = contains2.iterator(); it.hasNext();) { 2236 String item = it.next(); 2237 printContains2(pw, lead, item, depth + 1, first); // + "<td> </td>" 2238 first = false; 2239 } 2240 } 2241 } 2242 getTotalContainedItems(String start, int depth)2243 private int getTotalContainedItems(String start, int depth) { 2244 Collection<String> c = getContainedCollection(start, depth); 2245 if (c == null) 2246 return 1; 2247 int sum = 0; 2248 for (Iterator<String> it = c.iterator(); it.hasNext();) { 2249 sum += getTotalContainedItems(it.next(), depth + 1); 2250 } 2251 return sum; 2252 } 2253 2254 /** 2255 * 2256 */ getContainedCollection(String start, int depth)2257 private Collection<String> getContainedCollection(String start, int depth) { 2258 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2259 if (contains == null) { 2260 contains = sc.getCountryToZoneSet().get(start); 2261 if (contains == null && depth == 3) { 2262 contains = new TreeSet<String>(); 2263 if (start.compareTo("A") >= 0) { 2264 contains.add("<font color='red'>MISSING TZID</font>"); 2265 } else { 2266 contains.add("<font color='red'>Not yet ISO code</font>"); 2267 } 2268 } 2269 } 2270 return contains; 2271 } 2272 2273 /** 2274 * @param table 2275 * TODO 2276 * 2277 */ printMissing(PrintWriter pw, int source, int table)2278 public void printMissing(PrintWriter pw, int source, int table) { 2279 
Set<String> missingItems = new HashSet<String>();
            String type = null;
            if (source == CLDRFile.TERRITORY_NAME) {
                type = "territory";
                missingItems.addAll(sc.getAvailableCodes(type));
                missingItems.removeAll(territory_languages.keySet());
                missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet());
                missingItems.remove("200"); // czechoslovakia
            } else if (source == CLDRFile.SCRIPT_NAME) {
                type = "script";
                missingItems.addAll(sc.getAvailableCodes(type));
                missingItems.removeAll(script_languages.keySet());
            } else if (source == CLDRFile.LANGUAGE_NAME) {
                type = "language";
                missingItems.addAll(sc.getAvailableCodes(type));
                if (table == CLDRFile.SCRIPT_NAME)
                    missingItems.removeAll(language_scripts.keySet());
                if (table == CLDRFile.TERRITORY_NAME)
                    missingItems.removeAll(language_territories.keySet());
            } else {
                throw new IllegalArgumentException("Illegal code");
            }
            Set<String> missingItemsNamed = new TreeSet<String>(col);
            for (Iterator<String> it = missingItems.iterator(); it.hasNext();) {
                String item = it.next();
                List<String> data = sc.getFullData(type, item);
                if (data.get(0).equals("PRIVATE USE"))
                    continue;
                if (data.size() < 3)
                    continue;
                if (!"".equals(data.get(2)))
                    continue;

                String itemName = getName(source, item, true);
                missingItemsNamed.add(itemName);
            }
            pw.println("<div align='center'><table>");
            for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext();) {
                pw.println("<tr><td class='target'>" + it.next() + "</td></tr>");
            }
            pw.println("</table></div>");
        }

        /**
         * Prints an HTML table that lists, for every source code, the target codes related to it.
         * Both sides are converted to display names with {@link #getName(int, String, boolean)}
         * (code first) and sorted with {@code col}.
         *
         * @param pw
         *            writer that receives the HTML
         * @param source
         *            name type of the grouping rows, eg CLDRFile.TERRITORY_NAME
         * @param target
         *            name type of the detail rows, eg CLDRFile.LANGUAGE_NAME
         * @throws IllegalArgumentException
         *             if the source/target pair is not one of territory/language,
         *             language/territory, script/language or language/script
         */
        public void print(PrintWriter pw, int source, int target) {
            final Multimap<String, String> data;
            if (source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME) {
                data = territory_languages;
            } else if (source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME) {
                data = language_territories;
            } else if (source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME) {
                data = script_languages;
            } else if (source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME) {
                data = language_scripts;
            } else {
                // Previously this case fell through with data == null and failed with a
                // NullPointerException on the first use below.
                throw new IllegalArgumentException(
                    "Unsupported source/target combination: " + source + ", " + target);
            }

            // transform into names, and sort
            Map<String, Set<String>> sourceNameToTargetNames = new TreeMap<String, Set<String>>(col);
            for (String sourceCode : data.keySet()) {
                String sourceName = getName(source, sourceCode, true);
                Set<String> targetNames = sourceNameToTargetNames.get(sourceName);
                if (targetNames == null) {
                    targetNames = new TreeSet<String>(col);
                    sourceNameToTargetNames.put(sourceName, targetNames);
                }
                for (String targetCode : data.get(sourceCode)) {
                    targetNames.add(getName(target, targetCode, true));
                }
            }

            pw.println("<div align='center'><table>");

            for (Entry<String, Set<String>> entry : sourceNameToTargetNames.entrySet()) {
                pw.println("<tr><td class='source' colspan='2'>" + entry.getKey() + "</td></tr>");
                for (String targetName : entry.getValue()) {
                    pw.println("<tr><td> </td><td class='target'>" + targetName + "</td></tr>");
                }
            }
            pw.println("</table></div>");

        }

        /**
         * Returns a display string for a code, combining its English name with the code itself.
         * <p>
         * If {@code oldcode} contains a space it is split on whitespace, each piece is converted
         * separately, and the results are joined with ", ". Otherwise the English name is looked
         * up for the part of {@code oldcode} that precedes the first '*' (the whole string when
         * there is none); when no English name is available that part is used as the name. If the
         * resulting name equals {@code oldcode} it is returned alone, otherwise it is combined
         * with {@code "[" + oldcode + "]"}, separated by a tab.
         *
         * @param type
         *            name type passed to {@code english.getName}, eg CLDRFile.TERRITORY_NAME
         * @param oldcode
         *            the code, or several codes separated by whitespace
         * @param codeFirst
         *            if true the bracketed code precedes the name, otherwise it follows it
         * @return the display string
         */
        private String getName(int type, String oldcode, boolean codeFirst) {
            if (oldcode.contains(" ")) {
                String[] result = oldcode.split("\\s+");
                for (int i = 0; i < result.length; ++i) {
                    result[i] = getName(type, result[i], codeFirst);
                }
                return CldrUtility.join(Arrays.asList(result), ", ");
            } else {
                int pos = oldcode.indexOf('*');
                String code = pos < 0 ? oldcode : oldcode.substring(0, pos);
                String ename = english.getName(type, code);
                String nameString = ename == null ? code : ename;
                return nameString.equals(oldcode) ? nameString
                    : codeFirst ? "[" + oldcode + "]" + "\t" + nameString
                        : nameString + "\t" + "[" + oldcode + "]";
            }
        }

        /**
         * Returns a display string for a locale: the result of {@code getLanguageName(locale)}
         * (or the locale id itself when that is null) combined with the bracketed locale id,
         * separated by a tab.
         *
         * @param locale
         *            the locale id
         * @param codeFirst
         *            if true the bracketed id precedes the name, otherwise it follows it
         * @return the display string
         */
        private String getName(String locale, boolean codeFirst) {
            String ename = getLanguageName(locale);
            String name = ename == null ? locale : ename;
            return codeFirst ? "[" + locale + "]\t" + name
                : name + "\t[" + locale + "]";
        }

        /**
         * Orders territory codes by their display names (name first, then bracketed code),
         * compared with {@code col}. Both arguments are cast to String.
         */
        Comparator territoryNameComparator = new Comparator() {
            @Override
            public int compare(Object o1, Object o2) {
                return col.compare(getName(CLDRFile.TERRITORY_NAME, (String) o1, false),
                    getName(CLDRFile.TERRITORY_NAME, (String) o2, false));
            }
        };

        // Empty arrays used only as type tokens for Collection.toArray(T[]).
        static String[] stringArrayPattern = new String[0];
        static String[][] string2ArrayPattern = new String[0][];

        // Regions whose codes appear as keys here are skipped by printContains3.
        public static Map<String, String> territoryAliases = new HashMap<String, String>();

        /**
         * Writes the "Territory Containment (UN M.49)" chart: a table built from the containment
         * hierarchy starting at "001", followed by the "Groupings" and "Deprecated" subtables.
         *
         * @param index
         *            not used by this method
         * @throws IOException
         *             declared for the writer setup
         */
        public void printContains(PrintWriter index) throws IOException {
            String title = "Territory Containment (UN M.49)";

            PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS));
            // doTitle(pw, title);
            List<String[]> rows = new ArrayList<String[]>();
            printContains3("001", rows, new ArrayList<String>());
            TablePrinter tablePrinter = new TablePrinter()
                .addColumn("World", "class='source'", null, "class='z0'", true).setSortPriority(0)
                .addColumn("Continent", "class='source'", null, "class='z1'", true).setSortPriority(1)
                .addColumn("Subcontinent", "class='source'", null, "class='z2'", true).setSortPriority(2)
                .addColumn("Country (Territory)", "class='source'", null, "class='z3'", true).setSortPriority(3)
                .addColumn("Time Zone", "class='source'", null, "class='z4'", true).setSortPriority(4);
            String[][] flatData = rows.toArray(string2ArrayPattern);
            pw.println(tablePrinter.addRows(flatData).toTable());

            showSubtable(pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions");
            showSubtable(pw, ContainmentStyle.deprecated, "Deprecated", "Container", "Deprecated Region");

            // Relation<String, String> deprecated = supplementalDataInfo
            // .getTerritoryToContained(ContainmentStyle.deprecated);
            //
            // for (String region : deprecated.keySet()) {
            // nameToContainers.add(region);
            // }
            // pw.println("<h2>Groupings and Deprecated Regions</h2>");
            // for (String region : nameToContainers) {
            // String name = getName(CLDRFile.TERRITORY_NAME, region, false);
            // Set<String> dep = deprecated.get(region);
            // Set<String> gro = grouping.get(region);
            // Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : dep).iterator();
            // Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : gro).iterator();
            // while (depIt.hasNext() || groIt.hasNext()) {
            // String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, depIt.next(), false) : "";
            // String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, groIt.next(), false) : "";
            // tablePrinter2.addRow()
            // .addCell(name)
            // .addCell(gro1)
            // .addCell(dep1)
            // .finishRow();
            // }
            // }
            // pw.println(tablePrinter2.toTable());
            // pw.println("<h2>Other Groupings</h2>");
            // for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) {
            // showContainers(pw, regionContained);
            // }
            //
            // pw.println("<h2>Deprecated Codes</h2>");
            // for (Entry<String, Set<String>> regionContained : deprecated.keyValuesSet()) {
            // showContainers(pw, regionContained);
            // }
            pw.close();
        }

        /**
         * Prints an {@code <h2>} heading followed by a two-column table with one row per
         * (container, contained region) pair of the given containment style.
         *
         * @param pw
         *            writer that receives the HTML
         * @param containmentStyle
         *            which containment relation to read from {@code supplementalDataInfo}
         * @param title
         *            heading text
         * @param containerTitle
         *            header of the first column
         * @param containeeTitle
         *            header of the second column
         */
        public void showSubtable(PrintWriter pw, ContainmentStyle containmentStyle, String title, String containerTitle, String containeeTitle) {
            pw.println("<h2>" +
                title +
                "</h2>");
            TablePrinter tablePrinter2 = new TablePrinter()
                .addColumn(containerTitle, "class='source'", null, "class='z0'", true).setSortPriority(0)
                .addColumn(containeeTitle, "class='source'", null, "class='z4'", true).setSortPriority(1);

            Relation<String, String> grouping = supplementalDataInfo
                .getTerritoryToContained(containmentStyle);

            for (Entry<String, String> containerRegion : grouping.keyValueSet()) {
                String container = getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false);
                String containee = getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false);
                tablePrinter2.addRow()
                    .addCell(container)
                    .addCell(containee)
                    .finishRow();
            }
            pw.println(tablePrinter2.toTable());
        }

        /**
         * Prints one region and the regions it contains as a nested HTML list.
         *
         * @param pw
         *            writer that receives the HTML
         * @param regionContained
         *            entry whose key is the containing region code and whose value is the set of
         *            contained region codes
         */
        public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) {
            String region = regionContained.getKey();
            Set<String> contained = regionContained.getValue();
            pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>");
            for (String sub : contained) {
                pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>");
            }
            pw.println("</ul></li></ul>");
        }

        /**
         * Recursively flattens the containment tree below {@code start} into table rows.
         * <p>
         * {@code currentRow} holds the names on the path from the root to the current region; it
         * is extended on entry and restored before returning. A region without an entry in the
         * core containment data ends the path: one row is added per value returned by
         * {@code sc.getCountryToZoneSet()} for it, or a single row ending in "???" when there is
         * none. Paths that already hold more than three names are skipped.
         *
         * @param start
         *            region code to expand
         * @param rows
         *            receives the finished rows
         * @param currentRow
         *            names on the path so far
         */
        private void printContains3(String start, List<String[]> rows, ArrayList<String> currentRow) {
            int len = currentRow.size();
            if (len > 3) {
                return; // skip long items
            }
            currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false));
            //Collection<String> contains = (Collection<String>) group_contains.get(start);
            Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start);
            if (contains == null) {
                contains = sc.getCountryToZoneSet().get(start);
                // placeholder cell at index len + 1, overwritten for every emitted row
                currentRow.add("");
                if (contains == null) {
                    currentRow.set(len + 1, "???");
                    rows.add(currentRow.toArray(stringArrayPattern));
                } else {
                    for (String item : contains) {
                        currentRow.set(len + 1, item);
                        rows.add(currentRow.toArray(stringArrayPattern));
                    }
                }
                currentRow.remove(len + 1);
            } else {
                for (String item : contains) {
                    if (territoryAliases.containsKey(item)) {
                        continue;
                    }
                    printContains3(item, rows, currentRow);
                }
            }
            currentRow.remove(len);
        }

    }

    /**
     * Inverts a key-to-values map: every value becomes a key that maps to the set of original
     * keys it appeared under. The value sets of the result are ordered with {@code col}.
     *
     * @param language_territories
     *            the relation to invert
     * @return a new map holding the inverse relation
     */
    private static Map<String, Set<String>> getInverse(Map<String, Set<String>> language_territories) {
        // get inverse relation
        Map<String, Set<String>> territory_languages = new TreeMap<String, Set<String>>();
        for (Entry<String, Set<String>> entry : language_territories.entrySet()) {
            String language = entry.getKey();
            for (String territory : entry.getValue()) {
                Set<String> languages = territory_languages.get(territory);
                if (languages == null) {
                    languages = new TreeSet<String>(col);
                    territory_languages.put(territory, languages);
                }
                languages.add(language);
            }
        }
        return territory_languages;

    }

    static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region");
    static final Map<String, String> NAME_TO_CURRENCY = getNameToCode(CodeType.currency, "currency");

    /**
     * Builds an unmodifiable map, sorted with {@code col}, from English name to code for every
     * available code of the given type. A code without an English name is keyed by itself.
     *
     * @param codeType
     *            which codes to enumerate from StandardCodes
     * @param cldrCodeType
     *            the type string passed to {@code english.getName}
     * @return the name-to-code map
     */
    private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) {
        SortedMap<String, String> temp = new TreeMap<String, String>(col);
        for (String code : StandardCodes.make().getAvailableCodes(codeType)) {
            String name = english.getName(cldrCodeType, code);
            temp.put(name == null ? code : name, code);
        }
        return Collections.unmodifiableSortedMap(temp);
    }

    /**
     * Splits {@code values} and adds the pieces to the set stored under {@code key}, creating
     * that set (ordered with {@code col}) if it does not exist yet. Does nothing when
     * {@code values} is null.
     *
     * @param key
     *            key to add under
     * @param values
     *            delimited list of values, may be null
     * @param value_delimiter
     *            delimiter; passed to {@link String#split(String)}, so it is a regular expression
     * @param key_value
     *            map to update
     */
    private static void addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value) {
        if (values != null) {
            Set<String> s = key_value.get(key);
            if (s == null) {
                s = new TreeSet<String>(col);
                key_value.put(key, s);
            }
            s.addAll(Arrays.asList(values.split(value_delimiter)));
        }
    }

    /**
     * Splits {@code values} and adds the pieces to the multimap under {@code key}. Does nothing
     * when {@code values} is null.
     *
     * @param key
     *            key to add under
     * @param values
     *            delimited list of values, may be null
     * @param value_delimiter
     *            delimiter; passed to {@link String#split(String)}, so it is a regular expression
     * @param key_value
     *            multimap to update
     */
    private static void addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value) {
        if (values != null) {
            key_value.putAll(key, Arrays.asList(values.split(value_delimiter)));
        }
    }

    /**
     * Appends a "Contents" section: a closing {@code </div>}, a heading, an ordered list of
     * links and an opening centered {@code <div>}.
     *
     * @param pw
     *            receives the HTML
     * @param items
     *            alternating anchor name and link text: anchor1, text1, anchor2, text2, ...
     * @throws IllegalArgumentException
     *             if {@code items} does not contain an even number of strings
     * @throws ICUUncheckedIOException
     *             if appending to {@code pw} fails
     */
    public static void showContents(Appendable pw, String... items) {
        // Reject a dangling anchor up front; previously this wrote part of the list and then
        // failed with ArrayIndexOutOfBoundsException.
        if (items.length % 2 != 0) {
            throw new IllegalArgumentException(
                "items must be (anchor, title) pairs, but got " + items.length + " values");
        }
        final String eol = System.lineSeparator();
        try {
            pw.append("</div>" + eol);
            pw.append("<h3>Contents</h3>" + eol);
            pw.append("<ol>" + eol);
            for (int i = 0; i < items.length; i += 2) {
                pw.append("<li><a href='#" + items[i] + "'>" + items[i + 1] + "</a></li>" + eol);
            }
            pw.append("</ol><hr>" + eol);

            pw.append("<div align='center'>" + eol);
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }

}