/*
 **********************************************************************
 * Copyright (c) 2002-2004, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.tool;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.ShowData.DataShower;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LanguageTagParser.Fields;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StringId;
import org.unicode.cldr.util.TransliteratorUtilities;
import org.unicode.cldr.util.XPathParts;
import org.xml.sax.SAXException;

import com.google.common.collect.ImmutableMap;
import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.RuleBasedNumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;

/**
 * This is a simple class that walks through the CLDR hierarchy.
 * It gathers together all the items from all the locales that share the
 * same element chain, and thus presents a "sideways" view of the data, in files called
 * by_type/X.html, where X is a type. X may be the concatenation of more than
 * one element, where the file would otherwise be too large.
 *
 * <p>Besides the per-page HTML files, a single tab-separated file (by_type.tsv) with the
 * same value-to-locales data is written, plus one extra HTML page per entry of
 * {@link #EXEMPLARS} showing which locales use which exemplar characters.
 *
 * @author medavis
 */
/*
 * Notes:
 * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
 * http://developers.sun.com/dev/coolstuff/xml/readme.html
 * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
 * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
 */
public class GenerateSidewaysView {
    private static final String DIR_NAME = "by_type";
    // debug flags
    static final boolean DEBUG = false;
    static final boolean DEBUG2 = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean DEBUG_ELEMENT = false;
    static final boolean DEBUG_SHOW_BAT = false;

    static final boolean FIX_ZONE_ALIASES = true;

    // Indexes into the options array below; the order must match.
    private static final int HELP1 = 0,
        HELP2 = 1,
        SOURCEDIR = 2,
        DESTDIR = 3,
        MATCH = 4,
        SKIP = 5,
        TZADIR = 6,
        NONVALIDATING = 7,
        SHOW_DTD = 8,
        TRANSLIT = 9,
        PATH = 10;

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
        UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
        UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
            "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
        UOption.create("nonvalidating", 'n', UOption.NO_ARG),
        UOption.create("dtd", 'w', UOption.NO_ARG),
        UOption.create("transliterate", 'y', UOption.NO_ARG),
        UOption.create("path", 'p', UOption.REQUIRES_ARG),
    };

    private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
    protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();

    /**
     * Returns the script of the first code point in the set whose script is neither
     * COMMON nor INHERITED.
     *
     * @param exemplars the set to scan, in the iteration order of {@link UnicodeSetIterator}
     * @return a {@link UScript} code, or {@link UScript#COMMON} when no such code point is found
     */
    static int getFirstScript(UnicodeSet exemplars) {
        for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
            int script = UScript.getScript(it.codepoint);
            if (script != UScript.COMMON && script != UScript.INHERITED) {
                return script;
            }
        }
        return UScript.COMMON;
    }

    /** Root collation (numeric, identical strength) with a code-point comparator as tie-breaker. */
    static Comparator<Object> UCA;
    static {
        RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        UCA2.setNumericCollation(true);
        UCA2.setStrength(Collator.IDENTICAL);
        UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
    }

    /** For each path: value -> the locales having that value (draft locales carry a trailing "*"). */
    private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();
    private static long startTime = System.currentTimeMillis();

    /** Ordering used for the values of one path. */
    static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
    static {
        standardCollation.setStrength(Collator.IDENTICAL);
        standardCollation.setNumericCollation(true);
    }

    private static CLDRFile english;
    /** Optional filter from the "path" option; null means every path is accepted. */
    private static Matcher pathMatcher;

    /**
     * True while the page currently being written has an open {@code <table class='table'>}
     * that {@link #finish} still has to close. Only by-type pages open such a table; the
     * exemplar pages write self-contained tables.
     */
    private static boolean tableOpen;

    /**
     * Loads all locales, then writes the index page, one HTML page per section/page pair,
     * the exemplar pages and the TSV file into the destination directory.
     *
     * @param args command line arguments, parsed against {@link #options}
     * @throws SAXException declared for callers; not thrown directly by this method
     * @throws IOException if an output file cannot be opened or a resource cannot be copied
     */
    public static void main(String[] args) throws SAXException, IOException {
        startTime = System.currentTimeMillis();
        ToolUtilities.registerExtraTransliterators();
        UOption.parseArgs(args, options);

        pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");

        // NOTE: the source directories are fixed; the SOURCEDIR option is not consulted here.
        File[] paths = {
            new File(CLDRPaths.MAIN_DIRECTORY),
            new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
            new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
        };
        Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);

        english = cldrFactory.make("en", true);
        pathHeaderFactory = PathHeader.getFactory(english);

        String destDir = options[DESTDIR].value;
        FileCopier.ensureDirectoryExists(destDir);
        FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", destDir, "index.css");
        FormattedFileWriter.copyIncludeHtmls(destDir);

        // now get the info
        loadInformation(cldrFactory);

        System.out.println("Getting types " + path_value_locales.size());
        String headerString = getHeader(path_value_locales.keySet());
        FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", destDir, "index.html",
            ImmutableMap.of(
                "%header%", headerString,
                "%version%", ToolConstants.CHART_DISPLAY_VERSION,
                "%index%", "../index.html",
                "%index-title%", "Main Charts Index",
                "%date%", CldrUtility.isoFormatDateOnly(new Date())));

        System.out.println("Printing files in " + new File(destDir).getAbsolutePath());
        toHTML = TransliteratorUtilities.toHTML;

        String oldMain = "";
        String oldHeader = "";
        PrintWriter out = null;
        Output<PrintWriter> tsvFile = new Output<>();

        try {
            for (Entry<PathHeader, Map<String, Set<String>>> pathEntry : path_value_locales.entrySet()) {
                PathHeader path = pathEntry.getKey();
                String main = getFileName2(path, null);
                if (!main.equals(oldMain)) {
                    // the keys are sorted, so a change of file name means a new page starts here
                    oldMain = main;
                    out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
                    out.println("<table class='table'>");
                    tableOpen = true;
                    oldHeader = "";
                }
                String anchor = toHTML.transliterate(path.getCode());

                String originalPath = path.getOriginalPath();
                String englishValue = english.getStringValue(originalPath);
                englishValue = englishValue == null ? "" : "English: ‹" + englishValue + "›";

                String header = path.getHeader();
                if (!header.equals(oldHeader) && !header.equals("null")) {
                    out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
                        + "</th></tr>");
                    oldHeader = header;
                }
                String anchorId = Long.toHexString(StringId.getId(originalPath));
                out.println("<tr>" +
                    "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
                    "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
                    "</tr>");

                for (Entry<String, Set<String>> valueEntry : pathEntry.getValue().entrySet()) {
                    String value = valueEntry.getKey();
                    String valueClass = DataShower.getBidiStyle(value).length() != 0
                        ? " class='rtl_value'"
                        : " class='value'";
                    out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
                    tsvFile.value.print(
                        path.getSection()
                            + "\t" + path.getPage()
                            + "\t" + path.getHeader()
                            + "\t" + path.getCode()
                            + "\t" + value
                            + "\t");
                    writeLocales(out, tsvFile.value, valueEntry.getValue());
                    out.println("</td></tr>");
                    tsvFile.value.println();
                }
            }
            for (String[] pair : EXEMPLARS) {
                // Keep the writer that showExemplars opened: otherwise the exemplar pages are
                // never given their footer and are never closed.
                out = showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
            }
            finish(out, tsvFile.value);
        } finally {
            // Closing an already closed PrintWriter has no effect, so this is safe after finish().
            if (out != null) {
                out.close();
            }
            finishAll(out, tsvFile.value);
        }
        System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
            .format((System.currentTimeMillis() - startTime) / 1000.0));
    }

    /**
     * Writes the locales sharing one value into the HTML cell and the TSV row.
     * Draft locales (trailing "*") are shown in italics. When the set contains "root",
     * only draft locales and locales whose parent is not in the set are listed, and the
     * rest is summarized as "all others".
     *
     * @param out the HTML page being written
     * @param tsv the TSV file being written
     * @param locales locale IDs having the value; draft ones end with "*"
     */
    private static void writeLocales(PrintWriter out, PrintWriter tsv, Set<String> locales) {
        boolean first = true;
        boolean containsRoot = locales.contains("root");
        for (String locale : locales) {
            if (first) {
                first = false;
            } else {
                out.print(" ");
            }
            if (locale.endsWith("*")) {
                locale = locale.substring(0, locale.length() - 1);
                out.print("<i>\u00B7" + locale + "\u00B7</i>");
                tsv.print("\u00B7" + locale + "\u00B7");
            } else if (!containsRoot) {
                out.print("\u00B7" + locale + "\u00B7");
                tsv.print("\u00B7" + locale + "\u00B7");
            } else if (locale.contains("_")) {
                // not same as root, but need to test for parent
                // if the parent is not in the same list, then we include anyway.
                // Cf http://unicode.org/cldr/trac/ticket/7228
                String parent = LocaleIDParser.getParent(locale);
                if (!locales.contains(parent)) {
                    out.print("<b>\u00B7" + locale + "\u00B7</b>");
                    tsv.print("\u00B7" + locale + "\u00B7");
                }
            }
        }
        if (containsRoot) {
            out.print("<b>\u00B7all\u00B7others\u00B7</b>");
            tsv.print("\u00B7all-others\u00B7");
        }
    }

    /** Exemplar pages to generate: { path, file-name suffix (variant), page title }. */
    static final String[][] EXEMPLARS = {
        { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
        // TODO look at numbers, auxiliary
    };

    /**
     * Finishes the previous page, starts the exemplar page for one variant and writes one
     * character-by-locale table per script.
     *
     * @param out the page written so far (may be null); it is finished and closed
     * @param headerString navigation header printed at the top of the new page
     * @param pathName the exemplar path whose values are shown
     * @param variant file-name suffix; "main" additionally adds the NFD forms of each item
     * @param title page title
     * @param tsvFile holder of the TSV writer (created on first use by {@link #writeHeader})
     * @return the writer of the newly started page; the caller must finish it
     * @throws IOException if the new page cannot be opened
     */
    private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
        Output<PrintWriter> tsvFile)
        throws IOException {
        PathHeader ph = fixPath(pathName, null);
        String filename = getFileName2(ph, variant);
        out = start(out, filename, headerString, title, tsvFile);
        Map<String, Set<String>> value_locales = path_value_locales.get(ph);
        if (value_locales == null) {
            // Nothing was loaded for this path (e.g. it was excluded by the "path" or "match"
            // option): produce an empty page instead of failing with a NullPointerException.
            value_locales = new TreeMap<>();
        }

        // TODO change logic so that aux characters work well.

        Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
        UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");

        // get the locale information
        UnicodeSet totalExemplars = new UnicodeSet();
        for (Entry<String, Set<String>> entry : value_locales.entrySet()) {
            // flatten out UnicodeSet
            UnicodeSet exemplars = new UnicodeSet(entry.getKey());
            if (variant.equals("main")) {
                UnicodeSet extras = new UnicodeSet();
                for (String item : exemplars) {
                    extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
                }
                exemplars.addAll(extras);
            }
            totalExemplars.addAll(exemplars);
            exemplars.removeAll(stuffToSkip);

            for (String locale : entry.getValue()) {
                checkTr(script_UnicodeMap);
                String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
                String script = scriptForLocale(key);
                Set<String> temp = new HashSet<>();
                temp.add(locale);
                checkTr(script_UnicodeMap);
                UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
                if (mapping == null) {
                    mapping = new UnicodeMap<>();
                    script_UnicodeMap.put(script, mapping);
                }
                checkTr(script_UnicodeMap);
                mapping.composeWith(exemplars, temp, setComposer);
                checkTr(script_UnicodeMap);
            }
        }
        System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
        for (Entry<String, UnicodeMap<Set<String>>> entry : script_UnicodeMap.entrySet()) {
            writeCharToLocaleMapping(out, entry.getKey(), entry.getValue());
        }
        return out;
    }

    /**
     * Looks up the script recorded in {@link #LOCALE_TO_SCRIPT} for a locale, retrying with up
     * to two levels of simple parents.
     *
     * @param key locale ID without the draft marker
     * @return the script name, or the name of {@link UScript#UNKNOWN} when none is recorded
     */
    private static String scriptForLocale(String key) {
        String script = LOCALE_TO_SCRIPT.get(key);
        // try a few variants until we get the script
        if (script == null && key.contains("_")) {
            String simpleParent = LanguageTagParser.getSimpleParent(key);
            script = LOCALE_TO_SCRIPT.get(simpleParent);
            if (script == null && simpleParent.contains("_")) {
                simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
                script = LOCALE_TO_SCRIPT.get(simpleParent);
            }
        }
        return script != null ? script : UScript.getName(UScript.UNKNOWN);
    }

    /**
     * Diagnostic check: prints "huh?" when the Cyrillic table maps U+0021 to a set
     * containing "tr". Has no other effect.
     */
    private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
        UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
        if (unicodeMap == null) {
            return;
        }
        Set<String> foo = unicodeMap.get(0x21);
        if (foo != null && foo.contains("tr")) {
            System.out.println("huh?");
        }
    }

    /**
     * Writes one HTML table for a script: a row per locale, a column per single-cluster
     * exemplar, and a final "Clusters" column listing the multi-cluster exemplars.
     * Nothing is written for the script "Hangul".
     *
     * @param out the page being written
     * @param script script name, used as the table caption
     * @param mapping exemplar item -> locales using it
     */
    private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
        // TODO, make default language for script
        BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT);
        System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
        if (script.equals("Hangul")) {
            return; // skip these
        }
        // find out all the locales and all the characters
        Set<String> allLocales = new TreeSet<>(UCA);
        Set<String> allChars = new TreeSet<>(UCA);
        Set<String> allStrings = new TreeSet<>(UCA);
        for (Set<String> locales : mapping.getAvailableValues()) {
            allLocales.addAll(locales);
            for (String item : mapping.keySet(locales)) {
                charBreaks.setText(item);
                // an item that ends at its first character boundary is a single cluster
                if (charBreaks.next() == item.length()) {
                    allChars.add(item);
                } else {
                    allStrings.add(item);
                }
            }
        }
        // get the columns, and show them
        out.println("<table class='table' style='width:1%'>");
        out.println("<caption>" + script + "</caption>");
        exemplarHeader(out, allChars);

        for (String locale : allLocales) {
            String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
                + cleanLocale(locale, true) + "</td>";
            out.println("<tr>");
            out.println(headerHeader);

            for (String item : allChars) {
                if (mapping.get(item).contains(locale)) {
                    out.println("<td class='cell'" +
                        ">" + displayCharacter(item) + "</td>");
                } else {
                    out.println("<td class='empty'>\u00a0</td>");
                }
            }
            // now strings, if any
            StringBuilder strings = new StringBuilder();
            int lastLineStart = 0;
            for (String item : allStrings) {
                if (mapping.get(item).contains(locale)) {
                    int str_len = strings.length();
                    if (str_len != 0) {
                        // start a new display line once the current one exceeds 20 chars
                        if (str_len - lastLineStart > 20) {
                            strings.append(System.lineSeparator());
                            lastLineStart = str_len;
                        } else {
                            strings.append(' ');
                        }
                    }
                    strings.append(displayCharacter(item));
                }
            }
            if (strings.length() == 0) {
                out.println("<td class='empty'>\u00a0</td>");
            } else {
                // Every item was already escaped by displayCharacter; escaping the joined
                // text again would double-escape it.
                out.println("<td class='cell nowrap'>" + strings.toString().replace(System.lineSeparator(), "<br>")
                    + "</td>");
            }

            out.println(headerHeader);
            out.println("</tr>");
        }
        exemplarHeader(out, allChars);
        out.println("</table>");
        out.flush();
    }

    /**
     * Builds a {@code title='...'} attribute listing the code points and character names of an item.
     */
    private static String characterTitle(String item) {
        return ("title='U+" +
            toHTML.transform(
                Utility.hex(item, 4, ", U+", true, new StringBuilder())
                    + " " + UCharacter.getName(item, ", "))
            + "'");
    }

    /**
     * Writes the header row of an exemplar table: a label, one cell per character,
     * the "Clusters" column and the label again.
     */
    private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
        out.println("<tr>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        for (String item : allChars) {
            out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
        }
        out.println("<th class='head'>Clusters</th>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        out.println("</tr>");
    }

    static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();

    /**
     * Returns the HTML form of an exemplar item, as produced by {@link #toHTML}.
     * Items starting with a code point in {@link #NONSPACING} are wrapped in no-break spaces first.
     *
     * @param item the item to show
     * @return the transformed text, or {@code <i>none</i>} for the empty string
     */
    public static String displayCharacter(String item) {
        if (item.length() == 0) {
            return "<i>none</i>";
        }
        if (NONSPACING.contains(item.codePointAt(0))) {
            item = "\u00a0" + item + "\u00a0";
        }
        return toHTML.transform(item);
    }

    static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
    static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
    static {
        allButScripts.remove(Fields.SCRIPT);
    }

    /**
     * Formats a locale ID for display with its script subtag removed.
     *
     * @param item locale ID, optionally ending with "*" to mark a draft value; may be null
     * @param name if true, return the English name of the locale instead of its ID
     * @return HTML text, in italics for draft locales; {@code <i>null</i>} when the ID or name is null
     */
    private static String cleanLocale(String item, boolean name) {
        if (item == null) {
            return "<i>null</i>";
        }
        boolean draft = item.endsWith("*");
        if (draft) {
            item = item.substring(0, item.length() - 1);
        }
        cleanLocaleParser.set(item);
        String core = cleanLocaleParser.toString(allButScripts);
        String result;
        if (name) {
            String englishName = english.getName(core);
            result = englishName == null ? "<i>null</i>" : toHTML.transform(englishName);
        } else {
            result = toHTML.transform(core);
        }
        return draft ? "<i>" + result + "</i>" : result;
    }

    /** Merges two locale sets into a new sorted set; a null side yields the other side unchanged. */
    static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<>() {
        @Override
        public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
            if (a == null) {
                return b;
            }
            if (b == null) {
                return a;
            }
            TreeSet<String> result = new TreeSet<>(a);
            result.addAll(b);
            return result;
        }
    };

    /** Locale ID -> name of the first script found in its main exemplar set. */
    static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();

    /**
     * Reads every available locale and fills {@link #path_value_locales} and
     * {@link #LOCALE_TO_SCRIPT}. Skipped: locales that cannot be opened, non-inheriting files,
     * paths rejected by {@link #pathMatcher}, alt-proposed, alias, identity and references
     * paths, hidden paths, and values that are null or the inheritance marker.
     *
     * @param cldrFactory source of the locale files
     */
    private static void loadInformation(Factory cldrFactory) {
        // gather all information
        // TODO tweek for value-laden attributes
        for (String localeID : cldrFactory.getAvailable()) {
            System.out.println("Loading: " + localeID);
            System.out.flush();

            CLDRFile cldrFile;
            try {
                cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
            } catch (IllegalArgumentException e) {
                System.err.println("Couldn't open " + localeID);
                continue;
            }
            if (cldrFile.isNonInheriting()) {
                continue;
            }
            for (String path : cldrFile) {
                if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
                    continue;
                }
                if (altProposedMatcher.reset(path).matches()) {
                    continue;
                }
                if (path.indexOf("/alias") >= 0
                    || path.indexOf("/identity") >= 0
                    || path.indexOf("/references") >= 0) {
                    continue;
                }
                PathHeader ph = fixPath(path, null);
                if (ph == null || ph.shouldHide()) {
                    continue;
                }
                String fullPath = cldrFile.getFullXPath(path);
                String value = getValue(cldrFile, path, fullPath);
                if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
                    continue;
                }
                // unconfirmed/provisional values are recorded with a trailing "*" on the locale
                boolean draft = fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
                    || fullPath.indexOf("[@draft=\"provisional\"]") >= 0;
                String postFix = draft ? "*" : "";

                if (path.equals("//ldml/characters/exemplarCharacters")) {
                    try {
                        UnicodeSet exemplars = new UnicodeSet(value);
                        LOCALE_TO_SCRIPT.put(localeID, UScript.getName(getFirstScript(exemplars)));
                    } catch (RuntimeException e) {
                        // best effort: the locale simply gets no script, but say so
                        System.err.println("Couldn't determine script for " + localeID + ": " + e);
                    }
                }
                Map<String, Set<String>> value_locales = path_value_locales.get(ph);
                if (value_locales == null) {
                    value_locales = new TreeMap<>(standardCollation);
                    path_value_locales.put(ph, value_locales);
                }
                Set<String> locales = value_locales.get(value);
                if (locales == null) {
                    locales = new TreeSet<>();
                    value_locales.put(value, locales);
                }
                locales.add(localeID + postFix);
            }
        }
        // print script -> locales, for information
        Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
        for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
            sorted.put(s.getValue(), s.getKey());
        }
        for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
            System.out.println(s);
        }
    }

    static PathHeader.Factory pathHeaderFactory;

    /**
     * Converts a path into its {@link PathHeader}.
     *
     * @param path the distinguishing path
     * @param localePrefix if non-null, element 0 is reset to the empty string
     * @return the result of {@code pathHeaderFactory.fromPath(path)}
     */
    private static PathHeader fixPath(String path, String[] localePrefix) {
        if (localePrefix != null) {
            localePrefix[0] = "";
        }
        return pathHeaderFactory.fromPath(path);
    }

    /**
     * Returns the path with the given attributes removed from every element.
     */
    private static String removeAttributes(String xpath, Set<String> skipAttributes) {
        XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes
        removeAttributes(parts, skipAttributes);
        return parts.toString();
    }

    /**
     * Removes the given attributes from every element of {@code parts}, in place.
     *
     * @param parts thawed path parts to modify
     * @param skipAttributes attribute names to remove
     */
    private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
        for (int i = 0; i < parts.size(); ++i) {
            for (String attributeToRemove : skipAttributes) {
                parts.removeAttribute(i, attributeToRemove);
            }
        }
    }

    static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt"));

    /** Scratch object receiving the result of {@code getSourceLocaleID}. */
    static Status status = new Status();

    /**
     * Returns the value to record for a path.
     *
     * @param cldrFile the locale file
     * @param path the distinguishing path
     * @param fullPath the full path, used when the value is empty
     * @return null if there is no value (a message is printed) or the value was found at a
     *         different path; for an empty value, the last element of {@code fullPath} without
     *         its draft and alt attributes; otherwise the string value
     */
    private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
        String value = cldrFile.getStringValue(path);
        if (value == null) {
            System.out.println("Null value for " + path);
            return null;
        }
        cldrFile.getSourceLocaleID(path, status);
        if (!path.equals(status.pathWhereFound)) {
            return null;
        }
        if (value.length() != 0) {
            return value;
        }
        XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes
        removeAttributes(parts, skipSet);
        int limit = parts.size();
        return parts.toString(limit - 1, limit);
    }

    /**
     * Builds the base file name (without extension) for a page: "section.page", with
     * spaces, slashes and parentheses replaced by underscores, optionally followed by
     * "." and a suffix, all lower-cased.
     *
     * @param header supplies the section and page
     * @param suffix optional suffix; null for none
     */
    private static String getFileName2(PathHeader header, String suffix) {
        String result = (header.getSection() + "." + header.getPage())
            .replace(" ", "_")
            .replace("/", "_")
            .replace("(", "_")
            .replace(")", "_");
        if (suffix != null) {
            result += "." + suffix;
        }
        return result.toLowerCase(Locale.ENGLISH);
    }

    /** Filled by {@code ShowData.getChartTemplate}: [0] is the page header, [1] the footer. */
    static String[] headerAndFooter = new String[2];
    private static Transliterator toHTML;

    /**
     * Finishes the previous page, if any, and starts a new one.
     *
     * @param out the previous page's writer; may be null
     * @param main base file name of the new page
     * @param headerString navigation header printed after the chart header
     * @param title page title
     * @param tsvFile holder of the TSV writer
     * @return the writer of the new page
     * @throws IOException if the page cannot be opened
     */
    private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
        throws IOException {
        finish(out, tsvFile.value);
        out = writeHeader(main, title, tsvFile);
        out.println(headerString);
        return out;
    }

    /**
     * Builds the navigation table: one row per section, containing links to that section's
     * pages and, after the Alphabetic_Information page, to the exemplar pages.
     *
     * @param set the path headers, expected in sorted order so that equal sections and pages are adjacent
     * @return the HTML of the table
     */
    public static String getHeader(Set<PathHeader> set) {
        StringBuilder out = new StringBuilder("<table class='simple'><tr>");
        String lastMain = "";
        String lastSub = "";
        for (PathHeader pathHeader : set) {
            String mainName = pathHeader.getSection();
            String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
            if (!mainName.equals(lastMain)) {
                if (lastMain.length() != 0) {
                    // close the previous section's cell as well as its row
                    out.append("</td></tr>" + System.lineSeparator() + "<tr>");
                }
                out.append("<th align='right' nowrap style='vertical-align: top'><b>"
                    + TransliteratorUtilities.toHTML.transform(mainName)
                    + ":&nbsp;</b></th><td>");
                lastMain = mainName;
                lastSub = subName;
            } else if (!subName.equals(lastSub)) {
                out.append(" | ");
                lastSub = subName;
            } else {
                continue; // identical, skip
            }
            out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
            if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
                for (String[] pair : EXEMPLARS) {
                    out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
                }
            }
        }
        return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
    }

    /**
     * Opens the HTML file for a page and prints the chart header. On the first call
     * (when {@code tsvFile.value} is null) the TSV file is also opened and its two
     * comment lines are written.
     *
     * @param main base file name of the page
     * @param title page title, shown after "By-Type Chart: "
     * @param tsvFile holder of the TSV writer
     * @return the writer of the new page
     * @throws IOException if a file cannot be opened
     */
    private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
        PrintWriter out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
        if (tsvFile.value == null) {
            tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
            tsvFile.value.println("# By-Type Data");
            tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
        }

        ShowData.getChartTemplate("By-Type Chart: " + title,
            ToolConstants.CHART_DISPLAY_VERSION,
            "",
            headerAndFooter, null, false);
        out.println(headerAndFooter[0]);
        return out;
    }

    /**
     * Ends a page: closes the by-type table if one is open, prints the footer and closes
     * the writer. Does nothing when {@code out} is null.
     *
     * @param out the page to finish; may be null
     * @param tsvFile unused; the TSV file stays open across pages
     */
    private static void finish(PrintWriter out, PrintWriter tsvFile) {
        if (out == null) {
            return;
        }
        if (tableOpen) {
            out.println("</table>");
            tableOpen = false;
        }
        out.println(headerAndFooter[1]);
        out.close();
    }

    /**
     * Closes the TSV file, if it was ever opened.
     *
     * @param out unused
     * @param tsvFile the TSV writer; may be null
     */
    private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
        if (tsvFile != null) {
            tsvFile.close();
        }
    }
}