1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.tool; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 import java.util.Arrays; 15 import java.util.Comparator; 16 import java.util.Date; 17 import java.util.EnumSet; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.Iterator; 21 import java.util.Locale; 22 import java.util.Map; 23 import java.util.Map.Entry; 24 import java.util.Set; 25 import java.util.TreeMap; 26 import java.util.TreeSet; 27 import java.util.regex.Matcher; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.tool.ShowData.DataShower; 31 import org.unicode.cldr.util.CLDRFile; 32 import org.unicode.cldr.util.CLDRFile.Status; 33 import org.unicode.cldr.util.CLDRPaths; 34 import org.unicode.cldr.util.CldrUtility; 35 import org.unicode.cldr.util.Factory; 36 import org.unicode.cldr.util.FileCopier; 37 import org.unicode.cldr.util.LanguageTagParser; 38 import org.unicode.cldr.util.LanguageTagParser.Fields; 39 import org.unicode.cldr.util.LocaleIDParser; 40 import org.unicode.cldr.util.PathHeader; 41 import org.unicode.cldr.util.PathHeader.PageId; 42 import org.unicode.cldr.util.PatternCache; 43 import org.unicode.cldr.util.SimpleFactory; 44 import org.unicode.cldr.util.StringId; 45 import org.unicode.cldr.util.TransliteratorUtilities; 46 import org.unicode.cldr.util.XPathParts; 47 import org.xml.sax.SAXException; 48 49 import com.google.common.collect.ImmutableMap; 50 import com.ibm.icu.dev.tool.UOption; 51 import com.ibm.icu.dev.util.UnicodeMap; 52 import com.ibm.icu.impl.Relation; 53 import com.ibm.icu.impl.Utility; 54 import com.ibm.icu.lang.UCharacter; 55 import com.ibm.icu.lang.UScript; 56 import com.ibm.icu.text.BreakIterator; 57 import com.ibm.icu.text.Collator; 58 import com.ibm.icu.text.Normalizer; 59 import com.ibm.icu.text.RuleBasedCollator; 60 import com.ibm.icu.text.RuleBasedNumberFormat; 61 import com.ibm.icu.text.Transliterator; 62 import com.ibm.icu.text.UTF16; 63 import com.ibm.icu.text.UnicodeSet; 64 import com.ibm.icu.text.UnicodeSetIterator; 65 import com.ibm.icu.util.Output; 66 import com.ibm.icu.util.ULocale; 67 68 /** 69 * This is a simple class that walks through the CLDR hierarchy. 70 * It gathers together all the items from all the locales that share the 71 * same element chain, and thus presents a "sideways" view of the data, in files called 72 * by_type/X.html, where X is a type. X may be the concatenation of more than more than 73 * one element, where the file would otherwise be too large. 74 * 75 * @author medavis 76 */ 77 /* 78 * Notes: 79 * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3 80 * http://developers.sun.com/dev/coolstuff/xml/readme.html 81 * http://lists.xml.org/archives/xml-dev/200007/msg00284.html 82 * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html 83 */ 84 public class GenerateSidewaysView { 85 private static final String DIR_NAME = "by_type"; 86 // debug flags 87 static final boolean DEBUG = false; 88 static final boolean DEBUG2 = false; 89 static final boolean DEBUG_SHOW_ADD = false; 90 static final boolean DEBUG_ELEMENT = false; 91 static final boolean DEBUG_SHOW_BAT = false; 92 93 static final boolean FIX_ZONE_ALIASES = true; 94 95 private static final int HELP1 = 0, 96 HELP2 = 1, 97 SOURCEDIR = 2, 98 DESTDIR = 3, 99 MATCH = 4, 100 SKIP = 5, 101 TZADIR = 6, 102 NONVALIDATING = 7, 103 SHOW_DTD = 8, 104 TRANSLIT = 9, 105 PATH = 10; 106 107 private static final UOption[] options = { 108 UOption.HELP_H(), 109 UOption.HELP_QUESTION_MARK(), 110 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 111 UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/ 112 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 113 UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"), 114 UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault( 115 "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"), 116 UOption.create("nonvalidating", 'n', UOption.NO_ARG), 117 UOption.create("dtd", 'w', UOption.NO_ARG), 118 UOption.create("transliterate", 'y', UOption.NO_ARG), 119 UOption.create("path", 'p', UOption.REQUIRES_ARG), 120 }; 121 122 private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher(""); 123 // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF); 124 protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze(); 125 getFirstScript(UnicodeSet exemplars)126 static int getFirstScript(UnicodeSet exemplars) { 127 for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) { 128 int script = UScript.getScript(it.codepoint); 129 if (script == UScript.COMMON || script == UScript.INHERITED) { 130 continue; 131 } 132 return script; 133 } 134 return UScript.COMMON; 135 } 136 137 static Comparator<Object> UCA; 138 static { 139 RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 140 UCA2.setNumericCollation(true); 141 UCA2.setStrength(Collator.IDENTICAL); 142 UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0)); 143 } 144 145 private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>(); 146 private static long startTime = System.currentTimeMillis(); 147 148 static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); 149 static { 150 standardCollation.setStrength(Collator.IDENTICAL); 151 standardCollation.setNumericCollation(true); 152 } 153 154 private static CLDRFile english; 155 // private static DataShower dataShower = new DataShower(); 156 private static Matcher pathMatcher; 157 main(String[] args)158 public static void main(String[] args) throws SAXException, IOException { 159 startTime = System.currentTimeMillis(); 160 ToolUtilities.registerExtraTransliterators(); 161 UOption.parseArgs(args, options); 162 163 pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher(""); 164 165 File[] paths = { 166 new File(CLDRPaths.MAIN_DIRECTORY), 167 new File(CLDRPaths.ANNOTATIONS_DIRECTORY), 168 new File(CLDRPaths.SUBDIVISIONS_DIRECTORY) 169 }; 170 Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value); 171 172 // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value); 173 english = cldrFactory.make("en", true); 174 pathHeaderFactory = PathHeader.getFactory(english); 175 176 FileCopier.ensureDirectoryExists(options[DESTDIR].value); 177 FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css"); 178 FormattedFileWriter.copyIncludeHtmls(options[DESTDIR].value); 179 180 // now get the info 181 182 loadInformation(cldrFactory); 183 String oldMain = ""; 184 PrintWriter out = null; 185 186 System.out.println("Getting types " + path_value_locales.size()); 187 // Set<String> types = new TreeSet<String>(); 188 // for (PathHeader path : path_value_locales.keySet()) { 189 // String main = getFileName2(path); 190 // if (!main.equals(oldMain)) { 191 // oldMain = main; 192 // types.add(main); 193 // } 194 // } 195 String headerString = getHeader(path_value_locales.keySet()); 196 FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html", 197 ImmutableMap.of( 198 "%header%", headerString, 199 "%version%", ToolConstants.CHART_DISPLAY_VERSION, 200 "%index%", "../index.html", 201 "%index-title%", "Main Charts Index", 202 "%date%", CldrUtility.isoFormatDateOnly(new Date()))); 203 // FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html", 204 // new String[] { "%header%", headerString }); 205 206 System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath()); 207 // Transliterator toLatin = Transliterator.getInstance("any-latin"); 208 toHTML = TransliteratorUtilities.toHTML; 209 // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]"); 210 211 String oldHeader = ""; 212 Output<PrintWriter> tsvFile = new Output<>(); 213 214 for (PathHeader path : path_value_locales.keySet()) { 215 String main = getFileName2(path, null); 216 if (!main.equals(oldMain)) { 217 oldMain = main; 218 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile); 219 out.println("<table class='table'>"); 220 oldHeader = ""; 221 } 222 String key = path.getCode(); 223 String anchor = toHTML.transliterate(key); 224 225 String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path); 226 String englishValue = english.getStringValue(originalPath); 227 if (englishValue != null) { 228 englishValue = "English: ‹" + englishValue + "›"; 229 } else { 230 englishValue = ""; 231 } 232 233 String header = path.getHeader(); 234 if (!header.equals(oldHeader) && !header.equals("null")) { 235 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header) 236 + "</th></tr>"); 237 oldHeader = header; 238 } 239 String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath())); 240 out.println("<tr>" + 241 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" + 242 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" + 243 "</tr>"); 244 Map<String, Set<String>> value_locales = path_value_locales.get(path); 245 for (String value : value_locales.keySet()) { 246 // String outValue = toHTML.transliterate(value); 247 // String transValue = value; 248 // try { 249 // transValue = toLatin.transliterate(value); 250 // } catch (RuntimeException e) { 251 // } 252 // if (!transValue.equals(value)) { 253 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>"; 254 // } 255 String valueClass = " class='value'"; 256 if (DataShower.getBidiStyle(value).length() != 0) { 257 valueClass = " class='rtl_value'"; 258 } 259 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>"); 260 tsvFile.value.print( 261 path.getSection() 262 + "\t" + path.getPage() 263 + "\t" + path.getHeader() 264 + "\t" + path.getCode() 265 + "\t" + value 266 + "\t"); 267 268 Set<String> locales = value_locales.get(value); 269 boolean first = true; 270 boolean containsRoot = locales.contains("root"); 271 for (String locale : locales) { 272 if (first) 273 first = false; 274 else 275 out.print(" "); 276 if (locale.endsWith("*")) { 277 locale = locale.substring(0, locale.length() - 1); 278 out.print("<i>\u00B7" + locale + "\u00B7</i>"); 279 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 280 } else if (!containsRoot) { 281 out.print("\u00B7" + locale + "\u00B7"); 282 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 283 } else if (locale.contains("_")) { 284 // not same as root, but need to test for parent 285 // if the parent is not in the same list, then we include anyway. 286 // Cf http://unicode.org/cldr/trac/ticket/7228 287 String parent = LocaleIDParser.getParent(locale); 288 if (!locales.contains(parent)) { 289 out.print("<b>\u00B7" + locale + "\u00B7</b>"); 290 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 291 } 292 } 293 } 294 if (containsRoot) { 295 out.print("<b>\u00B7all\u00B7others\u00B7</b>"); 296 tsvFile.value.print("\u00B7all-others\u00B7"); 297 } 298 out.println("</td></tr>"); 299 tsvFile.value.println(); 300 } 301 } 302 for (String[] pair : EXEMPLARS) { 303 showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile); 304 } 305 finish(out, tsvFile.value); 306 finishAll(out, tsvFile.value); 307 System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION) 308 .format((System.currentTimeMillis() - startTime) / 1000.0)); 309 } 310 311 static final String[][] EXEMPLARS = { 312 { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" }, 313 { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" }, 314 { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" }, 315 // TODO look at numbers, auxiliary 316 }; 317 showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)318 private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, 319 Output<PrintWriter> tsvFile) 320 throws IOException { 321 PathHeader ph = fixPath(pathName, null); 322 String filename = getFileName2(ph, variant); 323 out = start(out, filename, headerString, title, tsvFile); 324 Map<String, Set<String>> value_locales = path_value_locales.get(ph); 325 326 // TODO change logic so that aux characters characters work well. 327 328 Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>(); 329 // UnicodeMap mapping = new UnicodeMap(); 330 UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]"); 331 332 // get the locale information 333 UnicodeSet totalExemplars = new UnicodeSet(); 334 for (String value : value_locales.keySet()) { 335 // flatten out UnicodeSet 336 UnicodeSet exemplars = new UnicodeSet(value); 337 if (variant.equals("main")) { 338 UnicodeSet extras = new UnicodeSet(); 339 for (String item : exemplars) { 340 extras.addAll(Normalizer.normalize(item, Normalizer.NFD)); 341 } 342 exemplars.addAll(extras); 343 } 344 totalExemplars.addAll(exemplars); 345 exemplars.removeAll(stuffToSkip); 346 347 Set<String> locales = value_locales.get(value); 348 //String script = UScript.getName(getFirstScript(exemplars)); 349 for (String locale : locales) { 350 checkTr(script_UnicodeMap); 351 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale; 352 String script = LOCALE_TO_SCRIPT.get(key); 353 // try a few variants until we get the script 354 if (script == null && key.contains("_")) { 355 String simpleParent = LanguageTagParser.getSimpleParent(key); 356 script = LOCALE_TO_SCRIPT.get(simpleParent); 357 if (script == null && simpleParent.contains("_")) { 358 simpleParent = LanguageTagParser.getSimpleParent(simpleParent); 359 script = LOCALE_TO_SCRIPT.get(simpleParent); 360 } 361 } 362 if (script == null) { 363 script = UScript.getName(UScript.UNKNOWN); 364 } 365 Set<String> temp = new HashSet<>(); 366 temp.add(locale); 367 checkTr(script_UnicodeMap); 368 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script); 369 if (mapping == null) { 370 script_UnicodeMap.put(script, mapping = new UnicodeMap<>()); 371 } 372 checkTr(script_UnicodeMap); 373 mapping.composeWith(exemplars, temp, setComposer); 374 checkTr(script_UnicodeMap); 375 } 376 } 377 System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false)); 378 for (String script : script_UnicodeMap.keySet()) { 379 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script); 380 writeCharToLocaleMapping(out, script, mapping); 381 } 382 return out; 383 } 384 checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)385 private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) { 386 UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic"); 387 if (unicodeMap == null) { 388 return; 389 } 390 Set<String> foo = unicodeMap.get(0x21); 391 if (foo == null) { 392 return; 393 } 394 if (foo.contains("tr")) { 395 System.out.println("huh?"); 396 } 397 } 398 writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)399 private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) { 400 BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for 401 // script 402 System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet()); 403 if (script.equals("Hangul")) { // || script.equals("Common") 404 return; // skip these 405 } 406 // find out all the locales and all the characters 407 Set<String> allLocales = new TreeSet<>(UCA); 408 Set<String> allChars = new TreeSet<>(UCA); 409 Set<String> allStrings = new TreeSet<>(UCA); 410 for (Set<String> locales : mapping.getAvailableValues()) { 411 allLocales.addAll(locales); 412 UnicodeSet unicodeSet = mapping.keySet(locales); 413 for (String item : unicodeSet) { 414 charBreaks.setText(item); 415 int endFirst = charBreaks.next(); 416 if (endFirst == item.length()) { 417 allChars.add(item); 418 } else { 419 allStrings.add(item); 420 } 421 } 422 } 423 // get the columns, and show them 424 out.println("<table class='table' style='width:1%'>"); 425 out.println("<caption>" + script + "</caption>"); 426 exemplarHeader(out, allChars); 427 428 for (String locale : allLocales) { 429 String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>" 430 + cleanLocale(locale, true) + "</td>"; 431 out.println("<tr>"); 432 out.println(headerHeader); 433 434 for (String item : allChars) { 435 // String exemplarsWithoutBrackets = displayExemplars(item); 436 if (mapping.get(item).contains(locale)) { 437 out.println("<td class='cell'" + 438 ">" + displayCharacter(item) + "</td>"); 439 } else { 440 out.println("<td class='empty'>\u00a0</td>"); 441 } 442 } 443 // now strings, if any 444 StringBuilder strings = new StringBuilder(); 445 int lastLineStart = 0; 446 for (String item : allStrings) { 447 // String exemplarsWithoutBrackets = displayExemplars(item); 448 if (mapping.get(item).contains(locale)) { 449 int str_len = strings.length(); 450 if (str_len != 0) { 451 if (str_len - lastLineStart > 20) { 452 strings.append(System.lineSeparator()); 453 lastLineStart = str_len; 454 } else { 455 strings.append(' '); 456 } 457 } 458 strings.append(displayCharacter(item)); 459 } 460 } 461 if (strings.length() == 0) { 462 out.println("<td class='empty'>\u00a0</td>"); 463 } else { 464 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>") 465 + "</td>"); 466 } 467 468 out.println(headerHeader); 469 out.println("</tr>"); 470 } 471 exemplarHeader(out, allChars); 472 out.println("</table>"); 473 out.flush(); 474 } 475 characterTitle(String item)476 private static String characterTitle(String item) { 477 return ("title='U+" + 478 toHTML.transform( 479 Utility.hex(item, 4, ", U+", true, new StringBuilder()) 480 + " " + UCharacter.getName(item, ", ")) 481 + "'"); 482 } 483 exemplarHeader(PrintWriter out, Set<String> allChars)484 private static void exemplarHeader(PrintWriter out, Set<String> allChars) { 485 out.println("<tr>"); 486 out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>"); 487 for (String item : allChars) { 488 out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>"); 489 } 490 out.println("<th class='head'>Clusters</th>"); 491 out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>"); 492 out.println("</tr>"); 493 } 494 495 static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze(); 496 displayCharacter(String item)497 public static String displayCharacter(String item) { 498 if (item.length() == 0) return "<i>none</i>"; 499 int ch = item.codePointAt(0); 500 if (NONSPACING.contains(ch)) { 501 item = "\u00a0" + item + "\u00a0"; 502 } 503 String result = toHTML.transform(item); 504 return result; 505 } 506 507 static LanguageTagParser cleanLocaleParser = new LanguageTagParser(); 508 static Set<Fields> allButScripts = EnumSet.allOf(Fields.class); 509 static { 510 allButScripts.remove(Fields.SCRIPT); 511 } 512 cleanLocale(String item, boolean name)513 private static String cleanLocale(String item, boolean name) { 514 if (item == null) { 515 return "<i>null</i>"; 516 } 517 boolean draft = item.endsWith("*"); 518 if (draft) { 519 item = item.substring(0, item.length() - 1); 520 } 521 cleanLocaleParser.set(item); 522 item = cleanLocaleParser.toString(allButScripts); 523 String core = item; 524 item = toHTML.transform(item); 525 if (name) { 526 item = english.getName(core); 527 item = item == null ? "<i>null</i>" : toHTML.transform(item); 528 } 529 if (draft) { 530 item = "<i>" + item + "</i>"; 531 } 532 return item; 533 } 534 535 // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) { 536 // String exemplarsWithoutBrackets = displayExemplars(lastChars); 537 // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>"); 538 // for (String item : allLocales) { 539 // String cleanItem; 540 // if (locales.contains(item)) { 541 // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>"; 542 // } else { 543 // cleanItem = "<td class='value'>\u00a0</td>"; 544 // } 545 // out.println(cleanItem); 546 // } 547 // out.println("</tr>"); 548 // } 549 550 // private static final StringTransform MyTransform = new StringTransform() { 551 // 552 // public String transform(String source) { 553 // StringBuilder builder = new StringBuilder(); 554 // int cp = 0; 555 // builder.append("<span title='"); 556 // String prefix = ""; 557 // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { 558 // cp = UTF16.charAt(source, i); 559 // if (i == 0) { 560 // if (COMBINING.contains(cp)) { 561 // prefix = "\u25CC"; 562 // } 563 // } else { 564 // builder.append(" + "); 565 // } 566 // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp)); 567 // } 568 // builder.append("'>").append(prefix).append(source).append("</span>"); 569 // return builder.toString(); 570 // } 571 // 572 // }; 573 574 // private static String displayExemplars(UnicodeSet lastChars) { 575 // String exemplarsWithoutBrackets = new PrettyPrinter() 576 // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)) 577 // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT) 578 // .setStrength2(Collator.PRIMARY)) 579 // .setCompressRanges(true) 580 // .setToQuote(ALL_CHARS) 581 // .setQuoter(MyTransform) 582 // .format(lastChars); 583 // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1); 584 // return exemplarsWithoutBrackets; 585 // } 586 587 // private static boolean isNextCharacter(String last, String value) { 588 // if (UTF16.hasMoreCodePointsThan(last, 1)) return false; 589 // if (UTF16.hasMoreCodePointsThan(value, 1)) return false; 590 // int lastChar = UTF16.charAt(last,0); 591 // int valueChar = UTF16.charAt(value,0); 592 // return lastChar + 1 == valueChar; 593 // } 594 595 static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() { 596 @Override 597 public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) { 598 if (a == null) { 599 return b; 600 } else if (b == null) { 601 return a; 602 } else { 603 TreeSet<String> result = new TreeSet<>(a); 604 result.addAll(b); 605 return result; 606 } 607 } 608 }; 609 610 static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>(); 611 loadInformation(Factory cldrFactory)612 private static void loadInformation(Factory cldrFactory) { 613 Set<String> alllocales = cldrFactory.getAvailable(); 614 String[] postFix = new String[] { "" }; 615 // gather all information 616 // TODO tweek for value-laden attributes 617 for (String localeID : alllocales) { 618 System.out.println("Loading: " + localeID); 619 System.out.flush(); 620 621 CLDRFile cldrFile; 622 try { 623 cldrFile = cldrFactory.make(localeID, localeID.equals("root")); 624 } catch (IllegalArgumentException e) { 625 System.err.println("Couldn't open " + localeID); 626 continue; 627 } 628 if (cldrFile.isNonInheriting()) continue; 629 for (String path : cldrFile) { 630 if (pathMatcher != null && !pathMatcher.reset(path).matches()) { 631 continue; 632 } 633 if (altProposedMatcher.reset(path).matches()) { 634 continue; 635 } 636 if (path.indexOf("/alias") >= 0) continue; 637 if (path.indexOf("/identity") >= 0) continue; 638 if (path.indexOf("/references") >= 0) continue; 639 PathHeader ph = fixPath(path, postFix); 640 if (ph == null || ph.shouldHide()) { 641 continue; 642 } 643 String fullPath = cldrFile.getFullXPath(path); 644 String value = getValue(cldrFile, path, fullPath); 645 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) { 646 continue; 647 } 648 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0 649 || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) { 650 postFix[0] = "*"; 651 } 652 if (path.equals("//ldml/characters/exemplarCharacters")) { 653 UnicodeSet exemplars; 654 try { 655 exemplars = new UnicodeSet(value); 656 String script = UScript.getName(getFirstScript(exemplars)); 657 LOCALE_TO_SCRIPT.put(localeID, script); 658 } catch (Exception e) { 659 660 } 661 } 662 Map<String, Set<String>> value_locales = path_value_locales.get(ph); 663 if (value_locales == null) { 664 path_value_locales.put(ph, value_locales = new TreeMap<>( 665 standardCollation)); 666 } 667 Set<String> locales = value_locales.get(value); 668 if (locales == null) { 669 value_locales.put(value, locales = new TreeSet<>()); 670 } 671 locales.add(localeID + postFix[0]); 672 } 673 } 674 Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 675 for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) { 676 sorted.put(s.getValue(), s.getKey()); 677 } 678 for (Entry<String, Set<String>> s : sorted.keyValuesSet()) { 679 System.out.println(s); 680 } 681 } 682 683 static PathHeader.Factory pathHeaderFactory; 684 685 /** 686 * 687 * @param path 688 * @param localePrefix 689 * @return 690 */ fixPath(String path, String[] localePrefix)691 private static PathHeader fixPath(String path, String[] localePrefix) { 692 if (localePrefix != null) { 693 localePrefix[0] = ""; 694 } 695 return pathHeaderFactory.fromPath(path); 696 } 697 removeAttributes(String xpath, Set<String> skipAttributes)698 private static String removeAttributes(String xpath, Set<String> skipAttributes) { 699 XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes 700 removeAttributes(parts, skipAttributes); 701 return parts.toString(); 702 } 703 704 /** 705 * 706 * @param parts 707 * @param skipAttributes 708 */ removeAttributes(XPathParts parts, Set<String> skipAttributes)709 private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) { 710 for (int i = 0; i < parts.size(); ++i) { 711 // String element = parts.getElement(i); 712 Map<String, String> attributes = parts.getAttributes(i); 713 for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) { 714 String attribute = it.next(); 715 if (skipAttributes.contains(attribute)) it.remove(); 716 } 717 } 718 } 719 720 static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt")); 721 722 static Status status = new Status(); 723 724 /** 725 * 726 */ getValue(CLDRFile cldrFile, String path, String fullPath)727 private static String getValue(CLDRFile cldrFile, String path, String fullPath) { 728 String value = cldrFile.getStringValue(path); 729 if (value == null) { 730 System.out.println("Null value for " + path); 731 return value; 732 } 733 cldrFile.getSourceLocaleID(path, status); 734 if (!path.equals(status.pathWhereFound)) { 735 // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]"; 736 value = null; 737 return value; 738 } 739 if (value.length() == 0) { 740 XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes 741 removeAttributes(parts, skipSet); 742 int limit = parts.size(); 743 value = parts.toString(limit - 1, limit); 744 return value; 745 } 746 return value; 747 } 748 getFileName2(PathHeader header, String suffix)749 private static String getFileName2(PathHeader header, String suffix) { 750 String result = (header.getSection() + "." + header.getPage()) 751 .replace(" ", "_") 752 .replace("/", "_") 753 .replace("(", "_") 754 .replace(")", "_"); 755 if (suffix != null) { 756 result += "." + suffix; 757 } 758 return result.toLowerCase(Locale.ENGLISH); 759 } 760 761 static String[] headerAndFooter = new String[2]; 762 private static Transliterator toHTML; 763 764 /** 765 * @param tsvFile TODO 766 * @param path2 767 * 768 */ start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)769 private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile) 770 throws IOException { 771 finish(out, tsvFile.value); 772 out = writeHeader(main, title, tsvFile); 773 out.println(headerString); 774 return out; 775 } 776 getHeader(Set<PathHeader> set)777 public static String getHeader(Set<PathHeader> set) { 778 StringBuffer out = new StringBuffer("<table class='simple'><tr>"); 779 String lastMain = ""; 780 String lastSub = ""; 781 for (PathHeader pathHeader : set) { 782 String mainName = pathHeader.getSection(); 783 String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage()); 784 if (!mainName.equals(lastMain)) { 785 if (lastMain.length() != 0) { 786 out.append("</tr>" + System.lineSeparator() + "<tr>"); 787 } 788 out.append("<th align='right' nowrap style='vertical-align: top'><b>" 789 + TransliteratorUtilities.toHTML.transform(mainName) 790 + ": </b></th><td>"); 791 lastMain = mainName; 792 lastSub = subName; 793 } else if (!subName.equals(lastSub)) { 794 out.append(" | "); 795 lastSub = subName; 796 } else { 797 continue; // identical, skip 798 } 799 out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>"); 800 if (pathHeader.getPageId() == PageId.Alphabetic_Information) { 801 for (String[] pair : EXEMPLARS) { 802 out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>"); 803 } 804 } 805 continue; 806 } 807 return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString(); 808 } 809 writeHeader(String main, String title, Output<PrintWriter> tsvFile)810 private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException { 811 PrintWriter out; 812 out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html"); 813 if (tsvFile.value == null) { 814 tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv"); 815 tsvFile.value.println("# By-Type Data"); 816 tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales"); 817 } 818 819 ShowData.getChartTemplate("By-Type Chart: " + title, 820 ToolConstants.CHART_DISPLAY_VERSION, 821 "", 822 headerAndFooter, null, false); 823 out.println(headerAndFooter[0]); 824 return out; 825 } 826 827 /** 828 * @param tsvFile TODO 829 * 830 */ finish(PrintWriter out, PrintWriter tsvFile)831 private static void finish(PrintWriter out, PrintWriter tsvFile) { 832 if (out == null) return; 833 out.println("</table>"); 834 out.println(headerAndFooter[1]); 835 out.close(); 836 } 837 finishAll(PrintWriter out, PrintWriter tsvFile)838 private static void finishAll(PrintWriter out, PrintWriter tsvFile) { 839 // TODO Auto-generated method stub 840 //tsvFile.println("# EOF"); 841 tsvFile.close(); 842 } 843 } 844