/*
 **********************************************************************
 * Copyright (c) 2002-2004, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: Mark Davis
 **********************************************************************
 */
package org.unicode.cldr.tool;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;

import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.ShowData.DataShower;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LanguageTagParser.Fields;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StringId;
import org.unicode.cldr.util.TransliteratorUtilities;
import org.unicode.cldr.util.XPathParts;
import org.xml.sax.SAXException;

import com.google.common.collect.ImmutableMap;
import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.RuleBasedNumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;

/**
 * This is a simple class that walks through the CLDR hierarchy.
 * It gathers together all the items from all the locales that share the
 * same element chain, and thus presents a "sideways" view of the data, in files called
 * by_type/X.html, where X is a type. X may be the concatenation of more than
 * one element, where the file would otherwise be too large.
 *
 * @author medavis
 */
/*
 * Notes:
 * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
 * http://developers.sun.com/dev/coolstuff/xml/readme.html
 * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
 * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
 */
public class GenerateSidewaysView {
    /** Name of the output sub-directory and base name of the TSV file. */
    private static final String DIR_NAME = "by_type";

    // debug flags
    static final boolean DEBUG = false;
    static final boolean DEBUG2 = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean DEBUG_ELEMENT = false;
    static final boolean DEBUG_SHOW_BAT = false;

    static final boolean FIX_ZONE_ALIASES = true;

    // Indexes into the options array below; the two must be kept in the same order.
    private static final int HELP1 = 0,
        HELP2 = 1,
        SOURCEDIR = 2,
        DESTDIR = 3,
        MATCH = 4,
        SKIP = 5,
        TZADIR = 6,
        NONVALIDATING = 7,
        SHOW_DTD = 8,
        TRANSLIT = 9,
        PATH = 10;

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
        UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
        UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
            "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
        UOption.create("nonvalidating", 'n', UOption.NO_ARG),
        UOption.create("dtd", 'w', UOption.NO_ARG),
        UOption.create("transliterate", 'y', UOption.NO_ARG),
        UOption.create("path", 'p', UOption.REQUIRES_ARG),
    };

    /** Reusable matcher used to skip paths that carry an alt="proposed..." attribute. */
    private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
    protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();

    /**
     * Returns the script of the first code point (in iteration order of the set) whose script is
     * neither Common nor Inherited, or {@code UScript.COMMON} if there is none.
     *
     * @param exemplars the set to inspect
     * @return a {@code UScript} script code
     */
    static int getFirstScript(UnicodeSet exemplars) {
        for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
            int script = UScript.getScript(it.codepoint);
            if (script == UScript.COMMON || script == UScript.INHERITED) {
                continue;
            }
            return script;
        }
        return UScript.COMMON;
    }

    /** Root collation (numeric, identical strength) with a code point comparison as tie-breaker. */
    static Comparator<Object> UCA;
    static {
        RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        UCA2.setNumericCollation(true);
        UCA2.setStrength(Collator.IDENTICAL);
        UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
    }

    /** For each path: the values found for it, each mapped to the locales (draft ones suffixed "*") using it. */
    private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();
    private static long startTime = System.currentTimeMillis();

    /** Collator used to order the values of a path. */
    static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
    static {
        standardCollation.setStrength(Collator.IDENTICAL);
        standardCollation.setNumericCollation(true);
    }

    private static CLDRFile english;
    /** Optional filter built from the "path" option; null when the option was not given. */
    private static Matcher pathMatcher;

    /**
     * Loads the locale data, then writes the index page, one HTML page per section/page pair,
     * the exemplar pages, and a single TSV file with the same data.
     *
     * @param args command line arguments, parsed against {@link #options}
     * @throws SAXException declared for callers; not thrown directly by the visible code
     * @throws IOException if an output file cannot be opened or written
     */
    public static void main(String[] args) throws SAXException, IOException {
        startTime = System.currentTimeMillis();
        ToolUtilities.registerExtraTransliterators();
        UOption.parseArgs(args, options);

        pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");

        // NOTE: the source directories are fixed here; the "sourcedir" option is not consulted.
        File[] paths = {
            new File(CLDRPaths.MAIN_DIRECTORY),
            new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
            new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
        };
        Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);

        english = cldrFactory.make("en", true);
        pathHeaderFactory = PathHeader.getFactory(english);

        final String destDir = options[DESTDIR].value;
        FileCopier.ensureDirectoryExists(destDir);
        FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", destDir, "index.css");
        FormattedFileWriter.copyIncludeHtmls(destDir);

        // now get the info
        loadInformation(cldrFactory);

        System.out.println("Getting types " + path_value_locales.size());
        String headerString = getHeader(path_value_locales.keySet());
        FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", destDir, "index.html",
            ImmutableMap.of(
                "%header%", headerString,
                "%version%", ToolConstants.CHART_DISPLAY_VERSION,
                "%index%", "../index.html",
                "%index-title%", "Main Charts Index",
                "%date%", CldrUtility.isoFormatDateOnly(new Date())));

        System.out.println("Printing files in " + new File(destDir).getAbsolutePath());
        toHTML = TransliteratorUtilities.toHTML;

        String oldMain = "";
        String oldHeader = "";
        PrintWriter out = null;
        Output<PrintWriter> tsvFile = new Output<>();

        for (Entry<PathHeader, Map<String, Set<String>>> pathEntry : path_value_locales.entrySet()) {
            PathHeader path = pathEntry.getKey();
            String main = getFileName2(path, null);
            if (!main.equals(oldMain)) {
                // first path of a new section/page: close the previous page and open the next one
                oldMain = main;
                out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
                out.println("<table class='table'>");
                oldHeader = "";
            }
            String anchor = toHTML.transliterate(path.getCode());

            String originalPath = path.getOriginalPath();
            String englishValue = english.getStringValue(originalPath);
            englishValue = englishValue == null ? "" : "English: ‹" + englishValue + "›";

            String header = path.getHeader();
            if (!header.equals(oldHeader) && !header.equals("null")) {
                out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
                    + "</th></tr>");
                oldHeader = header;
            }
            String anchorId = Long.toHexString(StringId.getId(originalPath));
            out.println("<tr>" +
                "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
                "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
                "</tr>");

            for (Entry<String, Set<String>> valueEntry : pathEntry.getValue().entrySet()) {
                String value = valueEntry.getKey();
                String valueClass = DataShower.getBidiStyle(value).length() != 0
                    ? " class='rtl_value'"
                    : " class='value'";
                out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
                tsvFile.value.print(
                    path.getSection()
                        + "\t" + path.getPage()
                        + "\t" + path.getHeader()
                        + "\t" + path.getCode()
                        + "\t" + value
                        + "\t");

                printLocales(out, tsvFile.value, valueEntry.getValue());

                out.println("</td></tr>");
                tsvFile.value.println();
            }
        }
        for (String[] exemplar : EXEMPLARS) {
            // showExemplars finishes the page passed in and opens a new one. Keep the writer it
            // returns; otherwise the exemplar pages never get their footer and are never closed.
            out = showExemplars(out, headerString, exemplar[0], exemplar[1], exemplar[2], tsvFile);
        }
        finish(out, tsvFile.value);
        finishAll(out, tsvFile.value);
        System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
            .format((System.currentTimeMillis() - startTime) / 1000.0));
    }

    /**
     * Writes the locale list for one value to both the HTML page and the TSV file.
     * Draft locales (suffix "*") are written in italics. When the list contains "root", only
     * draft locales and locales whose parent is not in the list are shown, followed by an
     * "all others" marker.
     *
     * @param out the HTML page
     * @param tsv the TSV file
     * @param locales the locales sharing the value
     */
    private static void printLocales(PrintWriter out, PrintWriter tsv, Set<String> locales) {
        boolean first = true;
        boolean containsRoot = locales.contains("root");
        for (String locale : locales) {
            if (first) {
                first = false;
            } else {
                out.print(" ");
            }
            if (locale.endsWith("*")) {
                locale = locale.substring(0, locale.length() - 1);
                out.print("<i>\u00B7" + locale + "\u00B7</i>");
                tsv.print("\u00B7" + locale + "\u00B7");
            } else if (!containsRoot) {
                out.print("\u00B7" + locale + "\u00B7");
                tsv.print("\u00B7" + locale + "\u00B7");
            } else if (locale.contains("_")) {
                // not same as root, but need to test for parent
                // if the parent is not in the same list, then we include anyway.
                // Cf http://unicode.org/cldr/trac/ticket/7228
                String parent = LocaleIDParser.getParent(locale);
                if (!locales.contains(parent)) {
                    out.print("<b>\u00B7" + locale + "\u00B7</b>");
                    tsv.print("\u00B7" + locale + "\u00B7");
                }
            }
        }
        if (containsRoot) {
            out.print("<b>\u00B7all\u00B7others\u00B7</b>");
            tsv.print("\u00B7all-others\u00B7");
        }
    }

    /** Exemplar pages to generate: { path, file-name variant, page title }. */
    static final String[][] EXEMPLARS = {
        { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
        // TODO look at numbers, auxiliary
    };

    /**
     * Finishes the page in {@code out}, opens the exemplar page for {@code pathName}, and writes
     * one character-by-locale table per script.
     *
     * @param out the currently open page, or null; it is finished and closed
     * @param headerString navigation header written at the top of the new page
     * @param pathName the exemplar path whose values are shown
     * @param variant file-name suffix; "main" additionally adds the NFD forms of each exemplar
     * @param title title of the new page
     * @param tsvFile holder of the TSV writer (created on first use)
     * @return the writer of the newly opened page; the caller must finish it
     * @throws IOException if the page cannot be opened
     */
    private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
        Output<PrintWriter> tsvFile)
        throws IOException {
        PathHeader cleanPath = fixPath(pathName, null);
        String filename = getFileName2(cleanPath, variant);
        out = start(out, filename, headerString, title, tsvFile);
        Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
        if (value_locales == null) {
            // Nothing was loaded for this path (for example it was excluded by the path or
            // match filter); produce a page without tables instead of failing.
            value_locales = new TreeMap<>();
        }

        // TODO change logic so that aux characters work well.

        Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
        UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");

        // get the locale information
        UnicodeSet totalExemplars = new UnicodeSet();
        for (Entry<String, Set<String>> valueEntry : value_locales.entrySet()) {
            // flatten out UnicodeSet
            UnicodeSet exemplars = new UnicodeSet(valueEntry.getKey());
            if (variant.equals("main")) {
                UnicodeSet extras = new UnicodeSet();
                for (String item : exemplars) {
                    extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
                }
                exemplars.addAll(extras);
            }
            totalExemplars.addAll(exemplars);
            exemplars.removeAll(stuffToSkip);

            for (String locale : valueEntry.getValue()) {
                checkTr(script_UnicodeMap);
                String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
                String script = lookupScript(key);
                Set<String> temp = new HashSet<>();
                temp.add(locale);
                checkTr(script_UnicodeMap);
                UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
                if (mapping == null) {
                    script_UnicodeMap.put(script, mapping = new UnicodeMap<>());
                }
                checkTr(script_UnicodeMap);
                mapping.composeWith(exemplars, temp, setComposer);
                checkTr(script_UnicodeMap);
            }
        }
        System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
        for (Entry<String, UnicodeMap<Set<String>>> scriptEntry : script_UnicodeMap.entrySet()) {
            writeCharToLocaleMapping(out, scriptEntry.getKey(), scriptEntry.getValue());
        }
        return out;
    }

    /**
     * Finds the script recorded in {@link #LOCALE_TO_SCRIPT} for a locale, trying the locale
     * itself and then up to two levels of simple parents.
     *
     * @param localeId locale ID without the draft marker
     * @return the script name, or the name of {@code UScript.UNKNOWN} if none is recorded
     */
    private static String lookupScript(String localeId) {
        String script = LOCALE_TO_SCRIPT.get(localeId);
        // try a few variants until we get the script
        if (script == null && localeId.contains("_")) {
            String simpleParent = LanguageTagParser.getSimpleParent(localeId);
            script = LOCALE_TO_SCRIPT.get(simpleParent);
            if (script == null && simpleParent.contains("_")) {
                simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
                script = LOCALE_TO_SCRIPT.get(simpleParent);
            }
        }
        if (script == null) {
            script = UScript.getName(UScript.UNKNOWN);
        }
        return script;
    }

    /**
     * Debugging probe: prints "huh?" if the "Cyrillic" map lists locale "tr" for U+0021.
     */
    private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
        UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
        if (unicodeMap == null) {
            return;
        }
        Set<String> foo = unicodeMap.get(0x21);
        if (foo == null) {
            return;
        }
        if (foo.contains("tr")) {
            System.out.println("huh?");
        }
    }

    /**
     * Writes one HTML table for a script: a row per locale, a column per single-character
     * exemplar, and a final "Clusters" column listing the multi-character exemplars.
     * Nothing is written for the script "Hangul".
     *
     * @param out the page to write to; it is flushed after the table
     * @param script script name, used as the table caption
     * @param mapping exemplar item to the set of locales that contain it
     */
    private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
        System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
        if (script.equals("Hangul")) { // || script.equals("Common")
            return; // skip these
        }
        // TODO, make default language for script
        BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT);

        // find out all the locales and all the characters
        Set<String> allLocales = new TreeSet<>(UCA);
        Set<String> allChars = new TreeSet<>(UCA);
        Set<String> allStrings = new TreeSet<>(UCA);
        for (Set<String> locales : mapping.getAvailableValues()) {
            allLocales.addAll(locales);
            UnicodeSet unicodeSet = mapping.keySet(locales);
            for (String item : unicodeSet) {
                // an item that is a single grapheme cluster gets its own column
                charBreaks.setText(item);
                int endFirst = charBreaks.next();
                if (endFirst == item.length()) {
                    allChars.add(item);
                } else {
                    allStrings.add(item);
                }
            }
        }
        // get the columns, and show them
        out.println("<table class='table' style='width:1%'>");
        out.println("<caption>" + script + "</caption>");
        exemplarHeader(out, allChars);

        final String lineBreak = System.lineSeparator();
        for (String locale : allLocales) {
            String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
                + cleanLocale(locale, true) + "</td>";
            out.println("<tr>");
            out.println(headerHeader);

            for (String item : allChars) {
                if (mapping.get(item).contains(locale)) {
                    out.println("<td class='cell'" +
                        ">" + displayCharacter(item) + "</td>");
                } else {
                    out.println("<td class='empty'>\u00a0</td>");
                }
            }
            // now strings, if any
            StringBuilder strings = new StringBuilder();
            int lastLineStart = 0;
            for (String item : allStrings) {
                if (mapping.get(item).contains(locale)) {
                    int str_len = strings.length();
                    if (str_len != 0) {
                        // start a new line once the current one exceeds 20 chars
                        if (str_len - lastLineStart > 20) {
                            strings.append(lineBreak);
                            lastLineStart = str_len;
                        } else {
                            strings.append(' ');
                        }
                    }
                    strings.append(displayCharacter(item));
                }
            }
            if (strings.length() == 0) {
                out.println("<td class='empty'>\u00a0</td>");
            } else {
                // Each item was already escaped by displayCharacter above; escaping the joined
                // text a second time would double-escape it.
                out.println("<td class='cell nowrap'>" + strings.toString().replace(lineBreak, "<br>")
                    + "</td>");
            }

            out.println(headerHeader);
            out.println("</tr>");
        }
        exemplarHeader(out, allChars);
        out.println("</table>");
        out.flush();
    }

    /**
     * Builds an HTML {@code title} attribute listing the code points of an item in hex,
     * followed by the character names.
     */
    private static String characterTitle(String item) {
        return ("title='U+" +
            toHTML.transform(
                Utility.hex(item, 4, ", U+", true, new StringBuilder())
                    + " " + UCharacter.getName(item, ", "))
            + "'");
    }

    /** Writes the header (or footer) row of an exemplar table: one column per character. */
    private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
        out.println("<tr>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        for (String item : allChars) {
            out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
        }
        out.println("<th class='head'>Clusters</th>");
        out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
        out.println("</tr>");
    }

    static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();

    /**
     * Returns the HTML form of an exemplar item. An item whose first code point is in
     * {@link #NONSPACING} is wrapped in no-break spaces before being transformed.
     *
     * @param item the item to display
     * @return {@code <i>none</i>} for the empty string, otherwise the transformed item
     */
    public static String displayCharacter(String item) {
        if (item.length() == 0) {
            return "<i>none</i>";
        }
        int ch = item.codePointAt(0);
        if (NONSPACING.contains(ch)) {
            item = "\u00a0" + item + "\u00a0";
        }
        return toHTML.transform(item);
    }

    static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
    /** Every language tag field except the script. */
    static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
    static {
        allButScripts.remove(Fields.SCRIPT);
    }

    /**
     * Formats a locale for an exemplar table: the locale ID without its script field, or the
     * English name of that ID. Draft locales (suffix "*") are wrapped in {@code <i>}.
     *
     * @param item locale ID, possibly with a trailing "*"; may be null
     * @param name true to return the English name, false to return the ID
     * @return the HTML text, or {@code <i>null</i>} if the input or the name is null
     */
    private static String cleanLocale(String item, boolean name) {
        if (item == null) {
            return "<i>null</i>";
        }
        boolean draft = item.endsWith("*");
        if (draft) {
            item = item.substring(0, item.length() - 1);
        }
        cleanLocaleParser.set(item);
        String core = cleanLocaleParser.toString(allButScripts);
        if (name) {
            item = english.getName(core);
            item = item == null ? "<i>null</i>" : toHTML.transform(item);
        } else {
            item = toHTML.transform(core);
        }
        if (draft) {
            item = "<i>" + item + "</i>";
        }
        return item;
    }

    /** Composer that merges two locale sets into their union; a null side yields the other side. */
    static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() {
        @Override
        public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
            if (a == null) {
                return b;
            } else if (b == null) {
                return a;
            } else {
                TreeSet<String> result = new TreeSet<>(a);
                result.addAll(b);
                return result;
            }
        }
    };

    /** Locale ID to script name, derived from each locale's main exemplar set. */
    static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();

    /**
     * Reads every available locale and fills {@link #path_value_locales} and
     * {@link #LOCALE_TO_SCRIPT}. Skipped: non-inheriting files, paths rejected by the path
     * filter, alt-proposed paths, alias/identity/references paths, deprecated or hidden paths,
     * and values that are absent, inherited or the inheritance marker.
     *
     * @param cldrFactory source of the locale files
     */
    private static void loadInformation(Factory cldrFactory) {
        Set<String> alllocales = cldrFactory.getAvailable();
        // gather all information
        // TODO tweak for value-laden attributes
        for (String localeID : alllocales) {
            System.out.println("Loading: " + localeID);
            System.out.flush();

            CLDRFile cldrFile;
            try {
                cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
            } catch (IllegalArgumentException e) {
                System.err.println("Couldn't open " + localeID);
                continue;
            }
            if (cldrFile.isNonInheriting()) {
                continue;
            }
            for (String path : cldrFile) {
                if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
                    continue;
                }
                if (altProposedMatcher.reset(path).matches()) {
                    continue;
                }
                if (path.indexOf("/alias") >= 0
                    || path.indexOf("/identity") >= 0
                    || path.indexOf("/references") >= 0) {
                    continue;
                }
                PathHeader cleanPath = fixPath(path, null);
                final SurveyToolStatus surveyToolStatus = cleanPath.getSurveyToolStatus();
                if (surveyToolStatus == SurveyToolStatus.DEPRECATED || surveyToolStatus == SurveyToolStatus.HIDE) {
                    continue;
                }
                String fullPath = cldrFile.getFullXPath(path);
                String value = getValue(cldrFile, path, fullPath);
                if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
                    continue;
                }
                // draft values are recorded with a "*" after the locale ID
                boolean draft = fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
                    || fullPath.indexOf("[@draft=\"provisional\"]") >= 0;
                String postFix = draft ? "*" : "";

                if (path.equals("//ldml/characters/exemplarCharacters")) {
                    try {
                        UnicodeSet exemplars = new UnicodeSet(value);
                        String script = UScript.getName(getFirstScript(exemplars));
                        LOCALE_TO_SCRIPT.put(localeID, script);
                    } catch (RuntimeException e) {
                        // Best effort: the locale simply gets no script entry, but say so
                        // rather than dropping the problem silently.
                        System.err.println("Couldn't determine script for " + localeID + ": " + e);
                    }
                }
                Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
                if (value_locales == null) {
                    path_value_locales.put(cleanPath, value_locales = new TreeMap<>(
                        standardCollation));
                }
                Set<String> locales = value_locales.get(value);
                if (locales == null) {
                    value_locales.put(value, locales = new TreeSet<>());
                }
                locales.add(localeID + postFix);
            }
        }
        // report which locales were assigned to which script
        Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
        for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
            sorted.put(s.getValue(), s.getKey());
        }
        for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
            System.out.println(s);
        }
    }

    static PathHeader.Factory pathHeaderFactory;

    /**
     * Converts a path to its {@link PathHeader}.
     *
     * @param path the path to convert
     * @param localePrefix if not null, element 0 is reset to the empty string
     * @return the header produced by {@link #pathHeaderFactory}
     */
    private static PathHeader fixPath(String path, String[] localePrefix) {
        if (localePrefix != null) {
            localePrefix[0] = "";
        }
        return pathHeaderFactory.fromPath(path);
    }

    /**
     * Returns the path with the given attributes removed from every element.
     *
     * @param xpath the path to clean
     * @param skipAttributes names of the attributes to remove
     * @return the cleaned path
     */
    private static String removeAttributes(String xpath, Set<String> skipAttributes) {
        XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes
        removeAttributes(parts, skipAttributes);
        return parts.toString();
    }

    /**
     * Removes the given attributes from every element of {@code parts}, in place.
     *
     * @param parts the (thawed) path parts to modify
     * @param skipAttributes names of the attributes to remove
     */
    private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
        for (int i = 0; i < parts.size(); ++i) {
            Map<String, String> attributes = parts.getAttributes(i);
            for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
                String attribute = it.next();
                if (skipAttributes.contains(attribute)) {
                    it.remove();
                }
            }
        }
    }

    /** Attributes dropped when an element stands in for an empty value. */
    static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt"));

    /** Scratch object filled in by {@code getSourceLocaleID}. */
    static Status status = new Status();

    /**
     * Returns the value to chart for a path.
     *
     * @param cldrFile the file being read
     * @param path the distinguishing path
     * @param fullPath the full path, used when the value is empty
     * @return null if there is no value or it was found at a different path; for an empty
     *         value, the last element of the full path without draft/alt attributes;
     *         otherwise the value itself
     */
    private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
        String value = cldrFile.getStringValue(path);
        if (value == null) {
            System.out.println("Null value for " + path);
            return null;
        }
        cldrFile.getSourceLocaleID(path, status);
        if (!path.equals(status.pathWhereFound)) {
            // found at another path, so not a value of this path itself
            return null;
        }
        if (value.length() == 0) {
            XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes
            removeAttributes(parts, skipSet);
            int limit = parts.size();
            return parts.toString(limit - 1, limit);
        }
        return value;
    }

    /**
     * Builds the lower-cased base file name "section.page[.suffix]" for a header, with spaces,
     * slashes and parentheses replaced by underscores.
     *
     * @param header supplies the section and page
     * @param suffix optional extra name part; may be null
     * @return the file name without extension
     */
    private static String getFileName2(PathHeader header, String suffix) {
        String result = (header.getSection() + "." + header.getPage())
            .replace(" ", "_")
            .replace("/", "_")
            .replace("(", "_")
            .replace(")", "_");
        if (suffix != null) {
            result += "." + suffix;
        }
        return result.toLowerCase(Locale.ENGLISH);
    }

    /** Page header at index 0 and page footer at index 1, filled in by {@link #writeHeader}. */
    static String[] headerAndFooter = new String[2];
    private static Transliterator toHTML;

    /**
     * Finishes the current page, if any, and opens the next one.
     *
     * @param out the page to finish; may be null
     * @param main base file name of the new page
     * @param headerString navigation header written after the page header
     * @param title title of the new page
     * @param tsvFile holder of the TSV writer (created on first use)
     * @return the writer of the new page
     * @throws IOException if the page cannot be opened
     */
    private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
        throws IOException {
        finish(out, tsvFile.value);
        out = writeHeader(main, title, tsvFile);
        out.println(headerString);
        return out;
    }

    /**
     * Builds the navigation table linking to every page: one row per section, one link per
     * page, plus links to the exemplar pages after the Alphabetic Information page.
     *
     * @param set the headers to index, expected in section/page order
     * @return the HTML of the table
     */
    public static String getHeader(Set<PathHeader> set) {
        StringBuilder out = new StringBuilder("<table class='simple'><tr>");
        String lastMain = "";
        String lastSub = "";
        for (PathHeader pathHeader : set) {
            String mainName = pathHeader.getSection();
            String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
            if (!mainName.equals(lastMain)) {
                if (lastMain.length() != 0) {
                    out.append("</tr>" + System.lineSeparator() + "<tr>");
                }
                out.append("<th align='right' nowrap style='vertical-align: top'><b>"
                    + TransliteratorUtilities.toHTML.transform(mainName)
                    + ":&nbsp;</b></th><td>");
                lastMain = mainName;
                lastSub = subName;
            } else if (!subName.equals(lastSub)) {
                out.append(" | ");
                lastSub = subName;
            } else {
                continue; // identical, skip
            }
            out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
            if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
                for (String[] pair : EXEMPLARS) {
                    out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
                }
            }
        }
        return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
    }

    /**
     * Opens {@code main + ".html"} in the destination directory and writes the page header.
     * On the first call it also opens the TSV file and writes its two comment lines.
     *
     * @param main base file name of the page
     * @param title page title, appended to "By-Type Chart: "
     * @param tsvFile holder of the TSV writer; filled in if its value is null
     * @return the writer of the new page
     * @throws IOException if a file cannot be opened
     */
    private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
        PrintWriter out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
        if (tsvFile.value == null) {
            tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
            tsvFile.value.println("# By-Type Data");
            tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
        }

        ShowData.getChartTemplate("By-Type Chart: " + title,
            ToolConstants.CHART_DISPLAY_VERSION,
            "",
            headerAndFooter, null, false);
        out.println(headerAndFooter[0]);
        return out;
    }

    /**
     * Closes the open table, writes the page footer and closes the page. Does nothing if
     * {@code out} is null.
     * NOTE(review): the table end tag is written unconditionally, although the exemplar pages
     * have already closed their tables; confirm whether that stray tag matters.
     *
     * @param out the page to finish; may be null
     * @param tsvFile unused
     */
    private static void finish(PrintWriter out, PrintWriter tsvFile) {
        if (out == null) {
            return;
        }
        out.println("</table>");
        out.println(headerAndFooter[1]);
        out.close();
    }

    /**
     * Closes the TSV file.
     *
     * @param out unused
     * @param tsvFile the TSV writer to close
     */
    private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
        tsvFile.close();
    }
}