1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.tool; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 import java.util.Arrays; 15 import java.util.Comparator; 16 import java.util.Date; 17 import java.util.EnumSet; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.Iterator; 21 import java.util.Locale; 22 import java.util.Map; 23 import java.util.Map.Entry; 24 import java.util.Set; 25 import java.util.TreeMap; 26 import java.util.TreeSet; 27 import java.util.regex.Matcher; 28 29 import org.unicode.cldr.draft.FileUtilities; 30 import org.unicode.cldr.tool.ShowData.DataShower; 31 import org.unicode.cldr.util.CLDRFile; 32 import org.unicode.cldr.util.CLDRFile.Status; 33 import org.unicode.cldr.util.CLDRPaths; 34 import org.unicode.cldr.util.CldrUtility; 35 import org.unicode.cldr.util.Factory; 36 import org.unicode.cldr.util.FileCopier; 37 import org.unicode.cldr.util.LanguageTagParser; 38 import org.unicode.cldr.util.LanguageTagParser.Fields; 39 import org.unicode.cldr.util.LocaleIDParser; 40 import org.unicode.cldr.util.PathHeader; 41 import org.unicode.cldr.util.PathHeader.PageId; 42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 43 import org.unicode.cldr.util.PatternCache; 44 import org.unicode.cldr.util.SimpleFactory; 45 import org.unicode.cldr.util.StringId; 46 import org.unicode.cldr.util.TransliteratorUtilities; 47 import org.unicode.cldr.util.XPathParts; 48 import org.xml.sax.SAXException; 49 50 import com.google.common.collect.ImmutableMap; 51 import com.ibm.icu.dev.tool.UOption; 52 import com.ibm.icu.dev.util.UnicodeMap; 53 import com.ibm.icu.impl.Relation; 54 import com.ibm.icu.impl.Utility; 55 import com.ibm.icu.lang.UCharacter; 56 import com.ibm.icu.lang.UScript; 57 import com.ibm.icu.text.BreakIterator; 58 import com.ibm.icu.text.Collator; 59 import com.ibm.icu.text.Normalizer; 60 import com.ibm.icu.text.RuleBasedCollator; 61 import com.ibm.icu.text.RuleBasedNumberFormat; 62 import com.ibm.icu.text.Transliterator; 63 import com.ibm.icu.text.UTF16; 64 import com.ibm.icu.text.UnicodeSet; 65 import com.ibm.icu.text.UnicodeSetIterator; 66 import com.ibm.icu.util.Output; 67 import com.ibm.icu.util.ULocale; 68 69 /** 70 * This is a simple class that walks through the CLDR hierarchy. 71 * It gathers together all the items from all the locales that share the 72 * same element chain, and thus presents a "sideways" view of the data, in files called 73 * by_type/X.html, where X is a type. X may be the concatenation of more than more than 74 * one element, where the file would otherwise be too large. 75 * 76 * @author medavis 77 */ 78 /* 79 * Notes: 80 * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3 81 * http://developers.sun.com/dev/coolstuff/xml/readme.html 82 * http://lists.xml.org/archives/xml-dev/200007/msg00284.html 83 * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html 84 */ 85 public class GenerateSidewaysView { 86 private static final String DIR_NAME = "by_type"; 87 // debug flags 88 static final boolean DEBUG = false; 89 static final boolean DEBUG2 = false; 90 static final boolean DEBUG_SHOW_ADD = false; 91 static final boolean DEBUG_ELEMENT = false; 92 static final boolean DEBUG_SHOW_BAT = false; 93 94 static final boolean FIX_ZONE_ALIASES = true; 95 96 private static final int HELP1 = 0, 97 HELP2 = 1, 98 SOURCEDIR = 2, 99 DESTDIR = 3, 100 MATCH = 4, 101 SKIP = 5, 102 TZADIR = 6, 103 NONVALIDATING = 7, 104 SHOW_DTD = 8, 105 TRANSLIT = 9, 106 PATH = 10; 107 108 private static final UOption[] options = { 109 UOption.HELP_H(), 110 UOption.HELP_QUESTION_MARK(), 111 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 112 UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/ 113 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 114 UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"), 115 UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault( 116 "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"), 117 UOption.create("nonvalidating", 'n', UOption.NO_ARG), 118 UOption.create("dtd", 'w', UOption.NO_ARG), 119 UOption.create("transliterate", 'y', UOption.NO_ARG), 120 UOption.create("path", 'p', UOption.REQUIRES_ARG), 121 }; 122 123 private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher(""); 124 // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF); 125 protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze(); 126 getFirstScript(UnicodeSet exemplars)127 static int getFirstScript(UnicodeSet exemplars) { 128 for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) { 129 int script = UScript.getScript(it.codepoint); 130 if (script == UScript.COMMON || script == UScript.INHERITED) { 131 continue; 132 } 133 return script; 134 } 135 return UScript.COMMON; 136 } 137 138 static Comparator<Object> UCA; 139 static { 140 RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 141 UCA2.setNumericCollation(true); 142 UCA2.setStrength(Collator.IDENTICAL); 143 UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0)); 144 } 145 146 private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<PathHeader, Map<String, Set<String>>>(); 147 private static XPathParts parts = new XPathParts(null, null); 148 private static long startTime = System.currentTimeMillis(); 149 150 static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); 151 static { 152 standardCollation.setStrength(Collator.IDENTICAL); 153 standardCollation.setNumericCollation(true); 154 } 155 156 private static CLDRFile english; 157 // private static DataShower dataShower = new DataShower(); 158 private static Matcher pathMatcher; 159 main(String[] args)160 public static void main(String[] args) throws SAXException, IOException { 161 startTime = System.currentTimeMillis(); 162 ToolUtilities.registerExtraTransliterators(); 163 UOption.parseArgs(args, options); 164 165 pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher(""); 166 167 File[] paths = { 168 new File(CLDRPaths.MAIN_DIRECTORY), 169 new File(CLDRPaths.ANNOTATIONS_DIRECTORY), 170 new File(CLDRPaths.SUBDIVISIONS_DIRECTORY) 171 }; 172 Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value); 173 174 // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value); 175 english = cldrFactory.make("en", true); 176 pathHeaderFactory = PathHeader.getFactory(english); 177 178 FileCopier.ensureDirectoryExists(options[DESTDIR].value); 179 FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css"); 180 181 // now get the info 182 183 loadInformation(cldrFactory); 184 String oldMain = ""; 185 PrintWriter out = null; 186 187 System.out.println("Getting types " + path_value_locales.size()); 188 // Set<String> types = new TreeSet<String>(); 189 // for (PathHeader path : path_value_locales.keySet()) { 190 // String main = getFileName2(path); 191 // if (!main.equals(oldMain)) { 192 // oldMain = main; 193 // types.add(main); 194 // } 195 // } 196 String headerString = getHeader(path_value_locales.keySet()); 197 FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html", 198 ImmutableMap.of( 199 "%header%", headerString, 200 "%version%", ToolConstants.CHART_DISPLAY_VERSION, 201 "%index-title%", "Main Charts Index", 202 "%date%", CldrUtility.isoFormatDateOnly(new Date()))); 203 // FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html", 204 // new String[] { "%header%", headerString }); 205 206 System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath()); 207 // Transliterator toLatin = Transliterator.getInstance("any-latin"); 208 toHTML = TransliteratorUtilities.toHTML; 209 // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]"); 210 211 String oldHeader = ""; 212 Output<PrintWriter> tsvFile = new Output<>(); 213 214 for (PathHeader path : path_value_locales.keySet()) { 215 String main = getFileName2(path, null); 216 if (!main.equals(oldMain)) { 217 oldMain = main; 218 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile); 219 out.println("<table class='table'>"); 220 oldHeader = ""; 221 } 222 String key = path.getCode(); 223 String anchor = toHTML.transliterate(key); 224 225 String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path); 226 String englishValue = english.getStringValue(originalPath); 227 if (englishValue != null) { 228 englishValue = "English: ‹" + englishValue + "›"; 229 } else { 230 englishValue = ""; 231 } 232 233 String header = path.getHeader(); 234 if (!header.equals(oldHeader) && !header.equals("null")) { 235 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header) 236 + "</th></tr>"); 237 oldHeader = header; 238 } 239 String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath())); 240 out.println("<tr>" + 241 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" + 242 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" + 243 "</tr>"); 244 Map<String, Set<String>> value_locales = path_value_locales.get(path); 245 for (String value : value_locales.keySet()) { 246 // String outValue = toHTML.transliterate(value); 247 // String transValue = value; 248 // try { 249 // transValue = toLatin.transliterate(value); 250 // } catch (RuntimeException e) { 251 // } 252 // if (!transValue.equals(value)) { 253 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>"; 254 // } 255 String valueClass = " class='value'"; 256 if (DataShower.getBidiStyle(value).length() != 0) { 257 valueClass = " class='rtl_value'"; 258 } 259 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>"); 260 tsvFile.value.print( 261 path.getSection() 262 + "\t" + path.getPage() 263 + "\t" + path.getHeader() 264 + "\t" + path.getCode() 265 + "\t" + value 266 + "\t"); 267 268 Set<String> locales = value_locales.get(value); 269 boolean first = true; 270 boolean containsRoot = locales.contains("root"); 271 for (String locale : locales) { 272 if (first) 273 first = false; 274 else 275 out.print(" "); 276 if (locale.endsWith("*")) { 277 locale = locale.substring(0, locale.length() - 1); 278 out.print("<i>\u00B7" + locale + "\u00B7</i>"); 279 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 280 } else if (!containsRoot) { 281 out.print("\u00B7" + locale + "\u00B7"); 282 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 283 } else if (locale.contains("_")) { 284 // not same as root, but need to test for parent 285 // if the parent is not in the same list, then we include anyway. 286 // Cf http://unicode.org/cldr/trac/ticket/7228 287 String parent = LocaleIDParser.getParent(locale); 288 if (!locales.contains(parent)) { 289 out.print("<b>\u00B7" + locale + "\u00B7</b>"); 290 tsvFile.value.print("\u00B7" + locale + "\u00B7"); 291 } 292 } 293 } 294 if (containsRoot) { 295 out.print("<b>\u00B7all\u00B7others\u00B7</b>"); 296 tsvFile.value.print("\u00B7all-others\u00B7"); 297 } 298 out.println("</td></tr>"); 299 tsvFile.value.println(); 300 } 301 } 302 for (String[] pair : EXEMPLARS) { 303 showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile); 304 } 305 finish(out, tsvFile.value); 306 finishAll(out, tsvFile.value); 307 System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION) 308 .format((System.currentTimeMillis() - startTime) / 1000.0)); 309 } 310 311 // static Comparator UCA; 312 // static { 313 // RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 314 // UCA2.setNumericCollation(true); 315 // UCA2.setStrength(UCA2.IDENTICAL); 316 // UCA = new CollectionUtilities.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0) ); 317 // } 318 319 static final String[][] EXEMPLARS = { 320 { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" }, 321 { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" }, 322 { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" }, 323 // TODO look at numbers, auxiliary 324 }; 325 showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)326 private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, 327 Output<PrintWriter> tsvFile) 328 throws IOException { 329 PathHeader cleanPath = fixPath(pathName, null); 330 String filename = getFileName2(cleanPath, variant); 331 out = start(out, filename, headerString, title, tsvFile); 332 Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath); 333 334 // TODO change logic so that aux characters characters work well. 335 336 Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<String, UnicodeMap<Set<String>>>(); 337 // UnicodeMap mapping = new UnicodeMap(); 338 UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]"); 339 340 // get the locale information 341 UnicodeSet totalExemplars = new UnicodeSet(); 342 for (String value : value_locales.keySet()) { 343 // flatten out UnicodeSet 344 UnicodeSet exemplars = new UnicodeSet(value); 345 if (variant.equals("main")) { 346 UnicodeSet extras = new UnicodeSet(); 347 for (String item : exemplars) { 348 extras.addAll(Normalizer.normalize(item, Normalizer.NFD)); 349 } 350 exemplars.addAll(extras); 351 } 352 totalExemplars.addAll(exemplars); 353 exemplars.removeAll(stuffToSkip); 354 355 Set<String> locales = value_locales.get(value); 356 //String script = UScript.getName(getFirstScript(exemplars)); 357 for (String locale : locales) { 358 checkTr(script_UnicodeMap); 359 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale; 360 String script = LOCALE_TO_SCRIPT.get(key); 361 // try a few variants until we get the script 362 if (script == null && key.contains("_")) { 363 String simpleParent = LanguageTagParser.getSimpleParent(key); 364 script = LOCALE_TO_SCRIPT.get(simpleParent); 365 if (script == null && simpleParent.contains("_")) { 366 simpleParent = LanguageTagParser.getSimpleParent(simpleParent); 367 script = LOCALE_TO_SCRIPT.get(simpleParent); 368 } 369 } 370 if (script == null) { 371 script = UScript.getName(UScript.UNKNOWN); 372 } 373 Set<String> temp = new HashSet<String>(); 374 temp.add(locale); 375 checkTr(script_UnicodeMap); 376 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script); 377 if (mapping == null) { 378 script_UnicodeMap.put(script, mapping = new UnicodeMap<Set<String>>()); 379 } 380 checkTr(script_UnicodeMap); 381 mapping.composeWith(exemplars, temp, setComposer); 382 checkTr(script_UnicodeMap); 383 } 384 } 385 System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false)); 386 for (String script : script_UnicodeMap.keySet()) { 387 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script); 388 writeCharToLocaleMapping(out, script, mapping); 389 } 390 return out; 391 } 392 checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)393 private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) { 394 UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic"); 395 if (unicodeMap == null) { 396 return; 397 } 398 Set<String> foo = unicodeMap.get(0x21); 399 if (foo == null) { 400 return; 401 } 402 if (foo.contains("tr")) { 403 System.out.println("huh?"); 404 } 405 } 406 writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)407 private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) { 408 BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for 409 // script 410 System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet()); 411 if (script.equals("Hangul")) { // || script.equals("Common") 412 return; // skip these 413 } 414 // find out all the locales and all the characters 415 Set<String> allLocales = new TreeSet<String>(UCA); 416 Set<String> allChars = new TreeSet<String>(UCA); 417 Set<String> allStrings = new TreeSet<String>(UCA); 418 for (Set<String> locales : mapping.getAvailableValues()) { 419 allLocales.addAll(locales); 420 UnicodeSet unicodeSet = mapping.keySet(locales); 421 for (String item : unicodeSet) { 422 charBreaks.setText(item); 423 int endFirst = charBreaks.next(); 424 if (endFirst == item.length()) { 425 allChars.add(item); 426 } else { 427 allStrings.add(item); 428 } 429 } 430 } 431 // get the columns, and show them 432 out.println("<table class='table' style='width:1%'>"); 433 out.println("<caption>" + script + "</caption>"); 434 exemplarHeader(out, allChars); 435 436 for (String locale : allLocales) { 437 String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>" 438 + cleanLocale(locale, true) + "</td>"; 439 out.println("<tr>"); 440 out.println(headerHeader); 441 442 for (String item : allChars) { 443 // String exemplarsWithoutBrackets = displayExemplars(item); 444 if (mapping.get(item).contains(locale)) { 445 out.println("<td class='cell'" + 446 ">" + displayCharacter(item) + "</td>"); 447 } else { 448 out.println("<td class='empty'>\u00a0</td>"); 449 } 450 } 451 // now strings, if any 452 StringBuilder strings = new StringBuilder(); 453 int lastLineStart = 0; 454 for (String item : allStrings) { 455 // String exemplarsWithoutBrackets = displayExemplars(item); 456 if (mapping.get(item).contains(locale)) { 457 int str_len = strings.length(); 458 if (str_len != 0) { 459 if (str_len - lastLineStart > 20) { 460 strings.append(System.lineSeparator()); 461 lastLineStart = str_len; 462 } else { 463 strings.append(' '); 464 } 465 } 466 strings.append(displayCharacter(item)); 467 } 468 } 469 if (strings.length() == 0) { 470 out.println("<td class='empty'>\u00a0</td>"); 471 } else { 472 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>") 473 + "</td>"); 474 } 475 476 out.println(headerHeader); 477 out.println("</tr>"); 478 } 479 exemplarHeader(out, allChars); 480 out.println("</table>"); 481 out.flush(); 482 } 483 characterTitle(String item)484 private static String characterTitle(String item) { 485 return ("title='U+" + 486 toHTML.transform( 487 Utility.hex(item, 4, ", U+", true, new StringBuilder()) 488 + " " + UCharacter.getName(item, ", ")) 489 + "'"); 490 } 491 exemplarHeader(PrintWriter out, Set<String> allChars)492 private static void exemplarHeader(PrintWriter out, Set<String> allChars) { 493 out.println("<tr>"); 494 out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>"); 495 for (String item : allChars) { 496 out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>"); 497 } 498 out.println("<th class='head'>Clusters</th>"); 499 out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>"); 500 out.println("</tr>"); 501 } 502 503 static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze(); 504 displayCharacter(String item)505 public static String displayCharacter(String item) { 506 if (item.length() == 0) return "<i>none</i>"; 507 int ch = item.codePointAt(0); 508 if (NONSPACING.contains(ch)) { 509 item = "\u00a0" + item + "\u00a0"; 510 } 511 String result = toHTML.transform(item); 512 return result; 513 } 514 515 static LanguageTagParser cleanLocaleParser = new LanguageTagParser(); 516 static Set<Fields> allButScripts = EnumSet.allOf(Fields.class); 517 static { 518 allButScripts.remove(Fields.SCRIPT); 519 } 520 cleanLocale(String item, boolean name)521 private static String cleanLocale(String item, boolean name) { 522 if (item == null) { 523 return "<i>null</i>"; 524 } 525 boolean draft = item.endsWith("*"); 526 if (draft) { 527 item = item.substring(0, item.length() - 1); 528 } 529 cleanLocaleParser.set(item); 530 item = cleanLocaleParser.toString(allButScripts); 531 String core = item; 532 item = toHTML.transform(item); 533 if (name) { 534 item = english.getName(core); 535 item = item == null ? "<i>null</i>" : toHTML.transform(item); 536 } 537 if (draft) { 538 item = "<i>" + item + "</i>"; 539 } 540 return item; 541 } 542 543 // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) { 544 // String exemplarsWithoutBrackets = displayExemplars(lastChars); 545 // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>"); 546 // for (String item : allLocales) { 547 // String cleanItem; 548 // if (locales.contains(item)) { 549 // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>"; 550 // } else { 551 // cleanItem = "<td class='value'>\u00a0</td>"; 552 // } 553 // out.println(cleanItem); 554 // } 555 // out.println("</tr>"); 556 // } 557 558 // private static final StringTransform MyTransform = new StringTransform() { 559 // 560 // public String transform(String source) { 561 // StringBuilder builder = new StringBuilder(); 562 // int cp = 0; 563 // builder.append("<span title='"); 564 // String prefix = ""; 565 // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { 566 // cp = UTF16.charAt(source, i); 567 // if (i == 0) { 568 // if (COMBINING.contains(cp)) { 569 // prefix = "\u25CC"; 570 // } 571 // } else { 572 // builder.append(" + "); 573 // } 574 // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp)); 575 // } 576 // builder.append("'>").append(prefix).append(source).append("</span>"); 577 // return builder.toString(); 578 // } 579 // 580 // }; 581 582 // private static String displayExemplars(UnicodeSet lastChars) { 583 // String exemplarsWithoutBrackets = new PrettyPrinter() 584 // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)) 585 // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT) 586 // .setStrength2(Collator.PRIMARY)) 587 // .setCompressRanges(true) 588 // .setToQuote(ALL_CHARS) 589 // .setQuoter(MyTransform) 590 // .format(lastChars); 591 // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1); 592 // return exemplarsWithoutBrackets; 593 // } 594 595 // private static boolean isNextCharacter(String last, String value) { 596 // if (UTF16.hasMoreCodePointsThan(last, 1)) return false; 597 // if (UTF16.hasMoreCodePointsThan(value, 1)) return false; 598 // int lastChar = UTF16.charAt(last,0); 599 // int valueChar = UTF16.charAt(value,0); 600 // return lastChar + 1 == valueChar; 601 // } 602 603 static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() { 604 public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) { 605 if (a == null) { 606 return b; 607 } else if (b == null) { 608 return a; 609 } else { 610 TreeSet<String> result = new TreeSet<String>(a); 611 result.addAll(b); 612 return result; 613 } 614 } 615 }; 616 617 static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<String, String>(); 618 loadInformation(Factory cldrFactory)619 private static void loadInformation(Factory cldrFactory) { 620 Set<String> alllocales = cldrFactory.getAvailable(); 621 String[] postFix = new String[] { "" }; 622 // gather all information 623 // TODO tweek for value-laden attributes 624 for (String localeID : alllocales) { 625 System.out.println("Loading: " + localeID); 626 System.out.flush(); 627 628 CLDRFile cldrFile; 629 try { 630 cldrFile = cldrFactory.make(localeID, localeID.equals("root")); 631 } catch (IllegalArgumentException e) { 632 System.err.println("Couldn't open " + localeID); 633 continue; 634 } 635 if (cldrFile.isNonInheriting()) continue; 636 for (String path : cldrFile) { 637 if (pathMatcher != null && !pathMatcher.reset(path).matches()) { 638 continue; 639 } 640 if (altProposedMatcher.reset(path).matches()) { 641 continue; 642 } 643 if (path.indexOf("/alias") >= 0) continue; 644 if (path.indexOf("/identity") >= 0) continue; 645 if (path.indexOf("/references") >= 0) continue; 646 PathHeader cleanPath = fixPath(path, postFix); 647 final SurveyToolStatus surveyToolStatus = cleanPath.getSurveyToolStatus(); 648 if (surveyToolStatus == SurveyToolStatus.DEPRECATED || surveyToolStatus == SurveyToolStatus.HIDE) { 649 // System.out.println("Skipping " + path); 650 continue; 651 } 652 String fullPath = cldrFile.getFullXPath(path); 653 String value = getValue(cldrFile, path, fullPath); 654 if (value == null) { 655 continue; 656 } 657 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0 658 || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) { 659 postFix[0] = "*"; 660 } 661 if (path.equals("//ldml/characters/exemplarCharacters")) { 662 UnicodeSet exemplars = new UnicodeSet(value); 663 String script = UScript.getName(getFirstScript(exemplars)); 664 LOCALE_TO_SCRIPT.put(localeID, script); 665 } 666 Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath); 667 if (value_locales == null) { 668 path_value_locales.put(cleanPath, value_locales = new TreeMap<String, Set<String>>( 669 standardCollation)); 670 } 671 Set<String> locales = value_locales.get(value); 672 if (locales == null) { 673 value_locales.put(value, locales = new TreeSet<String>()); 674 } 675 locales.add(localeID + postFix[0]); 676 } 677 } 678 Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 679 for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) { 680 sorted.put(s.getValue(), s.getKey()); 681 } 682 for (Entry<String, Set<String>> s : sorted.keyValuesSet()) { 683 System.out.println(s); 684 } 685 } 686 687 static PathHeader.Factory pathHeaderFactory; 688 689 // static org.unicode.cldr.util.PrettyPath prettyPath = new org.unicode.cldr.util.PrettyPath(); 690 /** 691 * 692 */ fixPath(String path, String[] localePrefix)693 private static PathHeader fixPath(String path, String[] localePrefix) { 694 if (localePrefix != null) localePrefix[0] = ""; 695 // if (path.indexOf("[@alt=") >= 0 || path.indexOf("[@draft=") >= 0) { 696 // if (localePrefix != null) localePrefix[0] = "*"; 697 // path = removeAttributes(path, skipSet); 698 // } 699 // if (usePrettyPath) path = prettyPath.getPrettyPath(path); 700 return pathHeaderFactory.fromPath(path); 701 } 702 removeAttributes(String xpath, Set<String> skipAttributes)703 private static String removeAttributes(String xpath, Set<String> skipAttributes) { 704 XPathParts parts = new XPathParts(null, null).set(xpath); 705 removeAttributes(parts, skipAttributes); 706 return parts.toString(); 707 } 708 709 /** 710 * 711 */ removeAttributes(XPathParts parts, Set<String> skipAttributes)712 private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) { 713 for (int i = 0; i < parts.size(); ++i) { 714 // String element = parts.getElement(i); 715 Map<String, String> attributes = parts.getAttributes(i); 716 for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) { 717 String attribute = it.next(); 718 if (skipAttributes.contains(attribute)) it.remove(); 719 } 720 } 721 } 722 723 static Set<String> skipSet = new HashSet<String>(Arrays.asList("draft", "alt")); 724 725 static Status status = new Status(); 726 727 /** 728 * 729 */ getValue(CLDRFile cldrFile, String path, String fullPath)730 private static String getValue(CLDRFile cldrFile, String path, String fullPath) { 731 String value = cldrFile.getStringValue(path); 732 if (value == null) { 733 System.out.println("Null value for " + path); 734 return value; 735 } 736 cldrFile.getSourceLocaleID(path, status); 737 if (!path.equals(status.pathWhereFound)) { 738 // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]"; 739 value = null; 740 return value; 741 } 742 if (value.length() == 0) { 743 parts.set(fullPath); 744 removeAttributes(parts, skipSet); 745 int limit = parts.size(); 746 value = parts.toString(limit - 1, limit); 747 return value; 748 } 749 return value; 750 } 751 getFileName2(PathHeader header, String suffix)752 private static String getFileName2(PathHeader header, String suffix) { 753 String result = (header.getSection() + "." + header.getPage()) 754 .replace(" ", "_") 755 .replace("/", "_") 756 .replace("(", "_") 757 .replace(")", "_"); 758 if (suffix != null) { 759 result += "." + suffix; 760 } 761 return result.toLowerCase(Locale.ENGLISH); 762 } 763 764 static String[] headerAndFooter = new String[2]; 765 private static Transliterator toHTML; 766 767 /** 768 * @param tsvFile TODO 769 * @param path2 770 * 771 */ start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)772 private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile) 773 throws IOException { 774 finish(out, tsvFile.value); 775 out = writeHeader(main, title, tsvFile); 776 out.println(headerString); 777 return out; 778 } 779 getHeader(Set<PathHeader> set)780 public static String getHeader(Set<PathHeader> set) { 781 StringBuffer out = new StringBuffer("<table class='simple'><tr>"); 782 String lastMain = ""; 783 String lastSub = ""; 784 for (PathHeader pathHeader : set) { 785 String mainName = pathHeader.getSection(); 786 String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage()); 787 if (!mainName.equals(lastMain)) { 788 if (lastMain.length() != 0) { 789 out.append("</tr>" + System.lineSeparator() + "<tr>"); 790 } 791 out.append("<th align='right' nowrap style='vertical-align: top'><b>" 792 + TransliteratorUtilities.toHTML.transform(mainName) 793 + ": </b></th><td>"); 794 lastMain = mainName; 795 lastSub = subName; 796 } else if (!subName.equals(lastSub)) { 797 out.append(" | "); 798 lastSub = subName; 799 } else { 800 continue; // identical, skip 801 } 802 out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>"); 803 if (pathHeader.getPageId() == PageId.Alphabetic_Information) { 804 for (String[] pair : EXEMPLARS) { 805 out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>"); 806 } 807 } 808 continue; 809 } 810 return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString(); 811 } 812 writeHeader(String main, String title, Output<PrintWriter> tsvFile)813 private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException { 814 PrintWriter out; 815 out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html"); 816 if (tsvFile.value == null) { 817 tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv"); 818 tsvFile.value.println("# By-Type Data"); 819 tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales"); 820 } 821 822 ShowData.getChartTemplate("By-Type Chart: " + title, 823 ToolConstants.CHART_DISPLAY_VERSION, 824 "", 825 // "<link rel='stylesheet' type='text/css' href='by_type.css'>" + 826 // "<style type='text/css'>" + Utility.LINE_SEPARATOR + 827 // "h1 {margin-bottom:1em}" + Utility.LINE_SEPARATOR + 828 // "</style>" + Utility.LINE_SEPARATOR, 829 headerAndFooter, null, false); 830 out.println(headerAndFooter[0]); 831 return out; 832 } 833 834 /** 835 * @param tsvFile TODO 836 * 837 */ finish(PrintWriter out, PrintWriter tsvFile)838 private static void finish(PrintWriter out, PrintWriter tsvFile) { 839 if (out == null) return; 840 out.println("</table>"); 841 out.println(headerAndFooter[1]); 842 out.close(); 843 } 844 finishAll(PrintWriter out, PrintWriter tsvFile)845 private static void finishAll(PrintWriter out, PrintWriter tsvFile) { 846 // TODO Auto-generated method stub 847 tsvFile.println("# EOF"); 848 tsvFile.close(); 849 } 850 } 851