• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.tool;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Comparator;
16 import java.util.Date;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 
28 import org.unicode.cldr.draft.FileUtilities;
29 import org.unicode.cldr.tool.ShowData.DataShower;
30 import org.unicode.cldr.util.CLDRFile;
31 import org.unicode.cldr.util.CLDRFile.Status;
32 import org.unicode.cldr.util.CLDRPaths;
33 import org.unicode.cldr.util.CldrUtility;
34 import org.unicode.cldr.util.Factory;
35 import org.unicode.cldr.util.FileCopier;
36 import org.unicode.cldr.util.LanguageTagParser;
37 import org.unicode.cldr.util.LanguageTagParser.Fields;
38 import org.unicode.cldr.util.LocaleIDParser;
39 import org.unicode.cldr.util.PathHeader;
40 import org.unicode.cldr.util.PathHeader.PageId;
41 import org.unicode.cldr.util.PatternCache;
42 import org.unicode.cldr.util.SimpleFactory;
43 import org.unicode.cldr.util.StringId;
44 import org.unicode.cldr.util.TransliteratorUtilities;
45 import org.unicode.cldr.util.XPathParts;
46 import org.xml.sax.SAXException;
47 
48 import com.google.common.collect.ImmutableMap;
49 import com.ibm.icu.dev.tool.UOption;
50 import com.ibm.icu.dev.util.UnicodeMap;
51 import com.ibm.icu.impl.Relation;
52 import com.ibm.icu.impl.Utility;
53 import com.ibm.icu.lang.UCharacter;
54 import com.ibm.icu.lang.UScript;
55 import com.ibm.icu.text.BreakIterator;
56 import com.ibm.icu.text.Collator;
57 import com.ibm.icu.text.Normalizer;
58 import com.ibm.icu.text.RuleBasedCollator;
59 import com.ibm.icu.text.RuleBasedNumberFormat;
60 import com.ibm.icu.text.Transliterator;
61 import com.ibm.icu.text.UTF16;
62 import com.ibm.icu.text.UnicodeSet;
63 import com.ibm.icu.text.UnicodeSetIterator;
64 import com.ibm.icu.util.Output;
65 import com.ibm.icu.util.ULocale;
66 
67 /**
68  * This is a simple class that walks through the CLDR hierarchy.
69  * It gathers together all the items from all the locales that share the
70  * same element chain, and thus presents a "sideways" view of the data, in files called
 * by_type/X.html, where X is a type. X may be the concatenation of more than
 * one element, where the file would otherwise be too large.
73  *
74  * @author medavis
75  */
76 /*
77  * Notes:
78  * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
79  * http://developers.sun.com/dev/coolstuff/xml/readme.html
80  * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
81  * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
82  */
83 public class GenerateSidewaysView {
84     private static final String DIR_NAME = "by_type";
85     // debug flags
86     static final boolean DEBUG = false;
87     static final boolean DEBUG2 = false;
88     static final boolean DEBUG_SHOW_ADD = false;
89     static final boolean DEBUG_ELEMENT = false;
90     static final boolean DEBUG_SHOW_BAT = false;
91 
92     static final boolean FIX_ZONE_ALIASES = true;
93 
94     private static final int HELP1 = 0,
95         HELP2 = 1,
96         SOURCEDIR = 2,
97         DESTDIR = 3,
98         MATCH = 4,
99         SKIP = 5,
100         TZADIR = 6,
101         NONVALIDATING = 7,
102         SHOW_DTD = 8,
103         TRANSLIT = 9,
104         PATH = 10;
105 
106     private static final UOption[] options = {
107         UOption.HELP_H(),
108         UOption.HELP_QUESTION_MARK(),
109         UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
110         UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
111         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
112         UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
113         UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
114             "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
115         UOption.create("nonvalidating", 'n', UOption.NO_ARG),
116         UOption.create("dtd", 'w', UOption.NO_ARG),
117         UOption.create("transliterate", 'y', UOption.NO_ARG),
118         UOption.create("path", 'p', UOption.REQUIRES_ARG),
119     };
120 
121     private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
122     // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
123     protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();
124 
getFirstScript(UnicodeSet exemplars)125     static int getFirstScript(UnicodeSet exemplars) {
126         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
127             int script = UScript.getScript(it.codepoint);
128             if (script == UScript.COMMON || script == UScript.INHERITED) {
129                 continue;
130             }
131             return script;
132         }
133         return UScript.COMMON;
134     }
135 
136     static Comparator<Object> UCA;
137     static {
138         RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
139         UCA2.setNumericCollation(true);
140         UCA2.setStrength(Collator.IDENTICAL);
141         UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
142     }
143 
144     private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();
145     private static long startTime = System.currentTimeMillis();
146 
147     static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
148     static {
149         standardCollation.setStrength(Collator.IDENTICAL);
150         standardCollation.setNumericCollation(true);
151     }
152 
153     private static CLDRFile english;
154     // private static DataShower dataShower = new DataShower();
155     private static Matcher pathMatcher;
156 
main(String[] args)157     public static void main(String[] args) throws SAXException, IOException {
158         startTime = System.currentTimeMillis();
159         ToolUtilities.registerExtraTransliterators();
160         UOption.parseArgs(args, options);
161 
162         pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");
163 
164         File[] paths = {
165             new File(CLDRPaths.MAIN_DIRECTORY),
166             new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
167             new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
168         };
169         Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);
170 
171         // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
172         english = cldrFactory.make("en", true);
173         pathHeaderFactory = PathHeader.getFactory(english);
174 
175         FileCopier.ensureDirectoryExists(options[DESTDIR].value);
176         FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css");
177         FormattedFileWriter.copyIncludeHtmls(options[DESTDIR].value);
178 
179         // now get the info
180 
181         loadInformation(cldrFactory);
182         String oldMain = "";
183         PrintWriter out = null;
184 
185         System.out.println("Getting types " + path_value_locales.size());
186         // Set<String> types = new TreeSet<String>();
187         // for (PathHeader path : path_value_locales.keySet()) {
188         // String main = getFileName2(path);
189         // if (!main.equals(oldMain)) {
190         // oldMain = main;
191         // types.add(main);
192         // }
193         // }
194         String headerString = getHeader(path_value_locales.keySet());
195         FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
196             ImmutableMap.of(
197                 "%header%", headerString,
198                 "%version%", ToolConstants.CHART_DISPLAY_VERSION,
199                 "%index%", "../index.html",
200                 "%index-title%", "Main Charts Index",
201                 "%date%", CldrUtility.isoFormatDateOnly(new Date())));
202 //        FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
203 //            new String[] { "%header%", headerString });
204 
205         System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
206         // Transliterator toLatin = Transliterator.getInstance("any-latin");
207         toHTML = TransliteratorUtilities.toHTML;
208         // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
209 
210         String oldHeader = "";
211         Output<PrintWriter> tsvFile = new Output<>();
212 
213         for (PathHeader path : path_value_locales.keySet()) {
214             String main = getFileName2(path, null);
215             if (!main.equals(oldMain)) {
216                 oldMain = main;
217                 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
218                 out.println("<table class='table'>");
219                 oldHeader = "";
220             }
221             String key = path.getCode();
222             String anchor = toHTML.transliterate(key);
223 
224             String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
225             String englishValue = english.getStringValue(originalPath);
226             if (englishValue != null) {
227                 englishValue = "English: ‹" + englishValue + "›";
228             } else {
229                 englishValue = "";
230             }
231 
232             String header = path.getHeader();
233             if (!header.equals(oldHeader) && !header.equals("null")) {
234                 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
235                     + "</th></tr>");
236                 oldHeader = header;
237             }
238             String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
239             out.println("<tr>" +
240                 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
241                 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
242                 "</tr>");
243             Map<String, Set<String>> value_locales = path_value_locales.get(path);
244             for (String value : value_locales.keySet()) {
245                 // String outValue = toHTML.transliterate(value);
246                 // String transValue = value;
247                 // try {
248                 // transValue = toLatin.transliterate(value);
249                 // } catch (RuntimeException e) {
250                 // }
251                 // if (!transValue.equals(value)) {
252                 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>";
253                 // }
254                 String valueClass = " class='value'";
255                 if (DataShower.getBidiStyle(value).length() != 0) {
256                     valueClass = " class='rtl_value'";
257                 }
258                 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
259                 tsvFile.value.print(
260                     path.getSection()
261                         + "\t" + path.getPage()
262                         + "\t" + path.getHeader()
263                         + "\t" + path.getCode()
264                         + "\t" + value
265                         + "\t");
266 
267                 Set<String> locales = value_locales.get(value);
268                 boolean first = true;
269                 boolean containsRoot = locales.contains("root");
270                 for (String locale : locales) {
271                     if (first)
272                         first = false;
273                     else
274                         out.print(" ");
275                     if (locale.endsWith("*")) {
276                         locale = locale.substring(0, locale.length() - 1);
277                         out.print("<i>\u00B7" + locale + "\u00B7</i>");
278                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
279                     } else if (!containsRoot) {
280                         out.print("\u00B7" + locale + "\u00B7");
281                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
282                     } else if (locale.contains("_")) {
283                         // not same as root, but need to test for parent
284                         // if the parent is not in the same list, then we include anyway.
285                         // Cf http://unicode.org/cldr/trac/ticket/7228
286                         String parent = LocaleIDParser.getParent(locale);
287                         if (!locales.contains(parent)) {
288                             out.print("<b>\u00B7" + locale + "\u00B7</b>");
289                             tsvFile.value.print("\u00B7" + locale + "\u00B7");
290                         }
291                     }
292                 }
293                 if (containsRoot) {
294                     out.print("<b>\u00B7all\u00B7others\u00B7</b>");
295                     tsvFile.value.print("\u00B7all-others\u00B7");
296                 }
297                 out.println("</td></tr>");
298                 tsvFile.value.println();
299             }
300         }
301         for (String[] pair : EXEMPLARS) {
302             showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
303         }
304         finish(out, tsvFile.value);
305         finishAll(out, tsvFile.value);
306         System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
307             .format((System.currentTimeMillis() - startTime) / 1000.0));
308     }
309 
310     static final String[][] EXEMPLARS = {
311         { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
312         { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
313         { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
314         // TODO look at numbers, auxiliary
315     };
316 
showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)317     private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
318         Output<PrintWriter> tsvFile)
319         throws IOException {
320         PathHeader ph = fixPath(pathName, null);
321         String filename = getFileName2(ph, variant);
322         out = start(out, filename, headerString, title, tsvFile);
323         Map<String, Set<String>> value_locales = path_value_locales.get(ph);
324 
325         // TODO change logic so that aux characters characters work well.
326 
327         Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
328         // UnicodeMap mapping = new UnicodeMap();
329         UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");
330 
331         // get the locale information
332         UnicodeSet totalExemplars = new UnicodeSet();
333         for (String value : value_locales.keySet()) {
334             // flatten out UnicodeSet
335             UnicodeSet exemplars = new UnicodeSet(value);
336             if (variant.equals("main")) {
337                 UnicodeSet extras = new UnicodeSet();
338                 for (String item : exemplars) {
339                     extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
340                 }
341                 exemplars.addAll(extras);
342             }
343             totalExemplars.addAll(exemplars);
344             exemplars.removeAll(stuffToSkip);
345 
346             Set<String> locales = value_locales.get(value);
347             //String script = UScript.getName(getFirstScript(exemplars));
348             for (String locale : locales) {
349                 checkTr(script_UnicodeMap);
350                 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
351                 String script = LOCALE_TO_SCRIPT.get(key);
352                 // try a few variants until we get the script
353                 if (script == null && key.contains("_")) {
354                     String simpleParent = LanguageTagParser.getSimpleParent(key);
355                     script = LOCALE_TO_SCRIPT.get(simpleParent);
356                     if (script == null && simpleParent.contains("_")) {
357                         simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
358                         script = LOCALE_TO_SCRIPT.get(simpleParent);
359                     }
360                 }
361                 if (script == null) {
362                     script = UScript.getName(UScript.UNKNOWN);
363                 }
364                 Set<String> temp = new HashSet<>();
365                 temp.add(locale);
366                 checkTr(script_UnicodeMap);
367                 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
368                 if (mapping == null) {
369                     script_UnicodeMap.put(script, mapping = new UnicodeMap<>());
370                 }
371                 checkTr(script_UnicodeMap);
372                 mapping.composeWith(exemplars, temp, setComposer);
373                 checkTr(script_UnicodeMap);
374             }
375         }
376         System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
377         for (String script : script_UnicodeMap.keySet()) {
378             UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
379             writeCharToLocaleMapping(out, script, mapping);
380         }
381         return out;
382     }
383 
checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)384     private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
385         UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
386         if (unicodeMap == null) {
387             return;
388         }
389         Set<String> foo = unicodeMap.get(0x21);
390         if (foo == null) {
391             return;
392         }
393         if (foo.contains("tr")) {
394             System.out.println("huh?");
395         }
396     }
397 
writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)398     private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
399         BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
400         // script
401         System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
402         if (script.equals("Hangul")) { //  || script.equals("Common")
403             return; // skip these
404         }
405         // find out all the locales and all the characters
406         Set<String> allLocales = new TreeSet<>(UCA);
407         Set<String> allChars = new TreeSet<>(UCA);
408         Set<String> allStrings = new TreeSet<>(UCA);
409         for (Set<String> locales : mapping.getAvailableValues()) {
410             allLocales.addAll(locales);
411             UnicodeSet unicodeSet = mapping.keySet(locales);
412             for (String item : unicodeSet) {
413                 charBreaks.setText(item);
414                 int endFirst = charBreaks.next();
415                 if (endFirst == item.length()) {
416                     allChars.add(item);
417                 } else {
418                     allStrings.add(item);
419                 }
420             }
421         }
422         // get the columns, and show them
423         out.println("<table class='table' style='width:1%'>");
424         out.println("<caption>" + script + "</caption>");
425         exemplarHeader(out, allChars);
426 
427         for (String locale : allLocales) {
428             String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
429                 + cleanLocale(locale, true) + "</td>";
430             out.println("<tr>");
431             out.println(headerHeader);
432 
433             for (String item : allChars) {
434                 // String exemplarsWithoutBrackets = displayExemplars(item);
435                 if (mapping.get(item).contains(locale)) {
436                     out.println("<td class='cell'" +
437                         ">" + displayCharacter(item) + "</td>");
438                 } else {
439                     out.println("<td class='empty'>\u00a0</td>");
440                 }
441             }
442             // now strings, if any
443             StringBuilder strings = new StringBuilder();
444             int lastLineStart = 0;
445             for (String item : allStrings) {
446                 // String exemplarsWithoutBrackets = displayExemplars(item);
447                 if (mapping.get(item).contains(locale)) {
448                     int str_len = strings.length();
449                     if (str_len != 0) {
450                         if (str_len - lastLineStart > 20) {
451                             strings.append(System.lineSeparator());
452                             lastLineStart = str_len;
453                         } else {
454                             strings.append(' ');
455                         }
456                     }
457                     strings.append(displayCharacter(item));
458                 }
459             }
460             if (strings.length() == 0) {
461                 out.println("<td class='empty'>\u00a0</td>");
462             } else {
463                 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>")
464                     + "</td>");
465             }
466 
467             out.println(headerHeader);
468             out.println("</tr>");
469         }
470         exemplarHeader(out, allChars);
471         out.println("</table>");
472         out.flush();
473     }
474 
characterTitle(String item)475     private static String characterTitle(String item) {
476         return ("title='U+" +
477             toHTML.transform(
478                 Utility.hex(item, 4, ", U+", true, new StringBuilder())
479                     + " " + UCharacter.getName(item, ", "))
480             + "'");
481     }
482 
exemplarHeader(PrintWriter out, Set<String> allChars)483     private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
484         out.println("<tr>");
485         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
486         for (String item : allChars) {
487             out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
488         }
489         out.println("<th class='head'>Clusters</th>");
490         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
491         out.println("</tr>");
492     }
493 
494     static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();
495 
displayCharacter(String item)496     public static String displayCharacter(String item) {
497         if (item.length() == 0) return "<i>none</i>";
498         int ch = item.codePointAt(0);
499         if (NONSPACING.contains(ch)) {
500             item = "\u00a0" + item + "\u00a0";
501         }
502         String result = toHTML.transform(item);
503         return result;
504     }
505 
506     static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
507     static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
508     static {
509         allButScripts.remove(Fields.SCRIPT);
510     }
511 
cleanLocale(String item, boolean name)512     private static String cleanLocale(String item, boolean name) {
513         if (item == null) {
514             return "<i>null</i>";
515         }
516         boolean draft = item.endsWith("*");
517         if (draft) {
518             item = item.substring(0, item.length() - 1);
519         }
520         cleanLocaleParser.set(item);
521         item = cleanLocaleParser.toString(allButScripts);
522         String core = item;
523         item = toHTML.transform(item);
524         if (name) {
525             item = english.getName(core);
526             item = item == null ? "<i>null</i>" : toHTML.transform(item);
527         }
528         if (draft) {
529             item = "<i>" + item + "</i>";
530         }
531         return item;
532     }
533 
534     // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) {
535     // String exemplarsWithoutBrackets = displayExemplars(lastChars);
536     // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
537     // for (String item : allLocales) {
538     // String cleanItem;
539     // if (locales.contains(item)) {
540     // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
541     // } else {
542     // cleanItem = "<td class='value'>\u00a0</td>";
543     // }
544     // out.println(cleanItem);
545     // }
546     // out.println("</tr>");
547     // }
548 
549     // private static final StringTransform MyTransform = new StringTransform() {
550     //
551     // public String transform(String source) {
552     // StringBuilder builder = new StringBuilder();
553     // int cp = 0;
554     // builder.append("<span title='");
555     // String prefix = "";
556     // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
557     // cp = UTF16.charAt(source, i);
558     // if (i == 0) {
559     // if (COMBINING.contains(cp)) {
560     // prefix = "\u25CC";
561     // }
562     // } else {
563     // builder.append(" + ");
564     // }
565     // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp));
566     // }
567     // builder.append("'>").append(prefix).append(source).append("</span>");
568     // return builder.toString();
569     // }
570     //
571     // };
572 
573     // private static String displayExemplars(UnicodeSet lastChars) {
574     // String exemplarsWithoutBrackets = new PrettyPrinter()
575     // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
576     // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
577     // .setStrength2(Collator.PRIMARY))
578     // .setCompressRanges(true)
579     // .setToQuote(ALL_CHARS)
580     // .setQuoter(MyTransform)
581     // .format(lastChars);
582     // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1);
583     // return exemplarsWithoutBrackets;
584     // }
585 
586     // private static boolean isNextCharacter(String last, String value) {
587     // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
588     // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
589     // int lastChar = UTF16.charAt(last,0);
590     // int valueChar = UTF16.charAt(value,0);
591     // return lastChar + 1 == valueChar;
592     // }
593 
594     static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<>() {
595         @Override
596         public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
597             if (a == null) {
598                 return b;
599             } else if (b == null) {
600                 return a;
601             } else {
602                 TreeSet<String> result = new TreeSet<>(a);
603                 result.addAll(b);
604                 return result;
605             }
606         }
607     };
608 
609     static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();
610 
loadInformation(Factory cldrFactory)611     private static void loadInformation(Factory cldrFactory) {
612         Set<String> alllocales = cldrFactory.getAvailable();
613         String[] postFix = new String[] { "" };
614         // gather all information
615         // TODO tweek for value-laden attributes
616         for (String localeID : alllocales) {
617             System.out.println("Loading: " + localeID);
618             System.out.flush();
619 
620             CLDRFile cldrFile;
621             try {
622                 cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
623             } catch (IllegalArgumentException e) {
624                 System.err.println("Couldn't open " + localeID);
625                 continue;
626             }
627             if (cldrFile.isNonInheriting()) continue;
628             for (String path : cldrFile) {
629                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
630                     continue;
631                 }
632                 if (altProposedMatcher.reset(path).matches()) {
633                     continue;
634                 }
635                 if (path.indexOf("/alias") >= 0) continue;
636                 if (path.indexOf("/identity") >= 0) continue;
637                 if (path.indexOf("/references") >= 0) continue;
638                 PathHeader ph = fixPath(path, postFix);
639                 if (ph == null || ph.shouldHide()) {
640                     continue;
641                 }
642                 String fullPath = cldrFile.getFullXPath(path);
643                 String value = getValue(cldrFile, path, fullPath);
644                 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
645                     continue;
646                 }
647                 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
648                     || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
649                     postFix[0] = "*";
650                 }
651                 if (path.equals("//ldml/characters/exemplarCharacters")) {
652                     UnicodeSet exemplars;
653                     try {
654                         exemplars = new UnicodeSet(value);
655                         String script = UScript.getName(getFirstScript(exemplars));
656                         LOCALE_TO_SCRIPT.put(localeID, script);
657                     } catch (Exception e) {
658 
659                     }
660                 }
661                 Map<String, Set<String>> value_locales = path_value_locales.get(ph);
662                 if (value_locales == null) {
663                     path_value_locales.put(ph, value_locales = new TreeMap<>(
664                         standardCollation));
665                 }
666                 Set<String> locales = value_locales.get(value);
667                 if (locales == null) {
668                     value_locales.put(value, locales = new TreeSet<>());
669                 }
670                 locales.add(localeID + postFix[0]);
671             }
672         }
673         Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
674         for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
675             sorted.put(s.getValue(), s.getKey());
676         }
677         for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
678             System.out.println(s);
679         }
680     }
681 
/**
 * Shared factory that {@code fixPath} uses to convert an xpath into a {@link PathHeader}.
 * It is not assigned at its declaration.
 * NOTE(review): presumably initialized elsewhere in this class before {@code fixPath}
 * is first called -- confirm.
 */
static PathHeader.Factory pathHeaderFactory;
683 
684     /**
685      *
686      * @param path
687      * @param localePrefix
688      * @return
689      */
fixPath(String path, String[] localePrefix)690     private static PathHeader fixPath(String path, String[] localePrefix) {
691         if (localePrefix != null) {
692             localePrefix[0] = "";
693         }
694         return pathHeaderFactory.fromPath(path);
695     }
696 
removeAttributes(String xpath, Set<String> skipAttributes)697     private static String removeAttributes(String xpath, Set<String> skipAttributes) {
698         XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes
699         removeAttributes(parts, skipAttributes);
700         return parts.toString();
701     }
702 
703     /**
704      *
705      * @param parts
706      * @param skipAttributes
707      */
removeAttributes(XPathParts parts, Set<String> skipAttributes)708     private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
709         for (int i = 0; i < parts.size(); ++i) {
710             for (String attributeToRemove : skipAttributes) {
711                 parts.removeAttribute(i, attributeToRemove);
712             }
713         }
714     }
715 
// Attribute names ("draft" and "alt") that getValue removes from the full path before it
// derives a substitute value for an empty string value.
static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt"));

// Single shared Status object: getValue passes it to CLDRFile.getSourceLocaleID and then
// reads status.pathWhereFound from it.
static Status status = new Status();
719 
720     /**
721      *
722      */
getValue(CLDRFile cldrFile, String path, String fullPath)723     private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
724         String value = cldrFile.getStringValue(path);
725         if (value == null) {
726             System.out.println("Null value for " + path);
727             return value;
728         }
729         cldrFile.getSourceLocaleID(path, status);
730         if (!path.equals(status.pathWhereFound)) {
731             // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
732             value = null;
733             return value;
734         }
735         if (value.length() == 0) {
736             XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes
737             removeAttributes(parts, skipSet);
738             int limit = parts.size();
739             value = parts.toString(limit - 1, limit);
740             return value;
741         }
742         return value;
743     }
744 
getFileName2(PathHeader header, String suffix)745     private static String getFileName2(PathHeader header, String suffix) {
746         String result = (header.getSection() + "." + header.getPage())
747             .replace(" ", "_")
748             .replace("/", "_")
749             .replace("(", "_")
750             .replace(")", "_");
751         if (suffix != null) {
752             result += "." + suffix;
753         }
754         return result.toLowerCase(Locale.ENGLISH);
755     }
756 
// Two-element holder for page boilerplate: writeHeader passes it to
// ShowData.getChartTemplate and prints element 0 at the top of each page; finish prints
// element 1 just before closing the page.
static String[] headerAndFooter = new String[2];
// NOTE(review): no use of this field is visible in the reviewed part of the file
// (getHeader uses TransliteratorUtilities.toHTML instead) -- confirm whether it is still needed.
private static Transliterator toHTML;
759 
760     /**
761      * @param tsvFile TODO
762      * @param path2
763      *
764      */
start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)765     private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
766         throws IOException {
767         finish(out, tsvFile.value);
768         out = writeHeader(main, title, tsvFile);
769         out.println(headerString);
770         return out;
771     }
772 
getHeader(Set<PathHeader> set)773     public static String getHeader(Set<PathHeader> set) {
774         StringBuffer out = new StringBuffer("<table class='simple'><tr>");
775         String lastMain = "";
776         String lastSub = "";
777         for (PathHeader pathHeader : set) {
778             String mainName = pathHeader.getSection();
779             String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
780             if (!mainName.equals(lastMain)) {
781                 if (lastMain.length() != 0) {
782                     out.append("</tr>" + System.lineSeparator() + "<tr>");
783                 }
784                 out.append("<th align='right' nowrap style='vertical-align: top'><b>"
785                     + TransliteratorUtilities.toHTML.transform(mainName)
786                     + ":&nbsp;</b></th><td>");
787                 lastMain = mainName;
788                 lastSub = subName;
789             } else if (!subName.equals(lastSub)) {
790                 out.append(" | ");
791                 lastSub = subName;
792             } else {
793                 continue; // identical, skip
794             }
795             out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
796             if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
797                 for (String[] pair : EXEMPLARS) {
798                     out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
799                 }
800             }
801             continue;
802         }
803         return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
804     }
805 
writeHeader(String main, String title, Output<PrintWriter> tsvFile)806     private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
807         PrintWriter out;
808         out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
809         if (tsvFile.value == null) {
810             tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
811             tsvFile.value.println("# By-Type Data");
812             tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
813         }
814 
815         ShowData.getChartTemplate("By-Type Chart: " + title,
816             ToolConstants.CHART_DISPLAY_VERSION,
817             "",
818             headerAndFooter, null, false);
819         out.println(headerAndFooter[0]);
820         return out;
821     }
822 
823     /**
824      * @param tsvFile TODO
825      *
826      */
finish(PrintWriter out, PrintWriter tsvFile)827     private static void finish(PrintWriter out, PrintWriter tsvFile) {
828         if (out == null) return;
829         out.println("</table>");
830         out.println(headerAndFooter[1]);
831         out.close();
832     }
833 
finishAll(PrintWriter out, PrintWriter tsvFile)834     private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
835         // TODO Auto-generated method stub
836         //tsvFile.println("# EOF");
837         tsvFile.close();
838     }
839 }
840