• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.tool;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Comparator;
16 import java.util.Date;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Map.Entry;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Matcher;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.tool.ShowData.DataShower;
31 import org.unicode.cldr.util.CLDRFile;
32 import org.unicode.cldr.util.CLDRFile.Status;
33 import org.unicode.cldr.util.CLDRPaths;
34 import org.unicode.cldr.util.CldrUtility;
35 import org.unicode.cldr.util.Factory;
36 import org.unicode.cldr.util.FileCopier;
37 import org.unicode.cldr.util.LanguageTagParser;
38 import org.unicode.cldr.util.LanguageTagParser.Fields;
39 import org.unicode.cldr.util.LocaleIDParser;
40 import org.unicode.cldr.util.PathHeader;
41 import org.unicode.cldr.util.PathHeader.PageId;
42 import org.unicode.cldr.util.PatternCache;
43 import org.unicode.cldr.util.SimpleFactory;
44 import org.unicode.cldr.util.StringId;
45 import org.unicode.cldr.util.TransliteratorUtilities;
46 import org.unicode.cldr.util.XPathParts;
47 import org.xml.sax.SAXException;
48 
49 import com.google.common.collect.ImmutableMap;
50 import com.ibm.icu.dev.tool.UOption;
51 import com.ibm.icu.dev.util.UnicodeMap;
52 import com.ibm.icu.impl.Relation;
53 import com.ibm.icu.impl.Utility;
54 import com.ibm.icu.lang.UCharacter;
55 import com.ibm.icu.lang.UScript;
56 import com.ibm.icu.text.BreakIterator;
57 import com.ibm.icu.text.Collator;
58 import com.ibm.icu.text.Normalizer;
59 import com.ibm.icu.text.RuleBasedCollator;
60 import com.ibm.icu.text.RuleBasedNumberFormat;
61 import com.ibm.icu.text.Transliterator;
62 import com.ibm.icu.text.UTF16;
63 import com.ibm.icu.text.UnicodeSet;
64 import com.ibm.icu.text.UnicodeSetIterator;
65 import com.ibm.icu.util.Output;
66 import com.ibm.icu.util.ULocale;
67 
68 /**
69  * This is a simple class that walks through the CLDR hierarchy.
70  * It gathers together all the items from all the locales that share the
71  * same element chain, and thus presents a "sideways" view of the data, in files called
72  * by_type/X.html, where X is a type. X may be the concatenation of more than
73  * one element, where the file would otherwise be too large.
74  *
75  * @author medavis
76  */
77 /*
78  * Notes:
79  * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
80  * http://developers.sun.com/dev/coolstuff/xml/readme.html
81  * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
82  * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
83  */
84 public class GenerateSidewaysView {
    /** Name of the chart sub-directory; appended to the chart directory to form the default destination. */
    private static final String DIR_NAME = "by_type";
    // Debug flags. All are compile-time constants; none is read in the part of the class shown here.
    static final boolean DEBUG = false;
    static final boolean DEBUG2 = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean DEBUG_ELEMENT = false;
    static final boolean DEBUG_SHOW_BAT = false;

    // NOTE(review): not referenced in the visible part of the class -- confirm whether it is still used.
    static final boolean FIX_ZONE_ALIASES = true;
94 
    // Indices into the options array below; the order here must match the order of the array entries.
    private static final int HELP1 = 0,
        HELP2 = 1,
        SOURCEDIR = 2,
        DESTDIR = 3,
        MATCH = 4,
        SKIP = 5,
        TZADIR = 6,
        NONVALIDATING = 7,
        SHOW_DTD = 8,
        TRANSLIT = 9,
        PATH = 10;

    /**
     * Command-line options, parsed by UOption.parseArgs in main. Each entry is addressed
     * through the index constant of the same position declared above.
     */
    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.HELP_QUESTION_MARK(),
        UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
        UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
        UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
        UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
        UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
            "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
        UOption.create("nonvalidating", 'n', UOption.NO_ARG),
        UOption.create("dtd", 'w', UOption.NO_ARG),
        UOption.create("transliterate", 'y', UOption.NO_ARG),
        // No default: when absent, main leaves pathMatcher null and no path filtering is applied.
        UOption.create("path", 'p', UOption.REQUIRES_ARG),
    };
121 
    /** Reusable matcher for CLDRFile.ALT_PROPOSED_PATTERN; loadInformation resets it per path and skips matching paths. */
    private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
    // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
    /** Frozen set built from the pattern [[:m:]]; only referenced from commented-out code in the visible part of the class. */
    protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();
125 
getFirstScript(UnicodeSet exemplars)126     static int getFirstScript(UnicodeSet exemplars) {
127         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
128             int script = UScript.getScript(it.codepoint);
129             if (script == UScript.COMMON || script == UScript.INHERITED) {
130                 continue;
131             }
132             return script;
133         }
134         return UScript.COMMON;
135     }
136 
    /**
     * Comparator used to order locales and characters in the exemplar tables.
     * It is a MultiComparator built from the root collator configured below and a
     * UTF16.StringComparator.
     */
    static Comparator<Object> UCA;
    static {
        RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
        UCA2.setNumericCollation(true);
        UCA2.setStrength(Collator.IDENTICAL);
        // NOTE(review): presumably the second comparator acts as a tie-breaker -- confirm against MultiComparator.
        UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
    }
144 
    /**
     * The gathered data: PathHeader -> (value -> set of locale IDs having that value).
     * Filled by loadInformation; locale IDs carry a trailing "*" when the value came from a
     * path marked draft="unconfirmed" or draft="provisional".
     */
    private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();
    /** Start time in milliseconds; reset at the top of main and used there to report the elapsed time. */
    private static long startTime = System.currentTimeMillis();

    /** English collator (identical strength, numeric ordering) used to sort the value keys of path_value_locales. */
    static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
    static {
        standardCollation.setStrength(Collator.IDENTICAL);
        standardCollation.setNumericCollation(true);
    }

    /** The "en" file, assigned in main; source of the English values and locale names shown in the charts. */
    private static CLDRFile english;
    // private static DataShower dataShower = new DataShower();
    /** Matcher for the optional "path" option; null when the option was not given (no path filtering). */
    private static Matcher pathMatcher;
157 
    /**
     * Entry point: loads all locales, then writes the "by type" charts.
     * <p>
     * Steps, in order: parse the command line; build a factory over the main, annotations and
     * subdivisions directories; load English; copy the static CSS/HTML files into the destination
     * directory; gather all values with loadInformation; write one HTML table (plus TSV rows) per
     * file name returned by getFileName2; write the exemplar charts listed in EXEMPLARS; and finally
     * close everything and print the elapsed time.
     *
     * @param args command-line arguments, parsed against the options array
     * @throws SAXException declared by the signature
     * @throws IOException declared by the signature; presumably raised by the file helpers called here
     */
    public static void main(String[] args) throws SAXException, IOException {
        startTime = System.currentTimeMillis();
        ToolUtilities.registerExtraTransliterators();
        UOption.parseArgs(args, options);

        // Optional filter on paths; stays null (no filtering) when the "path" option is absent.
        pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");

        // The source directories are fixed here; options[SOURCEDIR] is only used by the commented-out line below.
        File[] paths = {
            new File(CLDRPaths.MAIN_DIRECTORY),
            new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
            new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
        };
        Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);

        // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
        english = cldrFactory.make("en", true);
        pathHeaderFactory = PathHeader.getFactory(english);

        FileCopier.ensureDirectoryExists(options[DESTDIR].value);
        FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css");
        FormattedFileWriter.copyIncludeHtmls(options[DESTDIR].value);

        // now get the info

        loadInformation(cldrFactory);
        String oldMain = "";
        PrintWriter out = null;

        System.out.println("Getting types " + path_value_locales.size());
        // Set<String> types = new TreeSet<String>();
        // for (PathHeader path : path_value_locales.keySet()) {
        // String main = getFileName2(path);
        // if (!main.equals(oldMain)) {
        // oldMain = main;
        // types.add(main);
        // }
        // }
        String headerString = getHeader(path_value_locales.keySet());
        // Instantiate the index page from its template by substituting the %...% placeholders.
        FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
            ImmutableMap.of(
                "%header%", headerString,
                "%version%", ToolConstants.CHART_DISPLAY_VERSION,
                "%index%", "../index.html",
                "%index-title%", "Main Charts Index",
                "%date%", CldrUtility.isoFormatDateOnly(new Date())));
//        FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
//            new String[] { "%header%", headerString });

        System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
        // Transliterator toLatin = Transliterator.getInstance("any-latin");
        toHTML = TransliteratorUtilities.toHTML;
        // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");

        String oldHeader = "";
        Output<PrintWriter> tsvFile = new Output<>();

        for (PathHeader path : path_value_locales.keySet()) {
            // A change in file name means a new output file: switch writers and open a new table.
            String main = getFileName2(path, null);
            if (!main.equals(oldMain)) {
                oldMain = main;
                out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
                out.println("<table class='table'>");
                oldHeader = "";
            }
            String key = path.getCode();
            String anchor = toHTML.transliterate(key);

            String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
            String englishValue = english.getStringValue(originalPath);
            if (englishValue != null) {
                englishValue = "English: ‹" + englishValue + "›";
            } else {
                englishValue = "";
            }

            // Emit a header row only when the header changes within the current file.
            String header = path.getHeader();
            if (!header.equals(oldHeader) && !header.equals("null")) {
                out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
                    + "</th></tr>");
                oldHeader = header;
            }
            // The anchor id is the hex form of the StringId of the original path.
            String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
            out.println("<tr>" +
                "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
                "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
                "</tr>");
            Map<String, Set<String>> value_locales = path_value_locales.get(path);
            for (String value : value_locales.keySet()) {
                // String outValue = toHTML.transliterate(value);
                // String transValue = value;
                // try {
                // transValue = toLatin.transliterate(value);
                // } catch (RuntimeException e) {
                // }
                // if (!transValue.equals(value)) {
                // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>";
                // }
                // Use the right-to-left cell class when DataShower reports a non-empty bidi style for the value.
                String valueClass = " class='value'";
                if (DataShower.getBidiStyle(value).length() != 0) {
                    valueClass = " class='rtl_value'";
                }
                out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
                // Start the matching TSV row: section, page, header, code, value, then the locale list.
                tsvFile.value.print(
                    path.getSection()
                        + "\t" + path.getPage()
                        + "\t" + path.getHeader()
                        + "\t" + path.getCode()
                        + "\t" + value
                        + "\t");

                Set<String> locales = value_locales.get(value);
                boolean first = true;
                boolean containsRoot = locales.contains("root");
                for (String locale : locales) {
                    // The separator is written for every locale after the first, even one that ends up not printed.
                    if (first)
                        first = false;
                    else
                        out.print(" ");
                    if (locale.endsWith("*")) {
                        // Trailing "*" marks a draft value (added by loadInformation): strip it and show in italics.
                        locale = locale.substring(0, locale.length() - 1);
                        out.print("<i>\u00B7" + locale + "\u00B7</i>");
                        tsvFile.value.print("\u00B7" + locale + "\u00B7");
                    } else if (!containsRoot) {
                        out.print("\u00B7" + locale + "\u00B7");
                        tsvFile.value.print("\u00B7" + locale + "\u00B7");
                    } else if (locale.contains("_")) {
                        // not same as root, but need to test for parent
                        // if the parent is not in the same list, then we include anyway.
                        // Cf http://unicode.org/cldr/trac/ticket/7228
                        String parent = LocaleIDParser.getParent(locale);
                        if (!locales.contains(parent)) {
                            out.print("<b>\u00B7" + locale + "\u00B7</b>");
                            tsvFile.value.print("\u00B7" + locale + "\u00B7");
                        }
                    }
                    // Remaining case: root has this value and the locale is not listed individually;
                    // it is covered by the "all others" entry written below.
                }
                if (containsRoot) {
                    out.print("<b>\u00B7all\u00B7others\u00B7</b>");
                    tsvFile.value.print("\u00B7all-others\u00B7");
                }
                out.println("</td></tr>");
                tsvFile.value.println();
            }
        }
        // Character-by-locale exemplar charts, one per EXEMPLARS entry (path, variant, title).
        for (String[] pair : EXEMPLARS) {
            showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
        }
        finish(out, tsvFile.value);
        finishAll(out, tsvFile.value);
        System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
            .format((System.currentTimeMillis() - startTime) / 1000.0));
    }
310 
    /**
     * Exemplar charts to generate. Each row is { path, variant, title } and is passed in that
     * order to showExemplars by main.
     */
    static final String[][] EXEMPLARS = {
        { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
        { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
        // TODO look at numbers, auxiliary
    };
317 
showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)318     private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
319         Output<PrintWriter> tsvFile)
320         throws IOException {
321         PathHeader ph = fixPath(pathName, null);
322         String filename = getFileName2(ph, variant);
323         out = start(out, filename, headerString, title, tsvFile);
324         Map<String, Set<String>> value_locales = path_value_locales.get(ph);
325 
326         // TODO change logic so that aux characters characters work well.
327 
328         Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
329         // UnicodeMap mapping = new UnicodeMap();
330         UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");
331 
332         // get the locale information
333         UnicodeSet totalExemplars = new UnicodeSet();
334         for (String value : value_locales.keySet()) {
335             // flatten out UnicodeSet
336             UnicodeSet exemplars = new UnicodeSet(value);
337             if (variant.equals("main")) {
338                 UnicodeSet extras = new UnicodeSet();
339                 for (String item : exemplars) {
340                     extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
341                 }
342                 exemplars.addAll(extras);
343             }
344             totalExemplars.addAll(exemplars);
345             exemplars.removeAll(stuffToSkip);
346 
347             Set<String> locales = value_locales.get(value);
348             //String script = UScript.getName(getFirstScript(exemplars));
349             for (String locale : locales) {
350                 checkTr(script_UnicodeMap);
351                 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
352                 String script = LOCALE_TO_SCRIPT.get(key);
353                 // try a few variants until we get the script
354                 if (script == null && key.contains("_")) {
355                     String simpleParent = LanguageTagParser.getSimpleParent(key);
356                     script = LOCALE_TO_SCRIPT.get(simpleParent);
357                     if (script == null && simpleParent.contains("_")) {
358                         simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
359                         script = LOCALE_TO_SCRIPT.get(simpleParent);
360                     }
361                 }
362                 if (script == null) {
363                     script = UScript.getName(UScript.UNKNOWN);
364                 }
365                 Set<String> temp = new HashSet<>();
366                 temp.add(locale);
367                 checkTr(script_UnicodeMap);
368                 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
369                 if (mapping == null) {
370                     script_UnicodeMap.put(script, mapping = new UnicodeMap<>());
371                 }
372                 checkTr(script_UnicodeMap);
373                 mapping.composeWith(exemplars, temp, setComposer);
374                 checkTr(script_UnicodeMap);
375             }
376         }
377         System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
378         for (String script : script_UnicodeMap.keySet()) {
379             UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
380             writeCharToLocaleMapping(out, script, mapping);
381         }
382         return out;
383     }
384 
checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)385     private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
386         UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
387         if (unicodeMap == null) {
388             return;
389         }
390         Set<String> foo = unicodeMap.get(0x21);
391         if (foo == null) {
392             return;
393         }
394         if (foo.contains("tr")) {
395             System.out.println("huh?");
396         }
397     }
398 
writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)399     private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
400         BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
401         // script
402         System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
403         if (script.equals("Hangul")) { //  || script.equals("Common")
404             return; // skip these
405         }
406         // find out all the locales and all the characters
407         Set<String> allLocales = new TreeSet<>(UCA);
408         Set<String> allChars = new TreeSet<>(UCA);
409         Set<String> allStrings = new TreeSet<>(UCA);
410         for (Set<String> locales : mapping.getAvailableValues()) {
411             allLocales.addAll(locales);
412             UnicodeSet unicodeSet = mapping.keySet(locales);
413             for (String item : unicodeSet) {
414                 charBreaks.setText(item);
415                 int endFirst = charBreaks.next();
416                 if (endFirst == item.length()) {
417                     allChars.add(item);
418                 } else {
419                     allStrings.add(item);
420                 }
421             }
422         }
423         // get the columns, and show them
424         out.println("<table class='table' style='width:1%'>");
425         out.println("<caption>" + script + "</caption>");
426         exemplarHeader(out, allChars);
427 
428         for (String locale : allLocales) {
429             String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
430                 + cleanLocale(locale, true) + "</td>";
431             out.println("<tr>");
432             out.println(headerHeader);
433 
434             for (String item : allChars) {
435                 // String exemplarsWithoutBrackets = displayExemplars(item);
436                 if (mapping.get(item).contains(locale)) {
437                     out.println("<td class='cell'" +
438                         ">" + displayCharacter(item) + "</td>");
439                 } else {
440                     out.println("<td class='empty'>\u00a0</td>");
441                 }
442             }
443             // now strings, if any
444             StringBuilder strings = new StringBuilder();
445             int lastLineStart = 0;
446             for (String item : allStrings) {
447                 // String exemplarsWithoutBrackets = displayExemplars(item);
448                 if (mapping.get(item).contains(locale)) {
449                     int str_len = strings.length();
450                     if (str_len != 0) {
451                         if (str_len - lastLineStart > 20) {
452                             strings.append(System.lineSeparator());
453                             lastLineStart = str_len;
454                         } else {
455                             strings.append(' ');
456                         }
457                     }
458                     strings.append(displayCharacter(item));
459                 }
460             }
461             if (strings.length() == 0) {
462                 out.println("<td class='empty'>\u00a0</td>");
463             } else {
464                 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>")
465                     + "</td>");
466             }
467 
468             out.println(headerHeader);
469             out.println("</tr>");
470         }
471         exemplarHeader(out, allChars);
472         out.println("</table>");
473         out.flush();
474     }
475 
characterTitle(String item)476     private static String characterTitle(String item) {
477         return ("title='U+" +
478             toHTML.transform(
479                 Utility.hex(item, 4, ", U+", true, new StringBuilder())
480                     + " " + UCharacter.getName(item, ", "))
481             + "'");
482     }
483 
exemplarHeader(PrintWriter out, Set<String> allChars)484     private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
485         out.println("<tr>");
486         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
487         for (String item : allChars) {
488             out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
489         }
490         out.println("<th class='head'>Clusters</th>");
491         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
492         out.println("</tr>");
493     }
494 
495     static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();
496 
displayCharacter(String item)497     public static String displayCharacter(String item) {
498         if (item.length() == 0) return "<i>none</i>";
499         int ch = item.codePointAt(0);
500         if (NONSPACING.contains(ch)) {
501             item = "\u00a0" + item + "\u00a0";
502         }
503         String result = toHTML.transform(item);
504         return result;
505     }
506 
507     static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
508     static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
509     static {
510         allButScripts.remove(Fields.SCRIPT);
511     }
512 
cleanLocale(String item, boolean name)513     private static String cleanLocale(String item, boolean name) {
514         if (item == null) {
515             return "<i>null</i>";
516         }
517         boolean draft = item.endsWith("*");
518         if (draft) {
519             item = item.substring(0, item.length() - 1);
520         }
521         cleanLocaleParser.set(item);
522         item = cleanLocaleParser.toString(allButScripts);
523         String core = item;
524         item = toHTML.transform(item);
525         if (name) {
526             item = english.getName(core);
527             item = item == null ? "<i>null</i>" : toHTML.transform(item);
528         }
529         if (draft) {
530             item = "<i>" + item + "</i>";
531         }
532         return item;
533     }
534 
535     // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) {
536     // String exemplarsWithoutBrackets = displayExemplars(lastChars);
537     // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
538     // for (String item : allLocales) {
539     // String cleanItem;
540     // if (locales.contains(item)) {
541     // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
542     // } else {
543     // cleanItem = "<td class='value'>\u00a0</td>";
544     // }
545     // out.println(cleanItem);
546     // }
547     // out.println("</tr>");
548     // }
549 
550     // private static final StringTransform MyTransform = new StringTransform() {
551     //
552     // public String transform(String source) {
553     // StringBuilder builder = new StringBuilder();
554     // int cp = 0;
555     // builder.append("<span title='");
556     // String prefix = "";
557     // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
558     // cp = UTF16.charAt(source, i);
559     // if (i == 0) {
560     // if (COMBINING.contains(cp)) {
561     // prefix = "\u25CC";
562     // }
563     // } else {
564     // builder.append(" + ");
565     // }
566     // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp));
567     // }
568     // builder.append("'>").append(prefix).append(source).append("</span>");
569     // return builder.toString();
570     // }
571     //
572     // };
573 
574     // private static String displayExemplars(UnicodeSet lastChars) {
575     // String exemplarsWithoutBrackets = new PrettyPrinter()
576     // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
577     // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
578     // .setStrength2(Collator.PRIMARY))
579     // .setCompressRanges(true)
580     // .setToQuote(ALL_CHARS)
581     // .setQuoter(MyTransform)
582     // .format(lastChars);
583     // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1);
584     // return exemplarsWithoutBrackets;
585     // }
586 
587     // private static boolean isNextCharacter(String last, String value) {
588     // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
589     // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
590     // int lastChar = UTF16.charAt(last,0);
591     // int valueChar = UTF16.charAt(value,0);
592     // return lastChar + 1 == valueChar;
593     // }
594 
595     static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() {
596         @Override
597         public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
598             if (a == null) {
599                 return b;
600             } else if (b == null) {
601                 return a;
602             } else {
603                 TreeSet<String> result = new TreeSet<>(a);
604                 result.addAll(b);
605                 return result;
606             }
607         }
608     };
609 
610     static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();
611 
loadInformation(Factory cldrFactory)612     private static void loadInformation(Factory cldrFactory) {
613         Set<String> alllocales = cldrFactory.getAvailable();
614         String[] postFix = new String[] { "" };
615         // gather all information
616         // TODO tweek for value-laden attributes
617         for (String localeID : alllocales) {
618             System.out.println("Loading: " + localeID);
619             System.out.flush();
620 
621             CLDRFile cldrFile;
622             try {
623                 cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
624             } catch (IllegalArgumentException e) {
625                 System.err.println("Couldn't open " + localeID);
626                 continue;
627             }
628             if (cldrFile.isNonInheriting()) continue;
629             for (String path : cldrFile) {
630                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
631                     continue;
632                 }
633                 if (altProposedMatcher.reset(path).matches()) {
634                     continue;
635                 }
636                 if (path.indexOf("/alias") >= 0) continue;
637                 if (path.indexOf("/identity") >= 0) continue;
638                 if (path.indexOf("/references") >= 0) continue;
639                 PathHeader ph = fixPath(path, postFix);
640                 if (ph == null || ph.shouldHide()) {
641                     continue;
642                 }
643                 String fullPath = cldrFile.getFullXPath(path);
644                 String value = getValue(cldrFile, path, fullPath);
645                 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
646                     continue;
647                 }
648                 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
649                     || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
650                     postFix[0] = "*";
651                 }
652                 if (path.equals("//ldml/characters/exemplarCharacters")) {
653                     UnicodeSet exemplars;
654                     try {
655                         exemplars = new UnicodeSet(value);
656                         String script = UScript.getName(getFirstScript(exemplars));
657                         LOCALE_TO_SCRIPT.put(localeID, script);
658                     } catch (Exception e) {
659 
660                     }
661                 }
662                 Map<String, Set<String>> value_locales = path_value_locales.get(ph);
663                 if (value_locales == null) {
664                     path_value_locales.put(ph, value_locales = new TreeMap<>(
665                         standardCollation));
666                 }
667                 Set<String> locales = value_locales.get(value);
668                 if (locales == null) {
669                     value_locales.put(value, locales = new TreeSet<>());
670                 }
671                 locales.add(localeID + postFix[0]);
672             }
673         }
674         Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
675         for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
676             sorted.put(s.getValue(), s.getKey());
677         }
678         for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
679             System.out.println(s);
680         }
681     }
682 
    /**
     * Factory that {@code fixPath} uses to turn a path string into a {@link PathHeader}.
     * It is not assigned in this declaration.
     * NOTE(review): presumably initialized elsewhere in this class before the first
     * {@code fixPath} call - confirm.
     */
    static PathHeader.Factory pathHeaderFactory;
684 
685     /**
686      *
687      * @param path
688      * @param localePrefix
689      * @return
690      */
fixPath(String path, String[] localePrefix)691     private static PathHeader fixPath(String path, String[] localePrefix) {
692         if (localePrefix != null) {
693             localePrefix[0] = "";
694         }
695         return pathHeaderFactory.fromPath(path);
696     }
697 
removeAttributes(String xpath, Set<String> skipAttributes)698     private static String removeAttributes(String xpath, Set<String> skipAttributes) {
699         XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes
700         removeAttributes(parts, skipAttributes);
701         return parts.toString();
702     }
703 
704     /**
705      *
706      * @param parts
707      * @param skipAttributes
708      */
removeAttributes(XPathParts parts, Set<String> skipAttributes)709     private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
710         for (int i = 0; i < parts.size(); ++i) {
711             // String element = parts.getElement(i);
712             Map<String, String> attributes = parts.getAttributes(i);
713             for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
714                 String attribute = it.next();
715                 if (skipAttributes.contains(attribute)) it.remove();
716             }
717         }
718     }
719 
    /**
     * Attribute names that {@code getValue} strips from the full path when it substitutes the
     * last path element for an empty value.
     */
    static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt"));

    /**
     * Holder passed to {@code CLDRFile.getSourceLocaleID} by {@code getValue}, which then reads
     * {@code pathWhereFound} from it. A single mutable instance is reused for every call.
     */
    static Status status = new Status();
723 
724     /**
725      *
726      */
getValue(CLDRFile cldrFile, String path, String fullPath)727     private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
728         String value = cldrFile.getStringValue(path);
729         if (value == null) {
730             System.out.println("Null value for " + path);
731             return value;
732         }
733         cldrFile.getSourceLocaleID(path, status);
734         if (!path.equals(status.pathWhereFound)) {
735             // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
736             value = null;
737             return value;
738         }
739         if (value.length() == 0) {
740             XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes
741             removeAttributes(parts, skipSet);
742             int limit = parts.size();
743             value = parts.toString(limit - 1, limit);
744             return value;
745         }
746         return value;
747     }
748 
getFileName2(PathHeader header, String suffix)749     private static String getFileName2(PathHeader header, String suffix) {
750         String result = (header.getSection() + "." + header.getPage())
751             .replace(" ", "_")
752             .replace("/", "_")
753             .replace("(", "_")
754             .replace(")", "_");
755         if (suffix != null) {
756             result += "." + suffix;
757         }
758         return result.toLowerCase(Locale.ENGLISH);
759     }
760 
    /**
     * Two-slot holder handed to {@code ShowData.getChartTemplate} in {@code writeHeader}:
     * element 0 is printed at the top of each page by {@code writeHeader} and element 1 at the
     * bottom by {@code finish}.
     */
    static String[] headerAndFooter = new String[2];
    // NOTE(review): not referenced in this part of the file (getHeader uses
    // TransliteratorUtilities.toHTML instead) - confirm it is still needed.
    private static Transliterator toHTML;
763 
764     /**
765      * @param tsvFile TODO
766      * @param path2
767      *
768      */
start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)769     private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
770         throws IOException {
771         finish(out, tsvFile.value);
772         out = writeHeader(main, title, tsvFile);
773         out.println(headerString);
774         return out;
775     }
776 
getHeader(Set<PathHeader> set)777     public static String getHeader(Set<PathHeader> set) {
778         StringBuffer out = new StringBuffer("<table class='simple'><tr>");
779         String lastMain = "";
780         String lastSub = "";
781         for (PathHeader pathHeader : set) {
782             String mainName = pathHeader.getSection();
783             String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
784             if (!mainName.equals(lastMain)) {
785                 if (lastMain.length() != 0) {
786                     out.append("</tr>" + System.lineSeparator() + "<tr>");
787                 }
788                 out.append("<th align='right' nowrap style='vertical-align: top'><b>"
789                     + TransliteratorUtilities.toHTML.transform(mainName)
790                     + ":&nbsp;</b></th><td>");
791                 lastMain = mainName;
792                 lastSub = subName;
793             } else if (!subName.equals(lastSub)) {
794                 out.append(" | ");
795                 lastSub = subName;
796             } else {
797                 continue; // identical, skip
798             }
799             out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
800             if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
801                 for (String[] pair : EXEMPLARS) {
802                     out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
803                 }
804             }
805             continue;
806         }
807         return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
808     }
809 
writeHeader(String main, String title, Output<PrintWriter> tsvFile)810     private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
811         PrintWriter out;
812         out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
813         if (tsvFile.value == null) {
814             tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
815             tsvFile.value.println("# By-Type Data");
816             tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
817         }
818 
819         ShowData.getChartTemplate("By-Type Chart: " + title,
820             ToolConstants.CHART_DISPLAY_VERSION,
821             "",
822             headerAndFooter, null, false);
823         out.println(headerAndFooter[0]);
824         return out;
825     }
826 
827     /**
828      * @param tsvFile TODO
829      *
830      */
finish(PrintWriter out, PrintWriter tsvFile)831     private static void finish(PrintWriter out, PrintWriter tsvFile) {
832         if (out == null) return;
833         out.println("</table>");
834         out.println(headerAndFooter[1]);
835         out.close();
836     }
837 
finishAll(PrintWriter out, PrintWriter tsvFile)838     private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
839         // TODO Auto-generated method stub
840         //tsvFile.println("# EOF");
841         tsvFile.close();
842     }
843 }
844