• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.tool;
10 
11 import java.io.File;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.Arrays;
15 import java.util.Comparator;
16 import java.util.Date;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Iterator;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Map.Entry;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Matcher;
28 
29 import org.unicode.cldr.draft.FileUtilities;
30 import org.unicode.cldr.tool.ShowData.DataShower;
31 import org.unicode.cldr.util.CLDRFile;
32 import org.unicode.cldr.util.CLDRFile.Status;
33 import org.unicode.cldr.util.CLDRPaths;
34 import org.unicode.cldr.util.CldrUtility;
35 import org.unicode.cldr.util.Factory;
36 import org.unicode.cldr.util.FileCopier;
37 import org.unicode.cldr.util.LanguageTagParser;
38 import org.unicode.cldr.util.LanguageTagParser.Fields;
39 import org.unicode.cldr.util.LocaleIDParser;
40 import org.unicode.cldr.util.PathHeader;
41 import org.unicode.cldr.util.PathHeader.PageId;
42 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
43 import org.unicode.cldr.util.PatternCache;
44 import org.unicode.cldr.util.SimpleFactory;
45 import org.unicode.cldr.util.StringId;
46 import org.unicode.cldr.util.TransliteratorUtilities;
47 import org.unicode.cldr.util.XPathParts;
48 import org.xml.sax.SAXException;
49 
50 import com.google.common.collect.ImmutableMap;
51 import com.ibm.icu.dev.tool.UOption;
52 import com.ibm.icu.dev.util.UnicodeMap;
53 import com.ibm.icu.impl.Relation;
54 import com.ibm.icu.impl.Utility;
55 import com.ibm.icu.lang.UCharacter;
56 import com.ibm.icu.lang.UScript;
57 import com.ibm.icu.text.BreakIterator;
58 import com.ibm.icu.text.Collator;
59 import com.ibm.icu.text.Normalizer;
60 import com.ibm.icu.text.RuleBasedCollator;
61 import com.ibm.icu.text.RuleBasedNumberFormat;
62 import com.ibm.icu.text.Transliterator;
63 import com.ibm.icu.text.UTF16;
64 import com.ibm.icu.text.UnicodeSet;
65 import com.ibm.icu.text.UnicodeSetIterator;
66 import com.ibm.icu.util.Output;
67 import com.ibm.icu.util.ULocale;
68 
69 /**
70  * This is a simple class that walks through the CLDR hierarchy.
71  * It gathers together all the items from all the locales that share the
72  * same element chain, and thus presents a "sideways" view of the data, in files called
73  * by_type/X.html, where X is a type. X may be the concatenation of more than
74  * one element, where the file would otherwise be too large.
75  *
76  * @author medavis
77  */
78 /*
79  * Notes:
80  * http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
81  * http://developers.sun.com/dev/coolstuff/xml/readme.html
82  * http://lists.xml.org/archives/xml-dev/200007/msg00284.html
83  * http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
84  */
85 public class GenerateSidewaysView {
86     private static final String DIR_NAME = "by_type";
87     // debug flags
88     static final boolean DEBUG = false;
89     static final boolean DEBUG2 = false;
90     static final boolean DEBUG_SHOW_ADD = false;
91     static final boolean DEBUG_ELEMENT = false;
92     static final boolean DEBUG_SHOW_BAT = false;
93 
94     static final boolean FIX_ZONE_ALIASES = true;
95 
96     private static final int HELP1 = 0,
97         HELP2 = 1,
98         SOURCEDIR = 2,
99         DESTDIR = 3,
100         MATCH = 4,
101         SKIP = 5,
102         TZADIR = 6,
103         NONVALIDATING = 7,
104         SHOW_DTD = 8,
105         TRANSLIT = 9,
106         PATH = 10;
107 
108     private static final UOption[] options = {
109         UOption.HELP_H(),
110         UOption.HELP_QUESTION_MARK(),
111         UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY),
112         UOption.DESTDIR().setDefault(CLDRPaths.CHART_DIRECTORY + DIR_NAME + "/"), // C:/cvsdata/unicode/cldr/diff/by_type/
113         UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
114         UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
115         UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault(
116             "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
117         UOption.create("nonvalidating", 'n', UOption.NO_ARG),
118         UOption.create("dtd", 'w', UOption.NO_ARG),
119         UOption.create("transliterate", 'y', UOption.NO_ARG),
120         UOption.create("path", 'p', UOption.REQUIRES_ARG),
121     };
122 
123     private static final Matcher altProposedMatcher = CLDRFile.ALT_PROPOSED_PATTERN.matcher("");
124     // private static final UnicodeSet ALL_CHARS = new UnicodeSet(0, 0x10FFFF);
125     protected static final UnicodeSet COMBINING = new UnicodeSet("[[:m:]]").freeze();
126 
getFirstScript(UnicodeSet exemplars)127     static int getFirstScript(UnicodeSet exemplars) {
128         for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next();) {
129             int script = UScript.getScript(it.codepoint);
130             if (script == UScript.COMMON || script == UScript.INHERITED) {
131                 continue;
132             }
133             return script;
134         }
135         return UScript.COMMON;
136     }
137 
138     static Comparator<Object> UCA;
139     static {
140         RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
141         UCA2.setNumericCollation(true);
142         UCA2.setStrength(Collator.IDENTICAL);
143         UCA = new org.unicode.cldr.util.MultiComparator(UCA2, new UTF16.StringComparator(true, false, 0));
144     }
145 
146     private static Map<PathHeader, Map<String, Set<String>>> path_value_locales = new TreeMap<>();
147     private static long startTime = System.currentTimeMillis();
148 
149     static RuleBasedCollator standardCollation = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH);
150     static {
151         standardCollation.setStrength(Collator.IDENTICAL);
152         standardCollation.setNumericCollation(true);
153     }
154 
155     private static CLDRFile english;
156     // private static DataShower dataShower = new DataShower();
157     private static Matcher pathMatcher;
158 
main(String[] args)159     public static void main(String[] args) throws SAXException, IOException {
160         startTime = System.currentTimeMillis();
161         ToolUtilities.registerExtraTransliterators();
162         UOption.parseArgs(args, options);
163 
164         pathMatcher = options[PATH].value == null ? null : PatternCache.get(options[PATH].value).matcher("");
165 
166         File[] paths = {
167             new File(CLDRPaths.MAIN_DIRECTORY),
168             new File(CLDRPaths.ANNOTATIONS_DIRECTORY),
169             new File(CLDRPaths.SUBDIVISIONS_DIRECTORY)
170         };
171         Factory cldrFactory = SimpleFactory.make(paths, options[MATCH].value);
172 
173         // Factory cldrFactory = Factory.make(options[SOURCEDIR].value, options[MATCH].value);
174         english = cldrFactory.make("en", true);
175         pathHeaderFactory = PathHeader.getFactory(english);
176 
177         FileCopier.ensureDirectoryExists(options[DESTDIR].value);
178         FileCopier.copy(GenerateSidewaysView.class, "bytype-index.css", options[DESTDIR].value, "index.css");
179         FormattedFileWriter.copyIncludeHtmls(options[DESTDIR].value);
180 
181         // now get the info
182 
183         loadInformation(cldrFactory);
184         String oldMain = "";
185         PrintWriter out = null;
186 
187         System.out.println("Getting types " + path_value_locales.size());
188         // Set<String> types = new TreeSet<String>();
189         // for (PathHeader path : path_value_locales.keySet()) {
190         // String main = getFileName2(path);
191         // if (!main.equals(oldMain)) {
192         // oldMain = main;
193         // types.add(main);
194         // }
195         // }
196         String headerString = getHeader(path_value_locales.keySet());
197         FileCopier.copyAndReplace(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
198             ImmutableMap.of(
199                 "%header%", headerString,
200                 "%version%", ToolConstants.CHART_DISPLAY_VERSION,
201                 "%index%", "../index.html",
202                 "%index-title%", "Main Charts Index",
203                 "%date%", CldrUtility.isoFormatDateOnly(new Date())));
204 //        FileUtilities.copyFile(GenerateSidewaysView.class, "bytype-index.html", options[DESTDIR].value, "index.html",
205 //            new String[] { "%header%", headerString });
206 
207         System.out.println("Printing files in " + new File(options[DESTDIR].value).getAbsolutePath());
208         // Transliterator toLatin = Transliterator.getInstance("any-latin");
209         toHTML = TransliteratorUtilities.toHTML;
210         // UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
211 
212         String oldHeader = "";
213         Output<PrintWriter> tsvFile = new Output<>();
214 
215         for (PathHeader path : path_value_locales.keySet()) {
216             String main = getFileName2(path, null);
217             if (!main.equals(oldMain)) {
218                 oldMain = main;
219                 out = start(out, main, headerString, path.getSection() + ":" + path.getPage(), tsvFile);
220                 out.println("<table class='table'>");
221                 oldHeader = "";
222             }
223             String key = path.getCode();
224             String anchor = toHTML.transliterate(key);
225 
226             String originalPath = path.getOriginalPath(); // prettyPath.getOriginal(path);
227             String englishValue = english.getStringValue(originalPath);
228             if (englishValue != null) {
229                 englishValue = "English: ‹" + englishValue + "›";
230             } else {
231                 englishValue = "";
232             }
233 
234             String header = path.getHeader();
235             if (!header.equals(oldHeader) && !header.equals("null")) {
236                 out.println("<tr><th colSpan='2' class='pathHeader'>" + CldrUtility.getDoubleLinkedText(header)
237                     + "</th></tr>");
238                 oldHeader = header;
239             }
240             String anchorId = Long.toHexString(StringId.getId(path.getOriginalPath()));
241             out.println("<tr>" +
242                 "<th class='path'>" + CldrUtility.getDoubleLinkedText(anchorId, anchor) + "</th>" +
243                 "<th class='path'>" + toHTML.transliterate(englishValue) + "</th>" +
244                 "</tr>");
245             Map<String, Set<String>> value_locales = path_value_locales.get(path);
246             for (String value : value_locales.keySet()) {
247                 // String outValue = toHTML.transliterate(value);
248                 // String transValue = value;
249                 // try {
250                 // transValue = toLatin.transliterate(value);
251                 // } catch (RuntimeException e) {
252                 // }
253                 // if (!transValue.equals(value)) {
254                 // outValue = "<span title='" + toHTML.transliterate(transValue) + "'>" + outValue + "</span>";
255                 // }
256                 String valueClass = " class='value'";
257                 if (DataShower.getBidiStyle(value).length() != 0) {
258                     valueClass = " class='rtl_value'";
259                 }
260                 out.println("<tr><th" + valueClass + ">" + DataShower.getPrettyValue(value) + "</th><td class='td'>");
261                 tsvFile.value.print(
262                     path.getSection()
263                         + "\t" + path.getPage()
264                         + "\t" + path.getHeader()
265                         + "\t" + path.getCode()
266                         + "\t" + value
267                         + "\t");
268 
269                 Set<String> locales = value_locales.get(value);
270                 boolean first = true;
271                 boolean containsRoot = locales.contains("root");
272                 for (String locale : locales) {
273                     if (first)
274                         first = false;
275                     else
276                         out.print(" ");
277                     if (locale.endsWith("*")) {
278                         locale = locale.substring(0, locale.length() - 1);
279                         out.print("<i>\u00B7" + locale + "\u00B7</i>");
280                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
281                     } else if (!containsRoot) {
282                         out.print("\u00B7" + locale + "\u00B7");
283                         tsvFile.value.print("\u00B7" + locale + "\u00B7");
284                     } else if (locale.contains("_")) {
285                         // not same as root, but need to test for parent
286                         // if the parent is not in the same list, then we include anyway.
287                         // Cf http://unicode.org/cldr/trac/ticket/7228
288                         String parent = LocaleIDParser.getParent(locale);
289                         if (!locales.contains(parent)) {
290                             out.print("<b>\u00B7" + locale + "\u00B7</b>");
291                             tsvFile.value.print("\u00B7" + locale + "\u00B7");
292                         }
293                     }
294                 }
295                 if (containsRoot) {
296                     out.print("<b>\u00B7all\u00B7others\u00B7</b>");
297                     tsvFile.value.print("\u00B7all-others\u00B7");
298                 }
299                 out.println("</td></tr>");
300                 tsvFile.value.println();
301             }
302         }
303         for (String[] pair : EXEMPLARS) {
304             showExemplars(out, headerString, pair[0], pair[1], pair[2], tsvFile);
305         }
306         finish(out, tsvFile.value);
307         finishAll(out, tsvFile.value);
308         System.out.println("Done in " + new RuleBasedNumberFormat(new ULocale("en"), RuleBasedNumberFormat.DURATION)
309             .format((System.currentTimeMillis() - startTime) / 1000.0));
310     }
311 
312     static final String[][] EXEMPLARS = {
313         { "//ldml/characters/exemplarCharacters", "main", "Main Exemplars" },
314         { "//ldml/characters/exemplarCharacters[@type=\"punctuation\"]", "punctuation", "Punctuation Exemplars" },
315         { "//ldml/characters/exemplarCharacters[@type=\"index\"]", "index", "Index Exemplars" },
316         // TODO look at numbers, auxiliary
317     };
318 
showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title, Output<PrintWriter> tsvFile)319     private static PrintWriter showExemplars(PrintWriter out, String headerString, String pathName, String variant, String title,
320         Output<PrintWriter> tsvFile)
321         throws IOException {
322         PathHeader cleanPath = fixPath(pathName, null);
323         String filename = getFileName2(cleanPath, variant);
324         out = start(out, filename, headerString, title, tsvFile);
325         Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
326 
327         // TODO change logic so that aux characters characters work well.
328 
329         Map<String, UnicodeMap<Set<String>>> script_UnicodeMap = new TreeMap<>();
330         // UnicodeMap mapping = new UnicodeMap();
331         UnicodeSet stuffToSkip = new UnicodeSet("[:Han:]");
332 
333         // get the locale information
334         UnicodeSet totalExemplars = new UnicodeSet();
335         for (String value : value_locales.keySet()) {
336             // flatten out UnicodeSet
337             UnicodeSet exemplars = new UnicodeSet(value);
338             if (variant.equals("main")) {
339                 UnicodeSet extras = new UnicodeSet();
340                 for (String item : exemplars) {
341                     extras.addAll(Normalizer.normalize(item, Normalizer.NFD));
342                 }
343                 exemplars.addAll(extras);
344             }
345             totalExemplars.addAll(exemplars);
346             exemplars.removeAll(stuffToSkip);
347 
348             Set<String> locales = value_locales.get(value);
349             //String script = UScript.getName(getFirstScript(exemplars));
350             for (String locale : locales) {
351                 checkTr(script_UnicodeMap);
352                 String key = locale.endsWith("*") ? locale.substring(0, locale.length() - 1) : locale;
353                 String script = LOCALE_TO_SCRIPT.get(key);
354                 // try a few variants until we get the script
355                 if (script == null && key.contains("_")) {
356                     String simpleParent = LanguageTagParser.getSimpleParent(key);
357                     script = LOCALE_TO_SCRIPT.get(simpleParent);
358                     if (script == null && simpleParent.contains("_")) {
359                         simpleParent = LanguageTagParser.getSimpleParent(simpleParent);
360                         script = LOCALE_TO_SCRIPT.get(simpleParent);
361                     }
362                 }
363                 if (script == null) {
364                     script = UScript.getName(UScript.UNKNOWN);
365                 }
366                 Set<String> temp = new HashSet<>();
367                 temp.add(locale);
368                 checkTr(script_UnicodeMap);
369                 UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
370                 if (mapping == null) {
371                     script_UnicodeMap.put(script, mapping = new UnicodeMap<>());
372                 }
373                 checkTr(script_UnicodeMap);
374                 mapping.composeWith(exemplars, temp, setComposer);
375                 checkTr(script_UnicodeMap);
376             }
377         }
378         System.out.println("@@@TOTAL:\t" + variant + "\t" + totalExemplars.toPattern(false));
379         for (String script : script_UnicodeMap.keySet()) {
380             UnicodeMap<Set<String>> mapping = script_UnicodeMap.get(script);
381             writeCharToLocaleMapping(out, script, mapping);
382         }
383         return out;
384     }
385 
checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap)386     private static void checkTr(Map<String, UnicodeMap<Set<String>>> script_UnicodeMap) {
387         UnicodeMap<Set<String>> unicodeMap = script_UnicodeMap.get("Cyrillic");
388         if (unicodeMap == null) {
389             return;
390         }
391         Set<String> foo = unicodeMap.get(0x21);
392         if (foo == null) {
393             return;
394         }
395         if (foo.contains("tr")) {
396             System.out.println("huh?");
397         }
398     }
399 
writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping)400     private static void writeCharToLocaleMapping(PrintWriter out, String script, UnicodeMap<Set<String>> mapping) {
401         BreakIterator charBreaks = BreakIterator.getCharacterInstance(ULocale.ROOT); // TODO, make default language for
402         // script
403         System.out.println("@@Exemplars for\t" + script + "\t" + mapping.keySet());
404         if (script.equals("Hangul")) { //  || script.equals("Common")
405             return; // skip these
406         }
407         // find out all the locales and all the characters
408         Set<String> allLocales = new TreeSet<>(UCA);
409         Set<String> allChars = new TreeSet<>(UCA);
410         Set<String> allStrings = new TreeSet<>(UCA);
411         for (Set<String> locales : mapping.getAvailableValues()) {
412             allLocales.addAll(locales);
413             UnicodeSet unicodeSet = mapping.keySet(locales);
414             for (String item : unicodeSet) {
415                 charBreaks.setText(item);
416                 int endFirst = charBreaks.next();
417                 if (endFirst == item.length()) {
418                     allChars.add(item);
419                 } else {
420                     allStrings.add(item);
421                 }
422             }
423         }
424         // get the columns, and show them
425         out.println("<table class='table' style='width:1%'>");
426         out.println("<caption>" + script + "</caption>");
427         exemplarHeader(out, allChars);
428 
429         for (String locale : allLocales) {
430             String headerHeader = "<th class='head'>" + cleanLocale(locale, false) + "</th><td class='head nowrap left'>"
431                 + cleanLocale(locale, true) + "</td>";
432             out.println("<tr>");
433             out.println(headerHeader);
434 
435             for (String item : allChars) {
436                 // String exemplarsWithoutBrackets = displayExemplars(item);
437                 if (mapping.get(item).contains(locale)) {
438                     out.println("<td class='cell'" +
439                         ">" + displayCharacter(item) + "</td>");
440                 } else {
441                     out.println("<td class='empty'>\u00a0</td>");
442                 }
443             }
444             // now strings, if any
445             StringBuilder strings = new StringBuilder();
446             int lastLineStart = 0;
447             for (String item : allStrings) {
448                 // String exemplarsWithoutBrackets = displayExemplars(item);
449                 if (mapping.get(item).contains(locale)) {
450                     int str_len = strings.length();
451                     if (str_len != 0) {
452                         if (str_len - lastLineStart > 20) {
453                             strings.append(System.lineSeparator());
454                             lastLineStart = str_len;
455                         } else {
456                             strings.append(' ');
457                         }
458                     }
459                     strings.append(displayCharacter(item));
460                 }
461             }
462             if (strings.length() == 0) {
463                 out.println("<td class='empty'>\u00a0</td>");
464             } else {
465                 out.println("<td class='cell nowrap'>" + displayCharacter(strings.toString()).replace(System.lineSeparator(), "<br>")
466                     + "</td>");
467             }
468 
469             out.println(headerHeader);
470             out.println("</tr>");
471         }
472         exemplarHeader(out, allChars);
473         out.println("</table>");
474         out.flush();
475     }
476 
characterTitle(String item)477     private static String characterTitle(String item) {
478         return ("title='U+" +
479             toHTML.transform(
480                 Utility.hex(item, 4, ", U+", true, new StringBuilder())
481                     + " " + UCharacter.getName(item, ", "))
482             + "'");
483     }
484 
exemplarHeader(PrintWriter out, Set<String> allChars)485     private static void exemplarHeader(PrintWriter out, Set<String> allChars) {
486         out.println("<tr>");
487         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
488         for (String item : allChars) {
489             out.println("<th class='head' " + characterTitle(item) + ">" + displayCharacter(item) + "</th>");
490         }
491         out.println("<th class='head'>Clusters</th>");
492         out.println("<th class='head nowrap' colSpan='2'>Locale \\\u00a0Chars</th>");
493         out.println("</tr>");
494     }
495 
496     static final UnicodeSet NONSPACING = new UnicodeSet("[[:Mn:][:Me:][:default_ignorable_code_point:]]").freeze();
497 
displayCharacter(String item)498     public static String displayCharacter(String item) {
499         if (item.length() == 0) return "<i>none</i>";
500         int ch = item.codePointAt(0);
501         if (NONSPACING.contains(ch)) {
502             item = "\u00a0" + item + "\u00a0";
503         }
504         String result = toHTML.transform(item);
505         return result;
506     }
507 
508     static LanguageTagParser cleanLocaleParser = new LanguageTagParser();
509     static Set<Fields> allButScripts = EnumSet.allOf(Fields.class);
510     static {
511         allButScripts.remove(Fields.SCRIPT);
512     }
513 
cleanLocale(String item, boolean name)514     private static String cleanLocale(String item, boolean name) {
515         if (item == null) {
516             return "<i>null</i>";
517         }
518         boolean draft = item.endsWith("*");
519         if (draft) {
520             item = item.substring(0, item.length() - 1);
521         }
522         cleanLocaleParser.set(item);
523         item = cleanLocaleParser.toString(allButScripts);
524         String core = item;
525         item = toHTML.transform(item);
526         if (name) {
527             item = english.getName(core);
528             item = item == null ? "<i>null</i>" : toHTML.transform(item);
529         }
530         if (draft) {
531             item = "<i>" + item + "</i>";
532         }
533         return item;
534     }
535 
536     // private static void showExemplarRow(PrintWriter out, Set<String> allLocales, UnicodeSet lastChars, Set locales) {
537     // String exemplarsWithoutBrackets = displayExemplars(lastChars);
538     // out.println("<tr><th class='head'>" + exemplarsWithoutBrackets + "</th>");
539     // for (String item : allLocales) {
540     // String cleanItem;
541     // if (locales.contains(item)) {
542     // cleanItem = "<th class='value'>" + cleanLocale(item, false) + "</th>";
543     // } else {
544     // cleanItem = "<td class='value'>\u00a0</td>";
545     // }
546     // out.println(cleanItem);
547     // }
548     // out.println("</tr>");
549     // }
550 
551     // private static final StringTransform MyTransform = new StringTransform() {
552     //
553     // public String transform(String source) {
554     // StringBuilder builder = new StringBuilder();
555     // int cp = 0;
556     // builder.append("<span title='");
557     // String prefix = "";
558     // for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
559     // cp = UTF16.charAt(source, i);
560     // if (i == 0) {
561     // if (COMBINING.contains(cp)) {
562     // prefix = "\u25CC";
563     // }
564     // } else {
565     // builder.append(" + ");
566     // }
567     // builder.append("U+").append(com.ibm.icu.impl.Utility.hex(cp,4)).append(' ').append(UCharacter.getExtendedName(cp));
568     // }
569     // builder.append("'>").append(prefix).append(source).append("</span>");
570     // return builder.toString();
571     // }
572     //
573     // };
574 
575     // private static String displayExemplars(UnicodeSet lastChars) {
576     // String exemplarsWithoutBrackets = new PrettyPrinter()
577     // .setOrdering(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT))
578     // .setSpaceComparator(UCA != null ? UCA : Collator.getInstance(ULocale.ROOT)
579     // .setStrength2(Collator.PRIMARY))
580     // .setCompressRanges(true)
581     // .setToQuote(ALL_CHARS)
582     // .setQuoter(MyTransform)
583     // .format(lastChars);
584     // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, exemplarsWithoutBrackets.length() - 1);
585     // return exemplarsWithoutBrackets;
586     // }
587 
588     // private static boolean isNextCharacter(String last, String value) {
589     // if (UTF16.hasMoreCodePointsThan(last, 1)) return false;
590     // if (UTF16.hasMoreCodePointsThan(value, 1)) return false;
591     // int lastChar = UTF16.charAt(last,0);
592     // int valueChar = UTF16.charAt(value,0);
593     // return lastChar + 1 == valueChar;
594     // }
595 
596     static UnicodeMap.Composer<Set<String>> setComposer = new UnicodeMap.Composer<Set<String>>() {
597         @Override
598         public Set<String> compose(int codepoint, String string, Set<String> a, Set<String> b) {
599             if (a == null) {
600                 return b;
601             } else if (b == null) {
602                 return a;
603             } else {
604                 TreeSet<String> result = new TreeSet<>(a);
605                 result.addAll(b);
606                 return result;
607             }
608         }
609     };
610 
611     static Map<String, String> LOCALE_TO_SCRIPT = new HashMap<>();
612 
loadInformation(Factory cldrFactory)613     private static void loadInformation(Factory cldrFactory) {
614         Set<String> alllocales = cldrFactory.getAvailable();
615         String[] postFix = new String[] { "" };
616         // gather all information
617         // TODO tweek for value-laden attributes
618         for (String localeID : alllocales) {
619             System.out.println("Loading: " + localeID);
620             System.out.flush();
621 
622             CLDRFile cldrFile;
623             try {
624                 cldrFile = cldrFactory.make(localeID, localeID.equals("root"));
625             } catch (IllegalArgumentException e) {
626                 System.err.println("Couldn't open " + localeID);
627                 continue;
628             }
629             if (cldrFile.isNonInheriting()) continue;
630             for (String path : cldrFile) {
631                 if (pathMatcher != null && !pathMatcher.reset(path).matches()) {
632                     continue;
633                 }
634                 if (altProposedMatcher.reset(path).matches()) {
635                     continue;
636                 }
637                 if (path.indexOf("/alias") >= 0) continue;
638                 if (path.indexOf("/identity") >= 0) continue;
639                 if (path.indexOf("/references") >= 0) continue;
640                 PathHeader cleanPath = fixPath(path, postFix);
641                 final SurveyToolStatus surveyToolStatus = cleanPath.getSurveyToolStatus();
642                 if (surveyToolStatus == SurveyToolStatus.DEPRECATED || surveyToolStatus == SurveyToolStatus.HIDE) {
643                     // System.out.println("Skipping " + path);
644                     continue;
645                 }
646                 String fullPath = cldrFile.getFullXPath(path);
647                 String value = getValue(cldrFile, path, fullPath);
648                 if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) {
649                     continue;
650                 }
651                 if (fullPath.indexOf("[@draft=\"unconfirmed\"]") >= 0
652                     || fullPath.indexOf("[@draft=\"provisional\"]") >= 0) {
653                     postFix[0] = "*";
654                 }
655                 if (path.equals("//ldml/characters/exemplarCharacters")) {
656                     UnicodeSet exemplars;
657                     try {
658                         exemplars = new UnicodeSet(value);
659                         String script = UScript.getName(getFirstScript(exemplars));
660                         LOCALE_TO_SCRIPT.put(localeID, script);
661                     } catch (Exception e) {
662                         int debug = 0;
663                     }
664                 }
665                 Map<String, Set<String>> value_locales = path_value_locales.get(cleanPath);
666                 if (value_locales == null) {
667                     path_value_locales.put(cleanPath, value_locales = new TreeMap<>(
668                         standardCollation));
669                 }
670                 Set<String> locales = value_locales.get(value);
671                 if (locales == null) {
672                     value_locales.put(value, locales = new TreeSet<>());
673                 }
674                 locales.add(localeID + postFix[0]);
675             }
676         }
677         Relation<String, String> sorted = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
678         for (Entry<String, String> s : LOCALE_TO_SCRIPT.entrySet()) {
679             sorted.put(s.getValue(), s.getKey());
680         }
681         for (Entry<String, Set<String>> s : sorted.keyValuesSet()) {
682             System.out.println(s);
683         }
684     }
685 
686     static PathHeader.Factory pathHeaderFactory;
687 
688     /**
689      *
690      * @param path
691      * @param localePrefix
692      * @return
693      */
fixPath(String path, String[] localePrefix)694     private static PathHeader fixPath(String path, String[] localePrefix) {
695         if (localePrefix != null) {
696             localePrefix[0] = "";
697         }
698         return pathHeaderFactory.fromPath(path);
699     }
700 
removeAttributes(String xpath, Set<String> skipAttributes)701     private static String removeAttributes(String xpath, Set<String> skipAttributes) {
702         XPathParts parts = XPathParts.getFrozenInstance(xpath).cloneAsThawed(); // not frozen, for removeAttributes
703         removeAttributes(parts, skipAttributes);
704         return parts.toString();
705     }
706 
707     /**
708      *
709      * @param parts
710      * @param skipAttributes
711      */
removeAttributes(XPathParts parts, Set<String> skipAttributes)712     private static void removeAttributes(XPathParts parts, Set<String> skipAttributes) {
713         for (int i = 0; i < parts.size(); ++i) {
714             // String element = parts.getElement(i);
715             Map<String, String> attributes = parts.getAttributes(i);
716             for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
717                 String attribute = it.next();
718                 if (skipAttributes.contains(attribute)) it.remove();
719             }
720         }
721     }
722 
723     static Set<String> skipSet = new HashSet<>(Arrays.asList("draft", "alt"));
724 
725     static Status status = new Status();
726 
727     /**
728      *
729      */
getValue(CLDRFile cldrFile, String path, String fullPath)730     private static String getValue(CLDRFile cldrFile, String path, String fullPath) {
731         String value = cldrFile.getStringValue(path);
732         if (value == null) {
733             System.out.println("Null value for " + path);
734             return value;
735         }
736         cldrFile.getSourceLocaleID(path, status);
737         if (!path.equals(status.pathWhereFound)) {
738             // value = "[" + prettyPath.getPrettyPath(status.pathWhereFound, false) + "]";
739             value = null;
740             return value;
741         }
742         if (value.length() == 0) {
743             XPathParts parts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for removeAttributes
744             removeAttributes(parts, skipSet);
745             int limit = parts.size();
746             value = parts.toString(limit - 1, limit);
747             return value;
748         }
749         return value;
750     }
751 
getFileName2(PathHeader header, String suffix)752     private static String getFileName2(PathHeader header, String suffix) {
753         String result = (header.getSection() + "." + header.getPage())
754             .replace(" ", "_")
755             .replace("/", "_")
756             .replace("(", "_")
757             .replace(")", "_");
758         if (suffix != null) {
759             result += "." + suffix;
760         }
761         return result.toLowerCase(Locale.ENGLISH);
762     }
763 
764     static String[] headerAndFooter = new String[2];
765     private static Transliterator toHTML;
766 
767     /**
768      * @param tsvFile TODO
769      * @param path2
770      *
771      */
start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)772     private static PrintWriter start(PrintWriter out, String main, String headerString, String title, Output<PrintWriter> tsvFile)
773         throws IOException {
774         finish(out, tsvFile.value);
775         out = writeHeader(main, title, tsvFile);
776         out.println(headerString);
777         return out;
778     }
779 
getHeader(Set<PathHeader> set)780     public static String getHeader(Set<PathHeader> set) {
781         StringBuffer out = new StringBuffer("<table class='simple'><tr>");
782         String lastMain = "";
783         String lastSub = "";
784         for (PathHeader pathHeader : set) {
785             String mainName = pathHeader.getSection();
786             String subName = TransliteratorUtilities.toHTML.transform(pathHeader.getPage());
787             if (!mainName.equals(lastMain)) {
788                 if (lastMain.length() != 0) {
789                     out.append("</tr>" + System.lineSeparator() + "<tr>");
790                 }
791                 out.append("<th align='right' nowrap style='vertical-align: top'><b>"
792                     + TransliteratorUtilities.toHTML.transform(mainName)
793                     + ":&nbsp;</b></th><td>");
794                 lastMain = mainName;
795                 lastSub = subName;
796             } else if (!subName.equals(lastSub)) {
797                 out.append(" | ");
798                 lastSub = subName;
799             } else {
800                 continue; // identical, skip
801             }
802             out.append("<a href='" + getFileName2(pathHeader, null) + ".html'>" + subName + "</a>");
803             if (pathHeader.getPageId() == PageId.Alphabetic_Information) {
804                 for (String[] pair : EXEMPLARS) {
805                     out.append(" | <a href='" + getFileName2(pathHeader, pair[1]) + ".html'>" + pair[2] + "</a>");
806                 }
807             }
808             continue;
809         }
810         return out.append("</td></tr>" + System.lineSeparator() + "</table>").toString();
811     }
812 
writeHeader(String main, String title, Output<PrintWriter> tsvFile)813     private static PrintWriter writeHeader(String main, String title, Output<PrintWriter> tsvFile) throws IOException {
814         PrintWriter out;
815         out = FileUtilities.openUTF8Writer(options[DESTDIR].value, main + ".html");
816         if (tsvFile.value == null) {
817             tsvFile.value = FileUtilities.openUTF8Writer(Chart.getTsvDir(options[DESTDIR].value, DIR_NAME), DIR_NAME + ".tsv");
818             tsvFile.value.println("# By-Type Data");
819             tsvFile.value.println("# Section\tPage\tHeader\tCode\tValue\tLocales");
820         }
821 
822         ShowData.getChartTemplate("By-Type Chart: " + title,
823             ToolConstants.CHART_DISPLAY_VERSION,
824             "",
825             headerAndFooter, null, false);
826         out.println(headerAndFooter[0]);
827         return out;
828     }
829 
830     /**
831      * @param tsvFile TODO
832      *
833      */
finish(PrintWriter out, PrintWriter tsvFile)834     private static void finish(PrintWriter out, PrintWriter tsvFile) {
835         if (out == null) return;
836         out.println("</table>");
837         out.println(headerAndFooter[1]);
838         out.close();
839     }
840 
finishAll(PrintWriter out, PrintWriter tsvFile)841     private static void finishAll(PrintWriter out, PrintWriter tsvFile) {
842         // TODO Auto-generated method stub
843         //tsvFile.println("# EOF");
844         tsvFile.close();
845     }
846 }
847