• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004-2011, International Business Machines Corporation and   *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.tool;
8 
9 import java.io.IOException;
10 import java.io.PrintWriter;
11 import java.io.StringWriter;
12 import java.io.UnsupportedEncodingException;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.Collections;
17 import java.util.Comparator;
18 import java.util.Date;
19 import java.util.EnumSet;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Iterator;
23 import java.util.LinkedHashSet;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.Map;
27 import java.util.Map.Entry;
28 import java.util.Set;
29 import java.util.SortedMap;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32 
33 import org.unicode.cldr.draft.FileUtilities;
34 import org.unicode.cldr.draft.ScriptMetadata;
35 import org.unicode.cldr.draft.ScriptMetadata.Info;
36 import org.unicode.cldr.util.ArrayComparator;
37 import org.unicode.cldr.util.CLDRConfig;
38 import org.unicode.cldr.util.CLDRFile;
39 import org.unicode.cldr.util.CLDRFile.WinningChoice;
40 import org.unicode.cldr.util.CLDRPaths;
41 import org.unicode.cldr.util.CLDRTool;
42 import org.unicode.cldr.util.CLDRURLS;
43 import org.unicode.cldr.util.CldrUtility;
44 import org.unicode.cldr.util.Factory;
45 import org.unicode.cldr.util.FileCopier;
46 import org.unicode.cldr.util.Iso639Data;
47 import org.unicode.cldr.util.Iso639Data.Scope;
48 import org.unicode.cldr.util.Iso639Data.Type;
49 import org.unicode.cldr.util.LanguageTagParser;
50 import org.unicode.cldr.util.Level;
51 import org.unicode.cldr.util.LocaleIDParser;
52 import org.unicode.cldr.util.Log;
53 import org.unicode.cldr.util.Organization;
54 import org.unicode.cldr.util.StandardCodes;
55 import org.unicode.cldr.util.StandardCodes.CodeType;
56 import org.unicode.cldr.util.SupplementalDataInfo;
57 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
58 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
59 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
60 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
61 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
62 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
63 import org.unicode.cldr.util.TransliteratorUtilities;
64 import org.unicode.cldr.util.XPathParts;
65 
66 import com.google.common.base.Joiner;
67 import com.google.common.collect.ImmutableMap;
68 import com.google.common.collect.Multimap;
69 import com.google.common.collect.Multimaps;
70 import com.google.common.collect.TreeMultimap;
71 import com.ibm.icu.impl.Relation;
72 import com.ibm.icu.impl.Row.R2;
73 import com.ibm.icu.impl.Row.R4;
74 import com.ibm.icu.lang.UCharacter;
75 import com.ibm.icu.text.Collator;
76 import com.ibm.icu.text.Normalizer;
77 import com.ibm.icu.text.Normalizer2;
78 import com.ibm.icu.text.NumberFormat;
79 import com.ibm.icu.text.UTF16;
80 import com.ibm.icu.text.UnicodeSet;
81 import com.ibm.icu.util.ICUUncheckedIOException;
82 import com.ibm.icu.util.ULocale;
83 
84 @CLDRTool(alias = "showlanguages", description = "Generate Language info charts")
85 public class ShowLanguages {
86     private static final boolean SHOW_NATIVE = true; // gates the native-locale lookup done in addLanguageScriptCells2
87 
    // Shared ordering for sorted collections (used by the LanguageInfo fields below): an English
    // ICU Collator combined with a UTF16.StringComparator via MultiComparator.
    // NOTE(review): presumably the second comparator only breaks ties left by the first - confirm.
88     static Comparator col = new org.unicode.cldr.util.MultiComparator(
89         Collator.getInstance(new ULocale("en")),
90         new UTF16.StringComparator(true, false, 0));
91 
    // Code registry; the visible code queries it for Survey Tool display codes and good territory codes.
92     static StandardCodes sc = StandardCodes.make();
93 
    // Factory taken from the shared CLDRConfig; used to load locale files and handed to LanguageInfo.
94     static Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory();//.make(CLDRPaths.MAIN_DIRECTORY, ".*");
    // English CLDRFile taken from the shared CLDRConfig; source of the English display names in the charts.
95     static CLDRFile english = CLDRConfig.getInstance().getEnglish();
96 
main(String[] args)97     public static void main(String[] args) throws IOException {
98         System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR);
99         FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR);
100         FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR);
101         FormattedFileWriter.copyIncludeHtmls(FormattedFileWriter.CHART_TARGET_DIR);
102 
103         StringWriter sw = printLanguageData(cldrFactory, "index.html");
104         writeSupplementalIndex("index.html", sw);
105 
106         // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*");
107         // printLanguageData(cldrFactory, "language_info2.txt");
108         System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR);
109     }
110 
111     /**
112      * Anchors shared by every chart writer in this class; its toString() fills the %contents% placeholder of the supplemental index.
113      */
114     public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = new FormattedFileWriter.Anchors();
115 
    // Supplemental data read from CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY. The static initializer
    // that fills languageToBestStatus reads this field, so it has to be declared before it.
116     static SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo
117         .getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
118 
printLanguageData(Factory cldrFactory, String filename)119     private static StringWriter printLanguageData(Factory cldrFactory, String filename) throws IOException {
120         StringWriter sw = new StringWriter();
121         PrintWriter pw = new PrintWriter(sw);
122 
123         new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
124         ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS, null);
125 
126         new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
127         new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
128         new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
129         new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
130         if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) {
131             new ChartUnitConversions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
132             new ChartUnitPreferences().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
133         }
134         if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) {
135             new ChartGrammaticalForms().writeChart(SUPPLEMENTAL_INDEX_ANCHORS);
136         }
137         // since we don't want these listed on the supplemental page, use null
138 
139         new ShowPlurals().printPlurals(english, null, pw, cldrFactory);
140 
141         LanguageInfo linfo = new LanguageInfo(cldrFactory);
142 
143         linfo.showCoverageGoals(pw);
144 
145         linfo.printLikelySubtags(pw);
146 
147         linfo.showCountryLanguageInfo(pw);
148 
149         linfo.showLanguageCountryInfo(pw);
150 
151 //      linfo.showTerritoryInfo();
152 //      linfo.printCountryData(pw);
153 
154         // linfo.printDeprecatedItems(pw);
155 
156         // PrintWriter pw1 = new PrintWriter(new FormattedFileWriter(pw, "Languages and Territories", null));
157         // pw1.println("<tr><th>Language \u2192 Territories");
158         // pw1.println("</th><th>Territory \u2192 Language");
159         // pw1.println("</th><th>Territories Not Represented");
160         // pw1.println("</th><th>Languages Not Represented");
161         // pw1.println("</th></tr>");
162         //
163         // pw1.println("<tr><td>");
164         // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME);
165         // pw1.println("</td><td>");
166         // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME);
167         // pw1.println("</td><td>");
168         // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME);
169         // pw1.println("</td><td>");
170         // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME);
171         // pw1.println("</td></tr>");
172         //
173         // pw1.close();
174 
175         printLanguageScript(linfo, pw);
176         printScriptLanguageTerritory(linfo, pw);
177 
178         linfo.showCorrespondances();
179 
180         // linfo.showCalendarData(pw);
181 
182         linfo.showCountryInfo(pw);
183         linfo.printCurrency(pw);
184         linfo.printContains(pw);
185 
186         linfo.printWindows_Tzid(pw);
187         linfo.printAliases(pw);
188 
189         linfo.printCharacters(pw);
190 
191         pw.close();
192 
193         return sw;
194     }
195 
writeSupplementalIndex(String filename, StringWriter sw)196     private static void writeSupplementalIndex(String filename, StringWriter sw) throws IOException {
197         String[] replacements = {
198             "%date%", CldrUtility.isoFormatDateOnly(new Date()),
199             "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(),
200             "%data%", sw.toString(),
201             "%index%", "../index.html" };
202         PrintWriter pw2 = org.unicode.cldr.draft.FileUtilities.openUTF8Writer(FormattedFileWriter.CHART_TARGET_DIR, filename);
203         FileUtilities.appendFile(ShowLanguages.class, "supplemental.html", replacements, pw2);
204         pw2.close();
205     }
206 
printLanguageScript(LanguageInfo linfo, PrintWriter pw)207     private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException {
208         PrintWriter pw1;
209         TablePrinter tablePrinter = new TablePrinter()
210             .addColumn("Language", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0)
211             .setBreakSpans(true)
212             .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
213             .setSpanRows(true)
214             .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSpanRows(true)
215             .setSortPriority(1)
216             .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3)
217             .addColumn("Script", "class='target'", null, "class='target'", true).setSortPriority(3)
218             .addColumn("Code", "class='target'", null, "class='target'", true)
219             .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSortPriority(2);
220 
221         TablePrinter tablePrinter2 = new TablePrinter()
222             .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0)
223             .setBreakSpans(true)
224             .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
225             .setSpanRows(true)
226             .addColumn("MS", "class='target' title='modern script'", null, "class='target'", true).setSpanRows(true)
227             .setSortPriority(1)
228             .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(3)
229             .addColumn("Code", "class='target'", null, "class='target'", true)
230             .addColumn("ML", "class='target' title='modern language'", null, "class='target'", true).setSortPriority(2)
231             .addColumn("P", "class='target' title='primary'", null, "class='target'", true).setSortPriority(3);
232 
233         // get the codes so we can show the remainder
234         Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS);
235         UnicodeSet temp = new UnicodeSet();
236         for (String script : getScriptsToShow()) {
237             temp.clear();
238             try {
239                 temp.applyPropertyAlias("script", script);
240             } catch (RuntimeException e) {
241             } // fall through
242             if (temp.size() == 0) {
243                 remainingScripts.remove(script);
244                 System.out.println("Removing: " + script);
245             } else {
246                 System.out.println("Keeping: " + script);
247             }
248         }
249         remainingScripts.remove("Brai");
250         remainingScripts.remove("Hira");
251         remainingScripts.remove("Qaai");
252         remainingScripts.remove("Hrkt");
253         remainingScripts.remove("Zzzz");
254         remainingScripts.remove("Zyyy");
255 
256         Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow());
257         for (String language : getLanguagesToShow()) {
258             Scope s = Iso639Data.getScope(language);
259             Type t = Iso639Data.getType(language);
260             if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) {
261                 remainingLanguages.remove(language);
262             }
263         }
264 
265         Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages();
266         for (String language : languages) {
267             Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language);
268             for (BasicLanguageData basicData : basicLanguageData) {
269                 String secondary = isOfficial(language) // basicData.getType() == BasicLanguageData.Type.primary
270                     ? "\u00A0"
271                         : "N";
272                 for (String script : basicData.getScripts()) {
273                     addLanguageScriptCells(tablePrinter, tablePrinter2, language, script, secondary);
274                     remainingScripts.remove(script);
275                     remainingLanguages.remove(language);
276                 }
277             }
278         }
279         for (String language : remainingLanguages) {
280             addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?");
281         }
282         for (String script : remainingScripts) {
283             addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?");
284         }
285 
286         pw1 = new PrintWriter(new FormattedFileWriter(null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS));
287         pw1.println(tablePrinter.toTable());
288         pw1.close();
289 
290         pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS));
291         pw1.println(tablePrinter2.toTable());
292         pw1.close();
293 
294     }
295 
296     static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<>();
297     static {
298         for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) {
299             Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language);
300             if (territories == null) {
301                 continue;
302             }
303             int underbar = language.indexOf('_');
304             String base = underbar < 0 ? null : language.substring(0, underbar);
305 
306             for (String territory : territories) {
307                 PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory);
308                 OfficialStatus status = data.getOfficialStatus();
309                 OfficialStatus old;
310                 old = languageToBestStatus.get(language);
311                 if (old == null || status.compareTo(old) > 0) {
312                     languageToBestStatus.put(language, status);
313                 }
314                 if (base != null) {
315                     old = languageToBestStatus.get(base);
316                     if (old == null || status.compareTo(old) > 0) {
317                         languageToBestStatus.put(base, status);
318                     }
319                 }
320             }
321         }
322     }
323 
324     private static boolean isOfficial(String language) {
325         OfficialStatus status = languageToBestStatus.get(language);
326         if (status != null && status.isMajor()) {
327             return true;
328         }
329         int underbar = language.indexOf('_');
330         if (underbar < 0) {
331             return false;
332         }
333         return isOfficial(language.substring(0, underbar));
334     }
335 
336     private static Set<String> getLanguagesToShow() {
337         return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME);
338     }
339 
340     private static Set<String> getEnglishTypes(String type, int code) {
341         Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type));
342         for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext();) {
343             XPathParts parts = XPathParts.getFrozenInstance(it.next());
344             String newType = parts.getAttributeValue(-1, "type");
345             if (!result.contains(newType)) {
346                 result.add(newType);
347             }
348         }
349         return result;
350     }
351 
352     private static Set<String> getScriptsToShow() {
353         return getEnglishTypes("script", CLDRFile.SCRIPT_NAME);
354     }
355 
356     private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) throws IOException {
357         PrintWriter pw1;
358         TablePrinter tablePrinter2 = new TablePrinter()
359             .addColumn("Sample Char", "class='source'", null, "class='source sample'", true).setSpanRows(true)
360             .addColumn("Script", "class='source'", null, "class='source'", true).setSpanRows(true).setSortPriority(0)
361             .setBreakSpans(true)
362             .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
363             .setSpanRows(true)
364             .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(1)
365             .addColumn("Language", "class='target'", null, "class='target'", true).setSortPriority(2)
366             .addColumn("Native", "class='target'", null, "class='target'", true)
367             .addColumn("Code", "class='target'", null, "class='target'", true)
368             .addColumn("T", "class='target'", null, "class='target'", true).setSortPriority(3)
369             .addColumn("Territory", "class='target'", null, "class='target'", true).setSortPriority(4)
370             .addColumn("Native", "class='target'", null, "class='target'", true)
371             .addColumn("Code", "class='target'", null, "class='target'", true);
372 
373         // get the codes so we can show the remainder
374         Set<String> remainingScripts = new TreeSet<>(getScriptsToShow());
375         Set<String> remainingTerritories = new TreeSet<>(sc.getGoodAvailableCodes("territory"));
376         UnicodeSet temp = new UnicodeSet();
377         for (String script : getScriptsToShow()) {
378             temp.clear();
379             try {
380                 temp.applyPropertyAlias("script", script);
381             } catch (RuntimeException e) {
382             } // fall through
383             if (temp.size() == 0) {
384                 remainingScripts.remove(script);
385                 System.out.println("Removing: " + script);
386             } else {
387                 System.out.println("Keeping: " + script);
388             }
389         }
390         remainingScripts.remove("Brai");
391         remainingScripts.remove("Hira");
392         remainingScripts.remove("Qaai");
393         remainingScripts.remove("Hrkt");
394         remainingScripts.remove("Zzzz");
395         remainingScripts.remove("Zyyy");
396 
397         Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow());
398         for (String language : getLanguagesToShow()) {
399             Scope s = Iso639Data.getScope(language);
400             Type t = Iso639Data.getType(language);
401             if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) {
402                 remainingLanguages.remove(language);
403             }
404         }
405 
406         Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages();
407         for (String language : languages) {
408             Set<BasicLanguageData> basicLanguageData = supplementalDataInfo.getBasicLanguageData(language);
409             for (BasicLanguageData basicData : basicLanguageData) {
410                 if (basicData.getType() != BasicLanguageData.Type.primary) {
411                     continue;
412                 }
413                 Set<String> mainTerritories = getTerritories(language);
414                 if (mainTerritories.size() == 0) {
415                     continue;
416                     // mainTerritories.add("ZZ");
417                 }
418 
419                 TreeSet<String> mainScripts = new TreeSet<>(basicData.getScripts());
420                 if (mainScripts.size() == 0) {
421                     continue;
422                 }
423                 for (String script : mainScripts) {
424                     for (String territory : mainTerritories) {
425                         addLanguageScriptCells2(tablePrinter2, language, script, territory);
426                         remainingTerritories.remove(territory);
427                     }
428                     remainingScripts.remove(script);
429                 }
430             }
431             remainingLanguages.remove(language);
432         }
433         // for (String language : remainingLanguages) {
434         // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ");
435         // }
436         // for (String script : remainingScripts) {
437         // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ");
438         // }
439         // for (String territory : remainingTerritories) {
440         // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory);
441         // }
442 
443         pw1 = new PrintWriter(new FormattedFileWriter(null, "Scripts, Languages, and Territories", null, SUPPLEMENTAL_INDEX_ANCHORS));
444         pw1.println(tablePrinter2.toTable());
445         pw1.close();
446     }
447 
448     private static Relation<String, String> territoryFix;
449 
getTerritories(String language)450     private static Set<String> getTerritories(String language) {
451         if (territoryFix == null) { // set up the data
452             initTerritoryFix();
453         }
454         Set<String> territories = territoryFix.getAll(language);
455         if (territories == null) {
456             territories = new TreeSet<>();
457         }
458         return territories;
459     }
460 
initTerritoryFix()461     private static void initTerritoryFix() {
462         territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
463         Set<String> languages = supplementalDataInfo.getLanguages();
464         LanguageTagParser ltp = new LanguageTagParser();
465         for (String language2 : languages) {
466             if (language2.contains("_")) {
467                 ltp.set(language2).getLanguage();
468                 addOfficialTerritory(ltp, language2, ltp.getLanguage());
469             } else {
470                 addOfficialTerritory(ltp, language2, language2);
471             }
472         }
473     }
474 
addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage)475     private static void addOfficialTerritory(LanguageTagParser ltp, String language, String baseLanguage) {
476         // territoryFix.putAll(baseLanguage, supplementalDataInfo.getTerritoriesForPopulationData(language));
477         Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language);
478         if (territories == null) {
479             return;
480         }
481         for (String territory : territories) {
482             PopulationData data = supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory);
483             OfficialStatus status = data.getOfficialStatus();
484             if (status.isMajor()) {
485                 territoryFix.put(baseLanguage, territory);
486                 System.out.println("\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language);
487             }
488         }
489     }
490 
addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script, String territory)491     private static void addLanguageScriptCells2(TablePrinter tablePrinter2, String language, String script,
492         String territory) {
493         CLDRFile nativeLanguage = null;
494         if (SHOW_NATIVE) {
495             try {
496                 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true);
497             } catch (RuntimeException e) {
498                 try {
499                     nativeLanguage = cldrFactory.make(language + "_" + script, true);
500                 } catch (RuntimeException e2) {
501                     try {
502                         nativeLanguage = cldrFactory.make(language, true);
503                     } catch (RuntimeException e3) {
504                     }
505                 }
506             }
507             // check for overlap
508             if (nativeLanguage != null && !script.equals("Jpan") && !script.equals("Hans") && !script.equals("Hant")) {
509                 UnicodeSet scriptSet;
510                 try {
511                     String tempScript = script.equals("Kore") ? "Hang" : script;
512                     scriptSet = new UnicodeSet("[:script=" + tempScript + ":]");
513                 } catch (RuntimeException e) {
514                     scriptSet = new UnicodeSet();
515                 }
516                 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING);
517                 if (scriptSet.containsNone(exemplars)) {
518                     System.out.println("Skipping CLDR file -- exemplars differ: " + language + "\t"
519                         + nativeLanguage.getLocaleID() + "\t" + scriptSet + "\t" + exemplars);
520                     nativeLanguage = null;
521                 }
522             }
523         }
524         String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language);
525         if (languageName == null) languageName = "???";
526         String isLanguageTranslated = "";
527         String nativeLanguageName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME,
528             language);
529         if (nativeLanguageName == null || nativeLanguageName.equals(language)) {
530             nativeLanguageName = "<i>n/a</i>";
531             isLanguageTranslated = "n";
532         }
533 
534         String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script);
535         // String nativeScriptName = nativeLanguage == null ? null :
536         // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script);
537         // if (nativeScriptName != null && !nativeScriptName.equals(script)) {
538         // scriptName = nativeScriptName + "[" + scriptName + "]";
539         // }
540 
541         String isTerritoryTranslated = "";
542         String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory);
543         String nativeTerritoryName = nativeLanguage == null ? null : nativeLanguage.getName(CLDRFile.TERRITORY_NAME,
544             territory);
545         if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) {
546             nativeTerritoryName = "<i>n/a</i>";
547             isTerritoryTranslated = "n";
548         }
549 
550         // Type t = Iso639Data.getType(language);
551         // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) {
552         // // ok
553         // } else if (!language.equals("und")){
554         // scriptModern = "N";
555         // }
556         //String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : "";
557 
558         Info scriptMetatdata = ScriptMetadata.getInfo(script);
559         tablePrinter2.addRow()
560         .addCell(scriptMetatdata.sampleChar)
561         .addCell(scriptName)
562         .addCell(script)
563         .addCell(isLanguageTranslated)
564         .addCell(languageName)
565         .addCell(nativeLanguageName)
566         .addCell(language)
567         .addCell(isTerritoryTranslated)
568         .addCell(territoryName)
569         .addCell(nativeTerritoryName)
570         .addCell(territory)
571         .finishRow();
572     }
573 
574     static ImmutableMap<String, String> fixScriptGif = ImmutableMap.<String, String>builder()
575         .put("hangul", "hangulsyllables")
576         .put("japanese", "hiragana")
577         .put("unknown or invalid script", "unknown")
578         .put("Hant", "Hant")
579         .put("Hans", "Hans")
580         .build();
581 
getGifName(String script)582     private static String getGifName(String script) {
583         String temp = fixScriptGif.get(script);
584         if (temp != null) {
585             return temp;
586         }
587         String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script);
588         scriptName = scriptName.toLowerCase(Locale.ENGLISH);
589         temp = fixScriptGif.get(scriptName);
590         if (temp != null) {
591             return temp;
592         }
593         return scriptName;
594     }
595 
596     private static Set<Type> oldLanguage = Collections.unmodifiableSet(EnumSet.of(Type.Ancient, Type.Extinct,
597         Type.Historical, Type.Constructed));
598 
addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language, String script, String secondary)599     private static void addLanguageScriptCells(TablePrinter tablePrinter, TablePrinter tablePrinter2, String language,
600         String script, String secondary) {
601         try {
602             String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language);
603             if (languageName == null) {
604                 languageName = "¿" + language + "?";
605                 System.err.println("No English Language Name for:" + language);
606             }
607             String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script);
608             if (scriptName == null) {
609                 scriptName = "¿" + script + "?";
610                 System.err.println("No English Language Name for:" + script);
611             }
612             String scriptModern = StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N";
613             //Scope s = Iso639Data.getScope(language);
614             Type t = Iso639Data.getType(language);
615             // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == Type.Living) {
616             // // ok
617             // } else if (!language.equals("und")){
618             // scriptModern = "N";
619             // }
620             String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : "";
621 
622             tablePrinter.addRow()
623             .addCell(languageName)
624             .addCell(language)
625             .addCell(languageModern)
626             .addCell(secondary)
627             .addCell(scriptName)
628             .addCell(script)
629             .addCell(scriptModern)
630             .finishRow();
631 
632             tablePrinter2.addRow()
633             .addCell(scriptName)
634             .addCell(script)
635             .addCell(scriptModern)
636             .addCell(languageName)
637             .addCell(language)
638             .addCell(languageModern)
639             .addCell(secondary)
640             .finishRow();
641         } catch (RuntimeException e) {
642             throw e;
643         }
644     }
645 
646     static class LanguageInfo {
647         private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<>(); // alias kind ("language", "script", "territory", ...) -> (old code -> replacement, or "?" when none); static, but re-initialized by every constructor call
648 
            // language code -> script codes, taken from the 'scripts' attribute of languageData elements.
649         Multimap<String, String> language_scripts = TreeMultimap.create();
650 
            // language code -> territory codes, from languageData 'territories' plus the territory/language
            // population data. Keys that come from a languageData element with an 'alt' attribute carry a
            // "*" suffix (alt="secondary") or "*<alt>" suffix (any other alt value).
651         Multimap<String, String> language_territories = TreeMultimap.create();
652 
            // Attribute maps of every deprecatedItems element seen in the supplemental data.
653         List<Map<String, String>> deprecatedItems = new ArrayList<>();
654 
            // Inverse of language_territories; assigned by the constructor once loading is done.
655         Multimap<String, String> territory_languages;
656 
            // Inverse of language_scripts; assigned by the constructor once loading is done.
657         Multimap<String, String> script_languages;
658 
659         //Map group_contains = new TreeMap();
660 
            // Alias rows. The constructor adds {"timezone", alias, type} for zoneItem aliases and
            // {element, old code (or territory name), replacement display name, reason} for locale aliases.
661         Set<String[]> aliases = new TreeSet<String[]>(new ArrayComparator(new Comparator[] { new UTF16.StringComparator(), col }));
662 
            // Comparator for three-element arrays using 'col' for each position; used for the
            // {from, to, currencyName} rows stored in territory_currency.
663         Comparator col3 = new ArrayComparator(new Comparator[] { col, col, col });
664 
            // currency display name -> digits string, with " (<rounding>)" appended when rounding is not "0".
665         Map<String, String> currency_fractions = new TreeMap<String, String>(col);
666 
            // currency display name -> set of country names wrapped in <b>..</b> (still current) or <i>..</i> (historic).
667         Map<String, Set> currency_territory = new TreeMap<String, Set>(col);
668 
            // country display name -> set of String[] {from, to, currencyName}, ordered by col3.
669         Map<String, Set> territory_currency = new TreeMap<String, Set>(col);
670 
            // iso3166 codes that appear in a currencyData region element.
671         Set<String> territoriesWithCurrencies = new TreeSet<>();
672 
            // iso4217 codes that appear in a currencyData region element.
673         Set<String> currenciesWithTerritories = new TreeSet<>();
674 
            // NOTE(review): not populated in the code visible here; presumably filled by addTerritoryInfo — verify.
675         Map<String, Map<String, Set<String>>> territoryData = new TreeMap<>();
676 
            // NOTE(review): not populated in the code visible here; presumably filled by addTerritoryInfo — verify.
677         Set<String> territoryTypes = new TreeSet<>();
678 
            // NOTE(review): populated outside the code visible here — check its writers before relying on its contents.
679         Map<String, LinkedHashSet<String>> charSubstitutions = new TreeMap<String, LinkedHashSet<String>>(col);
680 
            // Digits string of the currency fractions entry whose iso4217 is "DEFAULT"; null until that entry is read.
681         String defaultDigits = null;
682 
            // NOTE(review): populated outside the code visible here — check its writers before relying on its contents.
683         Map<String, Map<String, Object>> territoryLanguageData = new TreeMap<>();
684 
            // iso3166 -> iso4217 for region currencies whose 'to' date is absent or compares greater than "2006".
685         private Relation<String, String> territoriesToModernCurrencies = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class,
686             null);
687 
LanguageInfo(Factory cldrFactory)688         public LanguageInfo(Factory cldrFactory) throws IOException {
689             CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
690             for (Iterator<String> it = supp.iterator(); it.hasNext();) {
691                 String path = it.next();
692                 String fullPath = supp.getFullXPath(path);
693                 if (fullPath == null) {
694                     supp.getFullXPath(path);
695                 }
696                 XPathParts parts = XPathParts.getFrozenInstance(fullPath);
697 
698                 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/>
699                 if (path.indexOf("/zoneItem") >= 0) {
700                     Map<String, String> attributes = parts.getAttributes(parts.size() - 1);
701                     String type = attributes.get("type");
702                     //String territory = attributes.get("territory");
703                     String aliasAttributes = attributes.get("aliases");
704                     if (aliasAttributes != null) {
705                         String[] aliasesList = aliasAttributes.split("\\s+");
706 
707                         for (int i = 0; i < aliasesList.length; ++i) {
708                             String alias = aliasesList[i];
709                             aliases.add(new String[] { "timezone", alias, type });
710                         }
711                     }
712                     // TODO territory, multizone
713                     continue;
714                 }
715 
716                 if (path.indexOf("/currencyData") >= 0) {
717                     if (path.indexOf("/fractions") >= 0) {
718                         // <info iso4217="ADP" digits="0" rounding="0"/>
719                         String element = parts.getElement(parts.size() - 1);
720                         if (!element.equals("info"))
721                             throw new IllegalArgumentException("Unexpected fractions element: " + element);
722                         Map<String, String> attributes = parts.getAttributes(parts.size() - 1);
723                         String iso4217 = attributes.get("iso4217");
724                         String digits = attributes.get("digits");
725                         String rounding = attributes.get("rounding");
726                         digits = digits + (rounding.equals("0") ? "" : " (" + rounding + ")");
727                         if (iso4217.equals("DEFAULT"))
728                             defaultDigits = digits;
729                         else
730                             currency_fractions.put(getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits);
731                         continue;
732                     }
733                     // <region iso3166="AR">
734                     // <currency iso4217="ARS" from="1992-01-01"/>
735                     if (path.indexOf("/region") >= 0) {
736                         Map<String, String> attributes = parts.getAttributes(parts.size() - 2);
737                         String iso3166 = attributes.get("iso3166");
738                         attributes = parts.getAttributes(parts.size() - 1);
739                         String iso4217 = attributes.get("iso4217");
740                         String to = attributes.get("to");
741                         if (to == null)
742                             to = "\u221E";
743                         String from = attributes.get("from");
744                         if (from == null)
745                             from = "-\u221E";
746                         String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false);
747                         String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false);
748                         Set info = territory_currency.get(countryName);
749                         if (info == null)
750                             territory_currency.put(countryName, info = new TreeSet(col3));
751                         info.add(new String[] { from, to, currencyName });
752                         info = currency_territory.get(currencyName);
753                         if (info == null)
754                             currency_territory.put(currencyName, info = new TreeSet(col));
755                         territoriesWithCurrencies.add(iso3166);
756                         currenciesWithTerritories.add(iso4217);
757                         if (to.equals("\u221E") || to.compareTo("2006") > 0) {
758                             territoriesToModernCurrencies.put(iso3166, iso4217);
759                             info.add("<b>" + countryName + "</b>");
760 
761                         } else {
762                             info.add("<i>" + countryName + "</i>");
763 
764                         }
765                         continue;
766                     }
767                 }
768 
769                 if (path.indexOf("/languageData") >= 0) {
770                     Map<String, String> attributes = parts.findAttributes("language");
771                     String language = attributes.get("type");
772                     String alt = attributes.get("alt");
773                     addTokens(language, attributes.get("scripts"), " ", language_scripts);
774                     // mark the territories
775                     if (alt == null)
776                         ; // nothing
777                     else if ("secondary".equals(alt))
778                         language += "*";
779                     else
780                         language += "*" + alt;
781                     // <language type="af" scripts="Latn" territories="ZA"/>
782                     addTokens(language, attributes.get("territories"), " ", language_territories);
783                     continue;
784                 }
785 
786                 if (path.indexOf("/deprecatedItems") >= 0) {
787                     deprecatedItems.add(parts.findAttributes("deprecatedItems"));
788                     continue;
789                 }
790                 if (path.indexOf("/calendarData") >= 0) {
791                     Map<String, String> attributes = parts.findAttributes("calendar");
792                     if (attributes == null) {
793                         System.err.println("Err: on path " + fullPath
794                             + " , no attributes on 'calendar'. Probably, this tool is out of date.");
795                     } else {
796                         String type = attributes.get("type");
797                         String territories = attributes.get("territories");
798                         if (territories == null) {
799                             System.err.println("Err: on path " + fullPath
800                                 + ", missing territories. Probably, this tool is out of date.");
801                         } else if (type == null) {
802                             System.err.println("Err: on path " + fullPath
803                                 + ", missing type. Probably, this tool is out of date.");
804                         } else {
805                             addTerritoryInfo(territories, "calendar", type);
806                         }
807                     }
808                 }
809                 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) {
810                     String element = parts.getElement(parts.size() - 1);
811                     Map<String, String> attributes = parts.getAttributes(parts.size() - 1);
812                     // later, make this a table
813                     String key = "count";
814                     String display = "Days in week (min)";
815                     boolean useTerritory = true;
816                     switch (element) {
817                     case "firstDay":
818                         key = "day";
819                         display = "First day of week";
820                         break;
821                     case "weekendStart":
822                         key = "day";
823                         display = "First day of weekend";
824                         break;
825                     case "weekendEnd":
826                         key = "day";
827                         display = "Last day of weekend";
828                         break;
829                     case "measurementSystem":
830                         // <measurementSystem type="metric" territories="001"/>
831                         key = "type";
832                         display = "Meas. system";
833                         break;
834                     case "paperSize":
835                         key = "type";
836                         display = "Paper Size";
837                         break;
838                     case "weekOfPreference":
839                         useTerritory = false;
840                         break;
841                     }
842                     if (useTerritory) {
843                         String type = attributes.get(key);
844                         String territories = attributes.get("territories");
845                         addTerritoryInfo(territories, display, type);
846                     }
847                 }
848                 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0)
849                     continue;
850                 System.out.println("Skipped Element: " + path);
851             }
852 
853             for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
854                 for (String language : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) {
855                     language_territories.put(language, territory);
856                 }
857             }
858             territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create());
859             script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create());
860 
861             // now get some metadata
862             localeAliasInfo.put("language", new TreeMap<String, String>());
863             localeAliasInfo.put("script", new TreeMap<String, String>());
864             localeAliasInfo.put("territory", new TreeMap<String, String>());
865             localeAliasInfo.put("variant", new TreeMap<String, String>());
866             localeAliasInfo.put("zone", new TreeMap<String, String>());
867             localeAliasInfo.put("subdivision", new TreeMap<String, String>());
868             localeAliasInfo.put("unit", new TreeMap<String, String>());
869             localeAliasInfo.put("usage", new TreeMap<String, String>());
870 
871             localeAliasInfo.get("language").put("no", "nb");
872             localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN");
873             localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG");
874             localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW");
875             localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO");
876             localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK");
877 
878             // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false);
879             Map<String, Map<String, R2<List<String>, String>>> localeAliasInfo2 = supplementalDataInfo
880                 .getLocaleAliasInfo();
881             for (Entry<String, Map<String, R2<List<String>, String>>> entry1 : localeAliasInfo2.entrySet()) {
882                 String element = entry1.getKey();
883                 for (Entry<String, R2<List<String>, String>> entry2 : entry1.getValue().entrySet()) {
884                     String type = entry2.getKey();
885                     R2<List<String>, String> replacementReason = entry2.getValue();
886                     List<String> replacementList = replacementReason.get0();
887                     String replacement = replacementList == null ? null :
888                         Joiner.on(" ").join(replacementList);
889                     String reason = replacementReason.get1();
890                     if (element.equals("timezone")) {
891                         element = "zone";
892                     }
893                     try {
894                         localeAliasInfo.get(element).put(type, replacement == null ? "?" : replacement);
895                     } catch (Exception e) {
896                         // TODO Auto-generated catch block
897                         throw new IllegalArgumentException("Can't find alias data for '" + element + "'", e);
898                     }
899 
900                     String name = "";
901                     if (replacement == null) {
902                         name = "(none)";
903                     } else if (element.equals("language")) {
904                         name = getName(replacement, false);
905                     } else if (element.equals("zone")) {
906                         element = "timezone";
907                         name = replacement + "*";
908                     } else {
909                         int typeCode = CLDRFile.typeNameToCode(element);
910                         if (typeCode >= 0) {
911                             name = getName(typeCode, replacement, false);
912                         } else {
913                             name = "*" + replacement;
914                         }
915                     }
916                     if (element.equals("territory")) {
917                         territoryAliases.put(type, name);
918                         aliases
919                         .add(new String[] { element, getName(CLDRFile.TERRITORY_NAME, type, false), name, reason });
920                     } else {
921                         aliases.add(new String[] { element, type, name, reason });
922                     }
923                     continue;
924                 }
925             }
926             Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt");
927             Log.close();
928         }
929 
printLikelySubtags(PrintWriter index)930         public void printLikelySubtags(PrintWriter index) throws IOException {
931 
932             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS));
933 
934             TablePrinter tablePrinter = new TablePrinter()
935                 .addColumn("Source Lang", "class='source'", null, "class='source'", true).setSortPriority(1)
936                 .setSpanRows(false)
937                 .addColumn("Source Script", "class='source'", null, "class='source'", true).setSortPriority(0)
938                 .setSpanRows(false).setBreakSpans(true)
939                 .addColumn("Source Region", "class='source'", null, "class='source'", true).setSortPriority(2)
940                 .setSpanRows(false)
941                 .addColumn("Target Lang", "class='target'", null, "class='target'", true).setSortPriority(3)
942                 .setBreakSpans(true)
943                 .addColumn("Target Script", "class='target'", null, "class='target'", true).setSortPriority(4)
944                 .addColumn("Target Region", "class='target'", null, "class='target'", true).setSortPriority(5)
945                 .addColumn("Source ID", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
946                 .addColumn("Target ID", "class='target'", null, "class='target'", true);
947             Map<String, String> subtags = supplementalDataInfo.getLikelySubtags();
948             LanguageTagParser sourceParsed = new LanguageTagParser();
949             LanguageTagParser targetParsed = new LanguageTagParser();
950             for (String source : subtags.keySet()) {
951                 String target = subtags.get(source);
952                 sourceParsed.set(source);
953                 targetParsed.set(target);
954                 tablePrinter.addRow()
955                 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage()))
956                 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript()))
957                 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion()))
958                 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage()))
959                 .addCell(getName(CLDRFile.SCRIPT_NAME, targetParsed.getScript()))
960                 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion()))
961                 .addCell(source)
962                 .addCell(target)
963                 .finishRow();
964             }
965             pw.println(tablePrinter.toTable());
966             pw.close();
967         }
968 
969         static class LanguageData extends R4<Double, Double, Double, String> {
                 // Named subtype of R4<Double, Double, Double, String>; it adds no state or behavior.
                 // The constructor only forwards its four arguments, in order, to the R4 constructor.
LanguageData(Double a, Double b, Double c, String d)970             public LanguageData(Double a, Double b, Double c, String d) {
971                 super(a, b, c, d);
972             }
973         }
974 
getName(final int type, final String value)975         private String getName(final int type, final String value) {
976             if (value == null || value.equals("") || value.equals("und")) {
977                 return "\u00A0";
978             }
979             String result = english.getName(type, value);
980             if (result == null) {
981                 result = value;
982             }
983             return result;
984         }
985 
986         static final Comparator INVERSE_COMPARABLE = new Comparator() {
987             @Override
988             public int compare(Object o1, Object o2) {
989                 return ((Comparable) o2).compareTo(o1);
990             }
991         };
992 
993         // http://www.faqs.org/rfcs/rfc2396.html
994         // delims = "<" | ">" | "#" | "%" | <">
995         // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
996         // Within a query component, the characters ";", "/", "?", ":", "@",
997         // "&", "=", "+", ",", and "$" are reserved.
            //
            // Frozen set of the values that urlEncode percent-escapes: U+0000-U+0020, U+007F, the
            // delimiter and reserved characters listed above, and everything from U+0080 through U+1FFFF.
            // urlEncode tests single UTF-8 byte values (0-255) against this set.
998         static final UnicodeSet ESCAPED_URI_QUERY = new UnicodeSet(
999             "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]").freeze();
1000 
1001         private static final int MINIMAL_BIG_VENDOR = 8; // showCoverageGoals treats an organization with fewer locale entries than this as a "small vendor" (no table column of its own)
1002 
1003         static {
                 // Prints the complement of ESCAPED_URI_QUERY (the values urlEncode leaves unescaped)
                 // to stdout once, when this class is initialized.
                 // NOTE(review): looks like leftover debugging output — confirm nothing relies on it before removing.
System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement())1004             System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement());
1005         }
1006 
urlEncode(String input)1007         private String urlEncode(String input) {
1008             try {
1009                 byte[] utf8 = input.getBytes("utf-8");
1010                 StringBuffer output = new StringBuffer();
1011                 for (int i = 0; i < utf8.length; ++i) {
1012                     int b = utf8[i] & 0xFF;
1013                     if (ESCAPED_URI_QUERY.contains(b)) {
1014                         output.append('%');
1015                         if (b < 0x10) output.append('0');
1016                         output.append(Integer.toString(b, 16));
1017                     } else {
1018                         output.append((char) b);
1019                     }
1020                 }
1021                 return output.toString();
1022             } catch (UnsupportedEncodingException e) {
1023                 throw (IllegalArgumentException) new IllegalArgumentException().initCause(e);
1024             }
1025         }
1026 
addBug(int bugNumber, String text, String from, String subject, String body)1027         private String addBug(int bugNumber, String text, String from, String subject, String body) {
1028             return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL
1029                 + "'>" + text + "</a>";
1030         }
1031 
showLanguageCountryInfo(PrintWriter pw)1032         private void showLanguageCountryInfo(PrintWriter pw) throws IOException {
1033             FormattedFileWriter ffw = new FormattedFileWriter(null, "Language-Territory Information",
1034                 null
1035                 // "<div  style='margin:1em'><p>The language data is provided for localization testing, and is under development for CLDR 1.5. "
1036                 // +
1037                 // "To add a new territory for a language, see the <i>add new</i> links below. " +
1038                 // "For more information, see <a href=\"territory_language_information.html\">Territory-Language Information.</a>"
1039                 // +
1040                 // "<p></div>"
1041                 , SUPPLEMENTAL_INDEX_ANCHORS);
1042             PrintWriter pw21 = new PrintWriter(ffw);
1043             PrintWriter pw2 = pw21;
1044             NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH);
1045             nf.setGroupingUsed(true);
1046             //NumberFormat percent = new DecimalFormat("000.0%");
1047             TablePrinter tablePrinter = new TablePrinter()
1048                 // tablePrinter.setSortPriorities(0,5)
1049                 .addColumn("L", "class='source'", null, "class='source'", true)
1050                 .setSortPriority(0)
1051                 .setBreakSpans(true)
1052                 .setRepeatHeader(true)
1053                 .setHidden(true)
1054                 .addColumn("Language", "class='source'", null, "class='source'", true)
1055                 .setSortPriority(0)
1056                 .setBreakSpans(true)
1057                 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
1058                 // .addColumn("Report Bug", "class='target'", null, "class='target'", false)
1059                 .addColumn("Territory", "class='target'", null, "class='target'", true)
1060                 .addColumn("Code", "class='target'", "<a href=\"territory_language_information.html#{0}\">{0}</a>",
1061                     "class='target'", true)
1062                 .addColumn("Language Population", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true)
1063                 .setSortPriority(1).setSortAscending(false)
1064                 // .addColumn("Territory Population", "class='target'", "{0,number,#,##0}", "class='targetRight'", true)
1065                 // .addColumn("Language Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true)
1066                 // .addColumn("Territory Literacy", "class='target'", "{0,number,00.0}%", "class='targetRight'", true)
1067                 // .addColumn("Territory GDP (PPP)", "class='target'", "{0,number,#,##0}", "class='targetRight'", true)
1068                 ;
1069             TreeSet<String> languages = new TreeSet<>();
1070             Collection<Comparable[]> data = new ArrayList<>();
1071             String msg = "<br><i>Please click on each country code</i>";
1072 
1073             Collection<Comparable[]> plainData = new ArrayList<>();
1074 
1075             for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
1076                 // PopulationData territoryData = supplementalDataInfo.getPopulationDataForTerritory(territoryCode);
1077                 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
1078                 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) {
1079                     PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode);
1080                     languages.add(languageCode);
1081                     Comparable[] items = new Comparable[] {
1082                         getFirstPrimaryWeight(getLanguageName(languageCode)),
1083                         getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode),
1084                         languageCode,
1085                         // bug,
1086                         territoryName + getOfficialStatus(territoryCode, languageCode),
1087                         territoryCode,
1088                         languageData.getPopulation(),
1089                         // population,
1090                         // languageliteracy,
1091                         // territoryLiteracy,
1092                         // gdp
1093                     };
1094                     Comparable[] plainItems = new Comparable[] {
1095                         getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode),
1096                         languageCode,
1097                         territoryName,
1098                         territoryCode,
1099                         getRawOfficialStatus(territoryCode, languageCode),
1100                         languageData.getPopulation(),
1101                         languageData.getLiteratePopulation()
1102                     };
1103 
1104                     data.add(items);
1105                     plainData.add(plainItems);
1106                 }
1107             }
1108             for (String languageCode : languages) {
1109                 Comparable[] items = new Comparable[] {
1110                     getFirstPrimaryWeight(getLanguageName(languageCode)),
1111                     getLanguageName(languageCode), // + getLanguagePluralMessage(msg, languageCode),
1112                     languageCode,
1113                     // bug,
1114                     addBug(1217, "<i>add new</i>", "<email>", "Add territory to " + getLanguageName(languageCode)
1115                     + " (" + languageCode + ")", "<territory, speaker population in territory, and references>"),
1116                     "",
1117                     0.0d,
1118                     // 0.0d,
1119                     // 0.0d,
1120                     // 0.0d,
1121                     // gdp
1122                 };
1123                 data.add(items);
1124             }
1125             Comparable[][] flattened = data.toArray(new Comparable[data.size()][]);
1126             String value = tablePrinter.addRows(flattened).toTable();
1127             pw2.println(value);
1128             pw2.close();
1129             try (PrintWriter pw21plain = FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) {
1130                 for (Comparable[] row : plainData) {
1131                     pw21plain.println(Joiner.on("\t").join(row));
1132                 }
1133             }
1134         }
1135 
getLanguagePluralMessage(String msg, String languageCode)1136         private String getLanguagePluralMessage(String msg, String languageCode) {
1137             String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage();
1138             String messageWithPlurals = msg + ", on <a href='language_plural_rules.html#" + mainLanguageCode
1139                 + "'>plurals</a>" +
1140                 ", and on <a href='likely_subtags.html#" + mainLanguageCode + "'>likely-subtags</a>";
1141             return messageWithPlurals;
1142         }
1143 
getLanguageName(String languageCode)1144         private String getLanguageName(String languageCode) {
1145             String result = english.getName(languageCode);
1146             if (!result.equals(languageCode)) return result;
1147             Set<String> names = Iso639Data.getNames(languageCode);
1148             if (names != null && names.size() != 0) {
1149                 return names.iterator().next();
1150             }
1151             return languageCode;
1152         }
1153 
showCoverageGoals(PrintWriter pw)1154         private void showCoverageGoals(PrintWriter pw) throws IOException {
1155             PrintWriter pw2 = new PrintWriter(new FormattedFileWriter(null, "Coverage Goals",
1156                 null
1157                 // "<p>" +
1158                 // "The following show default coverage goals for larger organizations. " +
1159                 // "<i>[n/a]</i> shows where there is no specific value for a given organization, " +
1160                 // "while <i>(...)</i> indicates that the goal is inherited from the parent. " +
1161                 // "A * is added if the goal differs from the parent locale's goal. " +
1162                 // "For information on what these goals mean (comprehensive, modern, moderate,...), see the LDML specification "
1163                 // +
1164                 // "<a href='http://www.unicode.org/reports/tr35/#Coverage_Levels'>Appendix M: Coverage Levels</a>. " +
1165                 // +
1166                 // "</p>"
1167                 , null));
1168 
1169             TablePrinter tablePrinter = new TablePrinter()
1170                 // tablePrinter.setSortPriorities(0,4)
1171                 .addColumn("Language", "class='source'", null, "class='source'", true)
1172                 .setSortPriority(0)
1173                 .setBreakSpans(true)
1174                 .addColumn("Code", "class='source'",
1175                     "<a href=\"http://www.unicode.org/cldr/data/common/main/{0}.xml\">{0}</a>", "class='source'", false);
1176             Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes();
1177             Set<String> locales = new TreeSet<>();
1178             Set<Organization> vendors = new LinkedHashSet<>();
1179             Set<Organization> smallVendors = new LinkedHashSet<>();
1180 
1181             for (Entry<Organization, Map<String, Level>> vendorData : vendordata.entrySet()) {
1182                 Organization vendor = vendorData.getKey();
1183                 //if (vendor.equals(Organization.java)) continue;
1184                 Map<String, Level> data = vendorData.getValue();
1185                 if (data.size() < MINIMAL_BIG_VENDOR) {
1186                     smallVendors.add(vendor);
1187                     continue;
1188                 }
1189                 vendors.add(vendor);
1190                 tablePrinter.addColumn(vendor.getDisplayName(), "class='target'", null, "class='target'", false)
1191                 .setSpanRows(true);
1192                 locales.addAll(data.keySet());
1193             }
1194 
1195             Collection<Comparable[]> data = new ArrayList<>();
1196             List<String> list = new ArrayList<>();
1197             LanguageTagParser ltp = new LanguageTagParser();
1198             //String alias2 = getAlias("sh_YU");
1199 
1200             for (String locale : locales) {
1201                 list.clear();
1202                 String localeCode = locale.equals("*") ? "und" : locale;
1203                 String alias = getAlias(localeCode);
1204                 if (!alias.equals(localeCode)) {
1205                     System.out.println("Should use canonical form: " + locale + " => " + alias);
1206                 }
1207                 String baseLang = ltp.set(localeCode).getLanguage();
1208                 String baseLangName = getLanguageName(baseLang);
1209                 list.add("und".equals(localeCode) ? "other" : baseLangName);
1210                 list.add(locale);
1211                 for (Organization vendor : vendors) {
1212                     String status = getVendorStatus(locale, vendor, vendordata);
1213                     if (!baseLang.equals(locale) && !status.startsWith("<")) {
1214                         String langStatus = getVendorStatus(baseLang, vendor, vendordata);
1215                         if (!langStatus.equals(status)) {
1216                             status += "*";
1217                         }
1218                     }
1219                     list.add(status);
1220                 }
1221                 data.add(list.toArray(new String[list.size()]));
1222             }
1223             Comparable[][] flattened = data.toArray(new Comparable[data.size()][]);
1224             String value = tablePrinter.addRows(flattened).toTable();
1225             pw2.println(value);
1226             pw2.append("<h2>Others</h2><div align='left'><ul>");
1227             for (Organization vendor2 : smallVendors) {
1228                 pw2.append("<li><b>");
1229                 pw2.append(TransliteratorUtilities.toHTML.transform(
1230                     vendor2.getDisplayName())).append(": </b>");
1231                 boolean first1 = true;
1232                 for (Level level : Level.values()) {
1233                     boolean first2 = true;
1234                     Level other = null;
1235                     for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) {
1236                         String key = data2.getKey();
1237                         Level level2 = data2.getValue();
1238                         if (level != level2) {
1239                             continue;
1240                         }
1241                         if (key.equals("*")) {
1242                             other = level2;
1243                             continue;
1244                         }
1245                         if (first2) {
1246                             if (first1) {
1247                                 first1 = false;
1248                             } else {
1249                                 pw2.append("; ");
1250                             }
1251                             pw2.append(level2.toString()).append(": ");
1252                             first2 = false;
1253                         } else {
1254                             pw2.append(", ");
1255                         }
1256                         pw2.append(TransliteratorUtilities.toHTML.transform(key));
1257                     }
1258                     if (other != null) {
1259                         if (first2) {
1260                             if (first1) {
1261                                 first1 = false;
1262                             } else {
1263                                 pw2.append("; ");
1264                             }
1265                             pw2.append(level.toString()).append(": ");
1266                             first2 = false;
1267                         } else {
1268                             pw2.append(", ");
1269                         }
1270                         pw2.append("<i>other</i>");
1271                     }
1272                 }
1273                 pw2.append("</li>");
1274             }
1275             pw2.append("</ul></div>");
1276             pw2.close();
1277         }
1278 
1279         LanguageTagParser lpt2 = new LanguageTagParser();
1280 
getAlias(String locale)1281         private String getAlias(String locale) {
1282             lpt2.set(locale);
1283             locale = lpt2.toString(); // normalize
1284             //String language = lpt2.getLanguage();
1285             String script = lpt2.getScript();
1286             String region = lpt2.getRegion();
1287             // List variants = lpt2.getVariants();
1288             String temp;
1289             for (String old : localeAliasInfo.get("language").keySet()) {
1290                 if (locale.startsWith(old)) {
1291                     // the above is a rough check, and will fail with old=moh and locale=mo
1292                     if (!locale.equals(old) && !locale.startsWith(old + "_")) {
1293                         continue;
1294                     }
1295                     temp = localeAliasInfo.get("language").get(old);
1296                     lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length()));
1297                     break;
1298                 }
1299             }
1300             temp = localeAliasInfo.get("script").get(script);
1301             if (temp != null) {
1302                 lpt2.setScript(temp.split("\\s+")[0]);
1303             }
1304             temp = localeAliasInfo.get("territory").get(region);
1305             if (temp != null) {
1306                 lpt2.setRegion(temp.split("\\s+")[0]);
1307             }
1308             return lpt2.toString();
1309         }
1310 
getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata)1311         private String getVendorStatus(String locale, Organization vendor, Map<Organization, Map<String, Level>> vendordata) {
1312             Level statusLevel = vendordata.get(vendor).get(locale);
1313             String status = statusLevel == null ? null : statusLevel.toString();
1314             String curLocale = locale;
1315             while (status == null) {
1316                 curLocale = LocaleIDParser.getParent(curLocale);
1317                 if ("root".equals(curLocale)) {
1318                     status = "&nbsp;";
1319                     break;
1320                 }
1321                 statusLevel = vendordata.get(vendor).get(curLocale);
1322                 if (statusLevel != null) {
1323                     status = "<i>(" + statusLevel + ")</i>";
1324                 }
1325             }
1326             return status;
1327         }
1328 
showCountryLanguageInfo(PrintWriter pw)1329         private void showCountryLanguageInfo(PrintWriter pw) throws IOException {
1330             PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory-Language Information", null, SUPPLEMENTAL_INDEX_ANCHORS));
1331             PrintWriter pw2 = pw21;
1332             NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH);
1333             nf.setGroupingUsed(true);
1334             //NumberFormat percent = new DecimalFormat("000.0%");
1335             TablePrinter tablePrinter = new TablePrinter()
1336                 // tablePrinter.setSortPriorities(0,4)
1337                 .addColumn("T", "class='source'", null, "class='source'", true)
1338                 .setSortPriority(0)
1339                 .setBreakSpans(true)
1340                 .setRepeatHeader(true)
1341                 .setHidden(true)
1342                 .addColumn("Territory", "class='source'", null, "class='source'", true)
1343                 .setSortPriority(0)
1344                 .setBreakSpans(true)
1345                 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(),
1346                     "class='source'", true)
1347                 .addColumn("Terr. Literacy", "class='target'", "{0,number,@@}%", "class='targetRight'", true);
1348 
1349             tablePrinter
1350             .addColumn("Language", "class='target'", null, "class='target'", false)
1351             .addColumn("Code", "class='target'", "<a href=\"language_territory_information.html#{0}\">{0}</a>",
1352                 "class='target'", false)
1353             .addColumn("Lang. Pop.", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true)
1354             .addColumn("Pop.%", "class='target'", "{0,number,@@}%", "class='targetRight'", true)
1355             .setSortAscending(false).setSortPriority(1)
1356             .addColumn("Literacy%", "class='target'", "{0,number,@@}%", "class='targetRight'", true)
1357             .addColumn("Written%", "class='target'", "{0,number,@@}%", "class='targetRight'", true)
1358             .addColumn("Report Bug", "class='target'", null, "class='target'", false);
1359 
1360             for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
1361                 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
1362                 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode);
1363                 double territoryLiteracy = territoryData2.getLiteratePopulationPercent();
1364 
1365                 for (String languageCode : supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territoryCode)) {
1366                     PopulationData languageData = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode);
1367                     double languagePopulationPercent = 100 * languageData.getPopulation() / territoryData2.getPopulation();
1368                     double languageliteracy = languageData.getLiteratePopulationPercent();
1369                     double writingFrequency = languageData.getWritingPercent();
1370 
1371                     tablePrinter.addRow()
1372                     .addCell(getFirstPrimaryWeight(territoryName))
1373                     .addCell(territoryName)
1374                     .addCell(territoryCode)
1375                     .addCell(territoryLiteracy)
1376                     .addCell(getLanguageName(languageCode) + getOfficialStatus(territoryCode, languageCode))
1377                     .addCell(languageCode)
1378                     .addCell(languageData.getPopulation())
1379                     .addCell(languagePopulationPercent)
1380                     .addCell(languageliteracy)
1381                     .addCell(writingFrequency)
1382                     .addCell(
1383                         addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + getLanguageName(languageCode)
1384                         + " (" + languageCode + ")"
1385                         + " in " + territoryName + " (" + territoryCode + ")",
1386                             "<fixed data for territory, plus references>"))
1387                     .finishRow();
1388                 }
1389 
1390                 tablePrinter.addRow()
1391                 .addCell(getFirstPrimaryWeight(territoryName))
1392                 .addCell(territoryName)
1393                 .addCell(territoryCode)
1394                 .addCell(territoryLiteracy)
1395                 .addCell(
1396                     addBug(1217, "<i>add new</i>", "<email>", "Add language to " + territoryName + "("
1397                         + territoryCode + ")",
1398                         "<language, speaker pop. and literacy in territory, plus references>"))
1399                 .addCell("")
1400                 .addCell(0.0d)
1401                 .addCell(0.0d)
1402                 .addCell(0.0d)
1403                 .addCell(0.0d)
1404                 .addCell("")
1405                 .finishRow();
1406 
1407             }
1408             String value = tablePrinter.toTable();
1409             pw2.println(value);
1410             pw2.close();
1411         }
1412 
showCountryInfo(PrintWriter pw)1413         private void showCountryInfo(PrintWriter pw) throws IOException {
1414             PrintWriter pw21 = new PrintWriter(new FormattedFileWriter(null, "Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS));
1415             PrintWriter pw2 = pw21;
1416             NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH);
1417             nf.setGroupingUsed(true);
1418             //NumberFormat percent = new DecimalFormat("000.0%");
1419             TablePrinter tablePrinter = new TablePrinter()
1420                 // tablePrinter.setSortPriorities(0,4)
1421                 .addColumn("T", "class='source'", null, "class='source'", true)
1422                 .setSortPriority(0)
1423                 .setBreakSpans(true)
1424                 .setRepeatHeader(true)
1425                 .setHidden(true)
1426                 .addColumn("Territory", "class='source'", null, "class='source'", true)
1427                 .setSortPriority(0)
1428                 .setBreakSpans(true)
1429                 .addColumn("Code", "class='source'", CldrUtility.getDoubleLinkMsg(),
1430                     "class='source'", true)
1431                 .addColumn("Terr. Pop (M)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true)
1432                 .addColumn("Terr. GDP ($M PPP)", "class='target'", "{0,number,#,#@@}", "class='targetRight'", true)
1433                 .addColumn("Currencies (2006...)", "class='target'", null, "class='target'", true);
1434             for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) {
1435                 String header = it.next();
1436                 if (header.equals("calendar")) header = "calendar (+gregorian)";
1437                 tablePrinter.addColumn(header).setHeaderAttributes("class='target'")
1438                 .setCellAttributes("class='target'").setSpanRows(true);
1439             }
1440 
1441             tablePrinter
1442             .addColumn("Report Bug", "class='target'", null, "class='target'", false);
1443 
1444             for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
1445                 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
1446                 PopulationData territoryData2 = supplementalDataInfo.getPopulationDataForTerritory(territoryCode);
1447                 double population = territoryData2.getPopulation() / 1000000;
1448                 double gdp = territoryData2.getGdp() / 1000000;
1449 
1450                 Map<String, Set<String>> worldData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false));
1451                 Map<String, Set<String>> countryData = territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false));
1452 
1453                 tablePrinter.addRow()
1454                 .addCell(getFirstPrimaryWeight(territoryName))
1455                 .addCell(territoryName)
1456                 .addCell(territoryCode)
1457                 .addCell(population)
1458                 .addCell(gdp)
1459                 .addCell(getCurrencyNames(territoryCode));
1460 
1461                 addOtherCountryData(tablePrinter, worldData, countryData);
1462 
1463                 tablePrinter
1464                 .addCell(
1465                     addBug(1217, "<i>bug</i>", "<email>", "Fix info for " + territoryName + " (" + territoryCode + ")",
1466                         "<fixed data for territory, plus references>"))
1467                 .finishRow();
1468 
1469             }
1470             String value = tablePrinter.toTable();
1471             pw2.println(value);
1472             pw2.close();
1473         }
1474 
1475         static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
1476 
1477         // Do just an approximation for now
1478 
getFirstPrimaryWeight(String territoryName)1479         private String getFirstPrimaryWeight(String territoryName) {
1480             char first = territoryName.charAt(0);
1481             String result = nfd.getDecomposition(first);
1482             if (result == null) {
1483                 return UTF16.valueOf(first);
1484             }
1485             return UTF16.valueOf(result.codePointAt(0));
1486         }
1487 
1488         // private String getTerritoryWithLikelyLink(String territoryCode) {
1489         // return "<a href='likely_subtags.html#und_"+ territoryCode + "'>" + territoryCode + "</a>";
1490         // }
1491 
getOfficialStatus(String territoryCode, String languageCode)1492         private String getOfficialStatus(String territoryCode, String languageCode) {
1493             PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode);
1494             if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return "";
1495             return " <span title='" + x.getOfficialStatus().toString().replace('_', ' ') + "'>{"
1496             + x.getOfficialStatus().toShortString() + "}</span>";
1497         }
1498 
getRawOfficialStatus(String territoryCode, String languageCode)1499         private String getRawOfficialStatus(String territoryCode, String languageCode) {
1500             PopulationData x = supplementalDataInfo.getLanguageAndTerritoryPopulationData(languageCode, territoryCode);
1501             if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) return "";
1502             return x.getOfficialStatus().toString();
1503         }
1504 
addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData)1505         private void addOtherCountryData(TablePrinter tablePrinter, Map<String, Set<String>> worldData, Map<String, Set<String>> countryData) {
1506             for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) {
1507                 String type = it2.next();
1508                 Set<String> worldResults = worldData.get(type);
1509                 Set<String> territoryResults = null;
1510                 if (countryData != null) {
1511                     territoryResults = countryData.get(type);
1512                 }
1513                 if (territoryResults == null) {
1514                     territoryResults = worldResults;
1515                 }
1516                 String out = "";
1517                 if (territoryResults != null) {
1518                     out = territoryResults + "";
1519                     out = out.substring(1, out.length() - 1); // remove [ and ]
1520                 }
1521                 tablePrinter.addCell(out);
1522             }
1523         }
1524 
getCurrencyNames(String territoryCode)1525         private String getCurrencyNames(String territoryCode) {
1526             Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode);
1527             if (currencies == null || currencies.size() == 0) return "";
1528             StringBuilder buffer = new StringBuilder();
1529             for (String code : currencies) {
1530                 if (buffer.length() != 0) buffer.append(",<br>");
1531                 buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false));
1532             }
1533             return buffer.toString();
1534         }
1535 
addCharSubstitution(String value, String substitute)1536         private void addCharSubstitution(String value, String substitute) {
1537             if (substitute.equals(value))
1538                 return;
1539             LinkedHashSet<String> already = charSubstitutions.get(value);
1540             if (already == null)
1541                 charSubstitutions.put(value, already = new LinkedHashSet<>(0));
1542             already.add(substitute);
1543             Log.logln(hex(value, " ") + "; " + hex(substitute, " "));
1544         }
1545 
1546         /**
1547          *
1548          */
1549 //        public void showTerritoryInfo() {
1550 //            Map territory_parent = new TreeMap();
1551 //            gather("001", territory_parent);
1552 //            for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) {
1553 //                String territory = (String) it.next();
1554 //                String parent = (String) territory_parent.get(territory);
1555 //                System.out.println(territory + "\t" + english.getName(english.TERRITORY_NAME, territory) + "\t"
1556 //                    + parent + "\t" + english.getName(english.TERRITORY_NAME, parent));
1557 //            }
1558 //        }
1559 
1560 //        private void gather(String item, Map territory_parent) {
1561 //            Collection containedByItem = (Collection) group_contains.get(item);
1562 //            if (containedByItem == null)
1563 //                return;
1564 //            for (Iterator it = containedByItem.iterator(); it.hasNext();) {
1565 //                String contained = (String) it.next();
1566 //                territory_parent.put(contained, item);
1567 //                gather(contained, territory_parent);
1568 //            }
1569 //        }
1570 
addTerritoryInfo(String territoriesList, String type, String info)1571         private void addTerritoryInfo(String territoriesList, String type, String info) {
1572             String[] territories = territoriesList.split("\\s+");
1573             territoryTypes.add(type);
1574             for (int i = 0; i < territories.length; ++i) {
1575                 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false);
1576                 Map<String, Set<String>> s = territoryData.get(territory);
1577                 if (s == null) {
1578                     territoryData.put(territory, s = new TreeMap<>());
1579                 }
1580                 Set<String> ss = s.get(type);
1581                 if (ss == null) {
1582                     s.put(type, ss = new TreeSet<>());
1583                 }
1584                 ss.add(info);
1585             }
1586         }
1587 
showCalendarData(PrintWriter pw0)1588         public void showCalendarData(PrintWriter pw0) throws IOException {
1589             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Other Territory Data", null, SUPPLEMENTAL_INDEX_ANCHORS));
1590             pw.println("<table>");
1591             pw.println("<tr><th class='source'>Territory</th>");
1592             for (Iterator<String> it = territoryTypes.iterator(); it.hasNext();) {
1593                 String header = it.next();
1594                 if (header.equals("calendar")) header = "calendar (+gregorian)";
1595                 pw.println("<th class='target'>" + header + "</th>");
1596             }
1597             pw.println("</tr>");
1598 
1599             String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false);
1600             Map<String, Set<String>> worldData = territoryData.get(worldName);
1601             for (Iterator<String> it = territoryData.keySet().iterator(); it.hasNext();) {
1602                 String country = it.next();
1603                 if (country.equals(worldName))
1604                     continue;
1605                 showCountry(pw, country, country, worldData);
1606             }
1607             showCountry(pw, worldName, "Other", worldData);
1608             pw.println("</table>");
1609             pw.close();
1610         }
1611 
showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)1612         private void showCountry(PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData) {
1613             pw.println("<tr><td class='source'>" + countryTitle + "</td>");
1614             Map<String, Set<String>> data = territoryData.get(country);
1615             for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext();) {
1616                 String type = it2.next();
1617                 String target = "target";
1618                 Set<String> results = data.get(type);
1619                 Set<String> worldResults = worldData.get(type);
1620                 if (results == null) {
1621                     results = worldResults;
1622                     target = "target2";
1623                 } else if (results.equals(worldResults)) {
1624                     target = "target2";
1625                 }
1626                 String out = "";
1627                 if (results != null) {
1628                     out = results + "";
1629                     out = out.substring(1, out.length() - 1); // remove [ and ]
1630                 }
1631                 pw.println("<td class='" + target + "'>" + out + "</td>");
1632             }
1633             pw.println("</tr>");
1634         }
1635 
showCorrespondances()1636         public void showCorrespondances() {
1637             // show correspondances between language and script
1638             Map<String, String> name_script = new TreeMap<>();
1639             for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext();) {
1640                 String script = it.next();
1641                 String name = english.getName(CLDRFile.SCRIPT_NAME, script);
1642                 if (name == null)
1643                     name = script;
1644                 name_script.put(name, script);
1645                 /*
1646                  * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages
1647                  * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories
1648                  * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages
1649                  * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts
1650                  */}
1651             String delimiter = "\\P{L}+";
1652             Map<String, String> name_language = new TreeMap<>();
1653             for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) {
1654                 String language = it.next();
1655                 String names = english.getName(CLDRFile.LANGUAGE_NAME, language);
1656                 if (names == null)
1657                     names = language;
1658                 name_language.put(names, language);
1659             }
1660             for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); it.hasNext();) {
1661                 String language = it.next();
1662                 String names = english.getName(CLDRFile.LANGUAGE_NAME, language);
1663                 if (names == null)
1664                     names = language;
1665                 String[] words = names.split(delimiter);
1666                 if (words.length > 1) {
1667                     // System.out.println(names);
1668                 }
1669                 for (int i = 0; i < words.length; ++i) {
1670                     String name = words[i];
1671                     String script = name_script.get(name);
1672                     if (script != null) {
1673                         Set<String> langSet = (Set<String>) script_languages.asMap().get(script);
1674                         if (langSet != null && langSet.contains(language))
1675                             System.out.print("*");
1676                         System.out.println("\t" + name + " [" + language + "]\t=> " + name + " [" + script + "]");
1677                     } else {
1678                         String language2 = name_language.get(name);
1679                         if (language2 != null && !language.equals(language2)) {
1680                             Set<String> langSet = (Set<String>) language_scripts.get(language);
1681                             if (langSet != null)
1682                                 System.out.print("*");
1683                             System.out.print("?\tSame script?\t + " + getName(CLDRFile.LANGUAGE_NAME, language, false)
1684                             + "\t & " + getName(CLDRFile.LANGUAGE_NAME, language2, false));
1685                             langSet = (Set<String>) language_scripts.get(language2);
1686                             if (langSet != null)
1687                                 System.out.print("*");
1688                             System.out.println();
1689                         }
1690                     }
1691                 }
1692             }
1693         }
1694 
1695         /**
1696          * @throws IOException
1697          *
1698          */
printCurrency(PrintWriter index)1699         public void printCurrency(PrintWriter index) throws IOException {
1700             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Detailed Territory-Currency Information",
1701                 null
1702                 // "<p>The following table shows when currencies were in use in different countries. " +
1703                 // "See also <a href='#format_info'>Decimal Digits and Rounding</a>. " +
1704                 // "To correct any information here, please file a " +
1705                 // addBug(1274, "bug", "<email>", "Currency Bug",
1706                 // "<currency, country, and references supporting change>") +
1707                 // ".</p>"
1708                 , SUPPLEMENTAL_INDEX_ANCHORS));
1709             String section1 = "Territory to Currency";
1710             String section2 = "Decimal Digits and Rounding";
1711             showContents(pw, "territory_currency", section1, "format_info", section2);
1712 
1713             pw.println("<h2>" + CldrUtility.getDoubleLinkedText("territory_currency", "1. " + section1) + "</h2>");
1714 
1715             // doTitle(pw, "Territory \u2192 Currency");
1716             pw.println("<table>");
1717             pw.println("<tr><th class='source'>Territory</th>" +
1718                 "<th class='source'>Code</th>" +
1719                 "<th class='target'>From</th>" +
1720                 "<th class='target'>To</th>" +
1721                 "<th class='target'>Currency</th>" +
1722                 "<th class='target'>Name</th>" +
1723                 "</tr>");
1724 
1725             Relation<String, String> currencyToTerritory = Relation.of(new HashMap<String, Set<String>>(),
1726                 HashSet.class);
1727             Relation<String, String> modernCurrencyToTerritory = Relation.of(new HashMap<String, Set<String>>(),
1728                 HashSet.class);
1729 
1730             for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) {
1731                 String name = nameCode.getKey();
1732                 String regionCode = nameCode.getValue();
1733                 if (!StandardCodes.isCountry(regionCode)) {
1734                     continue;
1735                 }
1736                 if (sc.isLstregPrivateUse("region", regionCode)) {
1737                     continue;
1738                 }
1739                 Set<CurrencyDateInfo> info = supplementalDataInfo.getCurrencyDateInfo(regionCode);
1740 
1741                 int infoSize = 1;
1742                 if (info != null) {
1743                     infoSize = info.size();
1744                 }
1745                 pw.println("<tr>" +
1746                     "<td class='source' rowSpan='" + infoSize + "'>" + name + "</td>" +
1747                     "<td class='source' rowSpan='" + infoSize + "'>" + CldrUtility.getDoubleLinkedText(regionCode)
1748                     + "</td>");
1749                 if (info == null) {
1750                     pw.println("<td class='target'>" + "<i>na</i>" + "</td>" +
1751                         "<td class='target'>" + "<i>na</i>" + "</td>" +
1752                         "<td class='target'>" + "<i>na</i>" + "</td>" +
1753                         "<td class='target'>" + "<i>na</i>" + "</td>" +
1754                         "</tr>");
1755                     continue;
1756                 }
1757                 boolean first = true;
1758                 for (CurrencyDateInfo infoItem : info) {
1759                     Date endData = infoItem.getEnd();
1760                     if (endData.equals(CurrencyDateInfo.END_OF_TIME)) {
1761                         modernCurrencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode));
1762                     } else {
1763                         currencyToTerritory.put(infoItem.getCurrency(), getTerritoryName(regionCode));
1764                     }
1765                     if (first)
1766                         first = false;
1767                     else
1768                         pw.println("<tr>");
1769                     pw.println("<td class='target'>" + CurrencyDateInfo.formatDate(infoItem.getStart()) + "</td>" +
1770                         "<td class='target'>" + CurrencyDateInfo.formatDate(endData) + "</td>" +
1771                         "<td class='target'>" + infoItem.getCurrency() + "</td>" +
1772                         "<td class='target'>" + english.getName("currency", infoItem.getCurrency()) + "</td>" +
1773                         "</tr>");
1774                 }
1775             }
1776             // doFooter(pw);
1777             // pw.close();
1778             // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null));
1779             pw.write("</table>");
1780 
1781             pw.println("<h2>" + CldrUtility.getDoubleLinkedText("format_info", "2. " + section2) + "</h2>");
1782 
1783             pw.write("<p>This table shows the number of digits used for each currency, "
1784                 + " and the countries where it is or was in use. "
1785                 + "Countries where the currency is in current use are bolded. "
1786                 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. "
1787                 + "Where the values are different in a cash context, that is shown in a second column."
1788                 + "</p>");
1789             pw.write("<div align='center'><table>");
1790 
1791             // doTitle(pw, "Currency Format Info");
1792             //             <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" cashRounding="0"/>
1793 
1794             pw.println("<tr>" +
1795                 "<th class='source nowrap'>Name</th>" +
1796                 "<th class='source'>Currency</th>" +
1797                 "<th class='target'>Digits</th>" +
1798                 "<th class='target'>Cash Digits</th>" +
1799                 "<th class='target'>Countries</th>" +
1800                 "</tr>");
1801             Set<String> currencyList = new TreeSet<String>(col);
1802             currencyList.addAll(currency_fractions.keySet());
1803             currencyList.addAll(currency_territory.keySet());
1804 
1805             for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) {
1806                 //String name = nameCode.getKey();
1807                 String currency = nameCode.getValue();
1808                 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency);
1809                 Set<String> territories = currencyToTerritory.get(currency);
1810                 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency);
1811 
1812                 // String fractions = (String) currency_fractions.get(currency);
1813                 // if (fractions == null)
1814                 // fractions = defaultDigits;
1815                 // Set territories = (Set) currency_territory.get(currency);
1816                 pw.print("<tr>" +
1817                     "<td class='source nowrap'>"
1818                     + TransliteratorUtilities.toHTML.transform(english.getName("currency", currency)) + "</td>" +
1819                     "<td class='source'>" + CldrUtility.getDoubleLinkedText(currency) + "</td>" +
1820                     "<td class='target'>" +
1821                     info.getDigits()
1822                     + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")")
1823                     + "</td>"
1824                     + "<td class='target'>"
1825                     + (info.cashDigits == info.getDigits() && info.cashRounding == info.getRounding() ? "" : (info.cashDigits
1826                         + (info.cashRounding == 0 ? "" : " (" + info.cashRounding + ")")))
1827                     + "</td>" +
1828                     "<td class='target'>");
1829                 boolean first = true;
1830                 boolean needBreak = false;
1831                 if (modernTerritories != null) {
1832                     needBreak = true;
1833                     for (String territory : modernTerritories) {
1834                         if (first)
1835                             first = false;
1836                         else
1837                             pw.print(", ");
1838                         pw.print("<b>" + territory + "</b>");
1839                     }
1840                 }
1841                 //boolean haveBreak = true;
1842                 if (territories != null) {
1843                     for (String territory : territories) {
1844                         if (first)
1845                             first = false;
1846                         else if (!needBreak)
1847                             pw.print(", ");
1848                         else {
1849                             pw.print(",<br>");
1850                             needBreak = false;
1851                         }
1852                         pw.print(territory);
1853                     }
1854                 }
1855                 pw.println("</td></tr>");
1856             }
1857             pw.println("</table>");
1858             pw.close();
1859             // doFooter(pw);
1860 
1861             // if (false) {
1862             // doTitle(pw, "Territories Versus Currencies");
1863             // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without Territories</th></tr>");
1864             // pw.println("<tr><td class='target'>");
1865             // Set territoriesWithoutCurrencies = new TreeSet();
1866             // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory"));
1867             // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies);
1868             // territoriesWithoutCurrencies.removeAll(group_contains.keySet());
1869             // boolean first = true;
1870             // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) {
1871             // if (first) first = false;
1872             // else pw.print(", ");
1873             // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false));
1874             // }
1875             // pw.println("</td><td class='target'>");
1876             // Set currenciesWithoutTerritories = new TreeSet();
1877             // currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency"));
1878             // currenciesWithoutTerritories.removeAll(currenciesWithTerritories);
1879             // first = true;
1880             // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) {
1881             // if (first) first = false;
1882             // else pw.print(", ");
1883             // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false));
1884             // }
1885             // pw.println("</td></tr>");
1886             // doFooter(pw);
1887             // }
1888         }
1889 
getTerritoryName(String territory)1890         private String getTerritoryName(String territory) {
1891             String name;
1892             name = english.getName("territory", territory);
1893             if (name == null) {
1894                 name = sc.getData("territory", territory);
1895             }
1896             if (name != null) {
1897                 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")";
1898             } else {
1899                 return territory;
1900             }
1901         }
1902 
1903         /**
1904          * @throws IOException
1905          *
1906          */
printAliases(PrintWriter index)1907         public void printAliases(PrintWriter index) throws IOException {
1908             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS));
1909 
1910             // doTitle(pw, "Aliases");
1911             pw.println("<table>");
1912             pw.println("<tr><th class='source'>" + "Type" + "</th>" +
1913                 "<th class='source'>" + "Code" + "</th>" +
1914                 "<th class='target'>" + "Reason" + "</th>" +
1915                 "<th class='target'>" + "Substitute (if available)" + "</th></tr>");
1916             for (Iterator<String[]> it = aliases.iterator(); it.hasNext();) {
1917                 String[] items = it.next();
1918                 pw.println("<tr><td class='source'>" + items[0] + "</td>" +
1919                     "<td class='source'>" + CldrUtility.getDoubleLinkedText(items[1]) + "</td>" +
1920                     "<td class='target'>" + items[3] + "</td>" +
1921                     "<td class='target'>" + items[2] + "</td></tr>");
1922             }
1923             // doFooter(pw);
1924             pw.println("</table>");
1925             pw.close();
1926         }
1927 
1928         // deprecatedItems
1929         // public void printDeprecatedItems(PrintWriter pw) {
1930         // doTitle(pw, "Deprecated Items");
1931         // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>");
1932         // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) {
1933         // Map source = (Map)it.next();
1934         // Object item;
1935         // pw.print("<tr>");
1936         // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") + "</td>");
1937         // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : "<i>any</i>") + "</td>");
1938         // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : "<i>any</i>") + "</td>");
1939         // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : "<i>any</i>") + "</td>");
1940         // pw.print("</tr>");
1941         // }
1942         // doFooter(pw);
1943         // }
1944 
printWindows_Tzid(PrintWriter index)1945         public void printWindows_Tzid(PrintWriter index) throws IOException {
1946             Map<String, Map<String, Map<String, String>>> zoneMapping = supplementalDataInfo
1947                 .getTypeToZoneToRegionToZone();
1948             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS));
1949             for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : zoneMapping.entrySet()) {
1950                 String type = typeAndZoneToRegionToZone.getKey();
1951                 Map<String, Map<String, String>> zoneToRegionToZone = typeAndZoneToRegionToZone.getValue();
1952                 pw.println("<br><h1>Mapping for: " + type + "</h1><br>");
1953                 // doTitle(pw, "Windows \u2192 Tzid");
1954                 pw.println("<table>");
1955                 pw.println("<tr><th class='source'>" + type + "</th><th class='source'>" + "Region"
1956                     + "</th><th class='target'>" + "TZID" + "</th></tr>");
1957 
1958                 for (Entry<String, Map<String, String>> zoneAndregionToZone : zoneToRegionToZone.entrySet()) {
1959                     String source = zoneAndregionToZone.getKey();
1960                     Map<String, String> regionToZone = zoneAndregionToZone.getValue();
1961                     for (Entry<String, String> regionAndZone : regionToZone.entrySet()) {
1962                         String region = regionAndZone.getKey();
1963                         String target = regionAndZone.getValue();
1964                         if (region == null) region = "<i>any</a>";
1965                         pw.println("<tr><td class='source'>" + source + "</td><td class='source'>" + region
1966                             + "</td><td class='target'>" + target + "</td></tr>");
1967                     }
1968                 }
1969                 // doFooter(pw);
1970                 pw.println("</table>");
1971             }
1972             pw.close();
1973         }
1974 
1975         // <info iso4217="ADP" digits="0" rounding="0"/>
1976 
printCharacters(PrintWriter index)1977         public void printCharacters(PrintWriter index) throws IOException {
1978             String title = "Character Fallback Substitutions";
1979 
1980             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS));
1981             // doTitle(pw, title);
1982             pw.println("<table>");
1983 
1984             pw.println(
1985                 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>");
1986             UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]");
1987             for (com.ibm.icu.text.UnicodeSetIterator it = new com.ibm.icu.text.UnicodeSetIterator(chars); it.next();) {
1988                 String value = it.getString();
1989                 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC));
1990                 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC));
1991             }
1992             int[] counts = new int[4];
1993             for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext();) {
1994                 String value = it.next();
1995                 LinkedHashSet<String> substitutes = charSubstitutions.get(value);
1996                 String nfc = Normalizer.normalize(value, Normalizer.NFC);
1997                 String nfkc = Normalizer.normalize(value, Normalizer.NFKC);
1998 
1999                 String sourceTag = "<td class='source'>";
2000                 if (substitutes.size() > 1) {
2001                     sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>";
2002                 }
2003                 boolean first = true;
2004                 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext();) {
2005                     String substitute = it2.next();
2006                     String type = "Explicit";
2007                     String targetTag = "<td class='target3'>";
2008                     if (substitute.equals(nfc)) {
2009                         type = "NFC";
2010                         targetTag = "<td class='target'>";
2011                         counts[2]++;
2012                     } else if (substitute.equals(nfkc)) {
2013                         type = "NFKC";
2014                         targetTag = "<td class='target4'>";
2015                         counts[3]++;
2016                     } else {
2017                         counts[0]++;
2018                     }
2019                     pw.println("<tr>"
2020                         + (!first ? "" : sourceTag + hex(value, ", ") + "</td>" + sourceTag
2021                             + TransliteratorUtilities.toHTML.transliterate(value) + "</td>" + sourceTag
2022                             + UCharacter.getName(value, ", ")
2023                             + "</td>")
2024                         + targetTag + type + "</td>" + targetTag + hex(substitute, ", ") + "</td>"
2025                         + targetTag + TransliteratorUtilities.toHTML.transliterate(substitute) + "</td>" + targetTag
2026                         + UCharacter.getName(substitute, ", ") + "</td></tr>");
2027                     first = false;
2028                 }
2029             }
2030             // doFooter(pw);
2031             pw.println("</table>");
2032 
2033             pw.close();
2034             for (int i = 0; i < counts.length; ++i) {
2035                 System.out.println("Count\t" + i + "\t" + counts[i]);
2036             }
2037         }
2038 
hex(String s, String separator)2039         public static String hex(String s, String separator) {
2040             StringBuffer result = new StringBuffer();
2041             int cp;
2042             for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
2043                 cp = UTF16.charAt(s, i);
2044                 if (i != 0)
2045                     result.append(separator);
2046                 result.append(com.ibm.icu.impl.Utility.hex(cp));
2047             }
2048             return result.toString();
2049         }
2050 
2051         /**
2052          *
2053          */
2054         // private PrintWriter doTitle(PrintWriter pw, String title) {
2055         // //String anchor = FileUtilities.anchorize(title);
2056         // pw.println("<div align='center'><table>");
2057         // //anchors.put(title, anchor);
2058         // //PrintWriter result = null;
2059         // //return result;
2060         // }
2061 
2062         // private void doFooter(PrintWriter pw) {
2063         // pw.println("</table></div>");
2064         // }
printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst)2065         public void printContains2(PrintWriter pw, String lead, String start, int depth, boolean isFirst) {
2066             String name = depth == 4 ? start : getName(CLDRFile.TERRITORY_NAME, start, false);
2067             if (!isFirst)
2068                 pw.print(lead);
2069             int count = getTotalContainedItems(start, depth);
2070             pw.print("<td class='z" + depth + "' rowSpan='" + count + "'>" + name + "</td>"); // colSpan='" + (5 -
2071             // depth) + "'
2072             if (depth == 4)
2073                 pw.println("</tr>");
2074             Collection<String> contains = getContainedCollection(start, depth);
2075             if (contains != null) {
2076                 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator);
2077                 contains2.addAll(contains);
2078                 boolean first = true;
2079                 for (Iterator<String> it = contains2.iterator(); it.hasNext();) {
2080                     String item = it.next();
2081                     printContains2(pw, lead, item, depth + 1, first); // + "<td>&nbsp;</td>"
2082                     first = false;
2083                 }
2084             }
2085         }
2086 
getTotalContainedItems(String start, int depth)2087         private int getTotalContainedItems(String start, int depth) {
2088             Collection<String> c = getContainedCollection(start, depth);
2089             if (c == null)
2090                 return 1;
2091             int sum = 0;
2092             for (Iterator<String> it = c.iterator(); it.hasNext();) {
2093                 sum += getTotalContainedItems(it.next(), depth + 1);
2094             }
2095             return sum;
2096         }
2097 
2098         /**
2099          *
2100          */
getContainedCollection(String start, int depth)2101         private Collection<String> getContainedCollection(String start, int depth) {
2102             Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start);
2103             if (contains == null) {
2104                 contains = sc.getCountryToZoneSet().get(start);
2105                 if (contains == null && depth == 3) {
2106                     contains = new TreeSet<>();
2107                     if (start.compareTo("A") >= 0) {
2108                         contains.add("<font color='red'>MISSING TZID</font>");
2109                     } else {
2110                         contains.add("<font color='red'>Not yet ISO code</font>");
2111                     }
2112                 }
2113             }
2114             return contains;
2115         }
2116 
2117         /**
2118          * @param table
2119          *            TODO
2120          *
2121          */
printMissing(PrintWriter pw, int source, int table)2122         public void printMissing(PrintWriter pw, int source, int table) {
2123             Set<String> missingItems = new HashSet<>();
2124             String type = null;
2125             if (source == CLDRFile.TERRITORY_NAME) {
2126                 type = "territory";
2127                 missingItems.addAll(sc.getAvailableCodes(type));
2128                 missingItems.removeAll(territory_languages.keySet());
2129                 missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet());
2130                 missingItems.remove("200"); // czechoslovakia
2131             } else if (source == CLDRFile.SCRIPT_NAME) {
2132                 type = "script";
2133                 missingItems.addAll(sc.getAvailableCodes(type));
2134                 missingItems.removeAll(script_languages.keySet());
2135             } else if (source == CLDRFile.LANGUAGE_NAME) {
2136                 type = "language";
2137                 missingItems.addAll(sc.getAvailableCodes(type));
2138                 if (table == CLDRFile.SCRIPT_NAME)
2139                     missingItems.removeAll(language_scripts.keySet());
2140                 if (table == CLDRFile.TERRITORY_NAME)
2141                     missingItems.removeAll(language_territories.keySet());
2142             } else {
2143                 throw new IllegalArgumentException("Illegal code");
2144             }
2145             Set<String> missingItemsNamed = new TreeSet<String>(col);
2146             for (Iterator<String> it = missingItems.iterator(); it.hasNext();) {
2147                 String item = it.next();
2148                 List<String> data = sc.getFullData(type, item);
2149                 if (data.get(0).equals("PRIVATE USE"))
2150                     continue;
2151                 if (data.size() < 3)
2152                     continue;
2153                 if (!"".equals(data.get(2)))
2154                     continue;
2155 
2156                 String itemName = getName(source, item, true);
2157                 missingItemsNamed.add(itemName);
2158             }
2159             pw.println("<div align='center'><table>");
2160             for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext();) {
2161                 pw.println("<tr><td class='target'>" + it.next() + "</td></tr>");
2162             }
2163             pw.println("</table></div>");
2164         }
2165 
2166         // source, eg english.TERRITORY_NAME
2167         // target, eg english.LANGUAGE_NAME
print(PrintWriter pw, int source, int target)2168         public void print(PrintWriter pw, int source, int target) {
2169             Multimap<String, String> data = source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages
2170                 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? language_territories
2171                     : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages
2172                         : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts
2173                             : null;
2174             // transform into names, and sort
2175             Map<String, Set<String>> territory_languageNames = new TreeMap<String, Set<String>>(col);
2176             for (Iterator<String> it = data.keySet().iterator(); it.hasNext();) {
2177                 String territory = it.next();
2178                 String territoryName = getName(source, territory, true);
2179                 Set<String> s = territory_languageNames.get(territoryName);
2180                 if (s == null)
2181                     territory_languageNames.put(territoryName, s = new TreeSet<String>(col));
2182                 for (Iterator<String> it2 = data.get(territory).iterator(); it2.hasNext();) {
2183                     String language = it2.next();
2184                     String languageName = getName(target, language, true);
2185                     s.add(languageName);
2186                 }
2187             }
2188 
2189             pw.println("<div align='center'><table>");
2190 
2191             for (Iterator<String> it = territory_languageNames.keySet().iterator(); it.hasNext();) {
2192                 String territoryName = it.next();
2193                 pw.println("<tr><td class='source' colspan='2'>" + territoryName + "</td></tr>");
2194                 Set<String> s = territory_languageNames.get(territoryName);
2195                 for (Iterator<String> it2 = s.iterator(); it2.hasNext();) {
2196                     String languageName = it2.next();
2197                     pw.println("<tr><td>&nbsp;</td><td class='target'>" + languageName + "</td></tr>");
2198                 }
2199             }
2200             pw.println("</table></div>");
2201 
2202         }
2203 
2204         /**
2205          * @param codeFirst
2206          *            TODO
2207          *
2208          */
getName(int type, String oldcode, boolean codeFirst)2209         private String getName(int type, String oldcode, boolean codeFirst) {
2210             if (oldcode.contains(" ")) {
2211                 String[] result = oldcode.split("\\s+");
2212                 for (int i = 0; i < result.length; ++i) {
2213                     result[i] = getName(type, result[i], codeFirst);
2214                 }
2215                 return CldrUtility.join(Arrays.asList(result), ", ");
2216             } else {
2217                 int pos = oldcode.indexOf('*');
2218                 String code = pos < 0 ? oldcode : oldcode.substring(0, pos);
2219                 String ename = english.getName(type, code);
2220                 String nameString = ename == null ? code : ename;
2221                 return nameString.equals(oldcode) ? nameString
2222                     : codeFirst ? "[" + oldcode + "]" + "\t" + nameString
2223                         : nameString + "\t" + "[" + oldcode + "]";
2224             }
2225         }
2226 
2227         private String getName(String locale, boolean codeFirst) {
2228             String ename = getLanguageName(locale);
2229             return codeFirst ? "[" + locale + "]\t" + (ename == null ? locale : ename) : (ename == null ? locale
2230                 : ename) + "\t[" + locale + "]";
2231         }
2232 
2233         Comparator territoryNameComparator = new Comparator() {
2234             @Override
2235             public int compare(Object o1, Object o2) {
2236                 return col.compare(getName(CLDRFile.TERRITORY_NAME, (String) o1, false),
2237                     getName(CLDRFile.TERRITORY_NAME, (String) o2, false));
2238             }
2239         };
2240 
2241         static String[] stringArrayPattern = new String[0];
2242         static String[][] string2ArrayPattern = new String[0][];
2243 
2244         public static Map<String, String> territoryAliases = new HashMap<>();
2245 
2246         public void printContains(PrintWriter index) throws IOException {
2247             String title = "Territory Containment (UN M.49)";
2248 
2249             PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS));
2250             // doTitle(pw, title);
2251             List<String[]> rows = new ArrayList<>();
2252             printContains3("001", rows, new ArrayList<String>());
2253             TablePrinter tablePrinter = new TablePrinter()
2254                 .addColumn("World", "class='source'", null, "class='z0'", true).setSortPriority(0)
2255                 .addColumn("Continent", "class='source'", null, "class='z1'", true).setSortPriority(1)
2256                 .addColumn("Subcontinent", "class='source'", null, "class='z2'", true).setSortPriority(2)
2257                 .addColumn("Country (Territory)", "class='source'", null, "class='z3'", true).setSortPriority(3)
2258                 .addColumn("Time Zone", "class='source'", null, "class='z4'", true).setSortPriority(4);
2259             String[][] flatData = rows.toArray(string2ArrayPattern);
2260             pw.println(tablePrinter.addRows(flatData).toTable());
2261 
2262             showSubtable(pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions");
2263             showSubtable(pw, ContainmentStyle.deprecated, "Deprecated", "Container", "Deprecated Region");
2264 
2265 //            Relation<String, String> deprecated = supplementalDataInfo
2266 //                .getTerritoryToContained(ContainmentStyle.deprecated);
2267 //
2268 //            for (String region : deprecated.keySet()) {
2269 //                nameToContainers.add(region);
2270 //            }
2271 //            pw.println("<h2>Groupings and Deprecated Regions</h2>");
2272 //            for (String region : nameToContainers) {
2273 //                String name = getName(CLDRFile.TERRITORY_NAME, region, false);
2274 //                Set<String> dep = deprecated.get(region);
2275 //                Set<String> gro = grouping.get(region);
2276 //                Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : dep).iterator();
2277 //                Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : gro).iterator();
2278 //                while (depIt.hasNext() || groIt.hasNext()) {
2279 //                    String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, depIt.next(), false) : "";
2280 //                    String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, groIt.next(), false) : "";
2281 //                    tablePrinter2.addRow()
2282 //                    .addCell(name)
2283 //                    .addCell(gro1)
2284 //                    .addCell(dep1)
2285 //                    .finishRow();
2286 //                }
2287 //            }
2288 //            pw.println(tablePrinter2.toTable());
2289 //            pw.println("<h2>Other Groupings</h2>");
2290 //            for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) {
2291 //                showContainers(pw, regionContained);
2292 //            }
2293 //
2294 //            pw.println("<h2>Deprecated Codes</h2>");
2295 //            for (Entry<String, Set<String>> regionContained : deprecated.keyValuesSet()) {
2296 //                showContainers(pw, regionContained);
2297 //            }
2298             pw.close();
2299         }
2300 
2301         public void showSubtable(PrintWriter pw, ContainmentStyle containmentStyle, String title, String containerTitle, String containeeTitle) {
2302             pw.println("<h2>" +
2303                 title +
2304                 "</h2>");
2305             TablePrinter tablePrinter2 = new TablePrinter()
2306                 .addColumn(containerTitle, "class='source'", null, "class='z0'", true).setSortPriority(0)
2307                 .addColumn(containeeTitle, "class='source'", null, "class='z4'", true).setSortPriority(1);
2308 
2309             Relation<String, String> grouping = supplementalDataInfo
2310                 .getTerritoryToContained(containmentStyle);
2311 
2312             for (Entry<String, String> containerRegion : grouping.keyValueSet()) {
2313                 String container = getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false);
2314                 String containee = getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false);
2315                 tablePrinter2.addRow()
2316                 .addCell(container)
2317                 .addCell(containee)
2318                 .finishRow();
2319             }
2320             pw.println(tablePrinter2.toTable());
2321         }
2322 
2323         public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) {
2324             String region = regionContained.getKey();
2325             Set<String> contained = regionContained.getValue();
2326             pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>");
2327             for (String sub : contained) {
2328                 pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>");
2329             }
2330             pw.println("</ul></li></ul>");
2331         }
2332 
2333         private void printContains3(String start, List<String[]> rows, ArrayList<String> currentRow) {
2334             int len = currentRow.size();
2335             if (len > 3) {
2336                 return; // skip long items
2337             }
2338             currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false));
2339             //Collection<String> contains = (Collection<String>) group_contains.get(start);
2340             Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start);
2341             if (contains == null) {
2342                 contains = sc.getCountryToZoneSet().get(start);
2343                 currentRow.add("");
2344                 if (contains == null) {
2345                     currentRow.set(len + 1, "???");
2346                     rows.add(currentRow.toArray(stringArrayPattern));
2347                 } else {
2348                     for (String item : contains) {
2349                         currentRow.set(len + 1, item);
2350                         rows.add(currentRow.toArray(stringArrayPattern));
2351                     }
2352                 }
2353                 currentRow.remove(len + 1);
2354             } else {
2355                 for (String item : contains) {
2356                     if (territoryAliases.keySet().contains(item)) {
2357                         continue;
2358                     }
2359                     printContains3(item, rows, currentRow);
2360                 }
2361             }
2362             currentRow.remove(len);
2363         }
2364 
2365     }
2366 
2367     /**
2368      *
2369      */
getInverse(Map<String, Set<String>> language_territories)2370     private static Map<String, Set<String>> getInverse(Map<String, Set<String>> language_territories) {
2371         // get inverse relation
2372         Map<String, Set<String>> territory_languages = new TreeMap<>();
2373         for (Iterator<String> it = language_territories.keySet().iterator(); it.hasNext();) {
2374             String language = it.next();
2375             Set<String> territories = language_territories.get(language);
2376             for (Iterator<String> it2 = territories.iterator(); it2.hasNext();) {
2377                 String territory = it2.next();
2378                 Set<String> languages = territory_languages.get(territory);
2379                 if (languages == null)
2380                     territory_languages.put(territory, languages = new TreeSet<String>(col));
2381                 languages.add(language);
2382             }
2383         }
2384         return territory_languages;
2385 
2386     }
2387 
    /**
     * Lookup from territory display name to territory code, built once by
     * {@code getNameToCode} when this class is initialized.
     */
    static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region");
    /**
     * Lookup from currency display name to currency code, built once by
     * {@code getNameToCode} when this class is initialized.
     */
    static final Map<String, String> NAME_TO_CURRENCY = getNameToCode(CodeType.currency, "currency");
2390 
getNameToCode(CodeType codeType, String cldrCodeType)2391     private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) {
2392         SortedMap<String, String> temp = new TreeMap<String, String>(col);
2393         for (String territory : StandardCodes.make().getAvailableCodes(codeType)) {
2394             String name = english.getName(cldrCodeType, territory);
2395             temp.put(name == null ? territory : name, territory);
2396         }
2397         temp = Collections.unmodifiableSortedMap(temp);
2398         return temp;
2399     }
2400 
2401     /**
2402      * @param value_delimiter
2403      *            TODO
2404      *
2405      */
addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value)2406     private static void addTokens(String key, String values, String value_delimiter, Map<String, Set<String>> key_value) {
2407         if (values != null) {
2408             Set<String> s = key_value.get(key);
2409             if (s == null)
2410                 key_value.put(key, s = new TreeSet<String>(col));
2411             s.addAll(Arrays.asList(values.split(value_delimiter)));
2412         }
2413     }
2414 
addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value)2415     private static void addTokens(String key, String values, String value_delimiter, Multimap<String, String> key_value) {
2416         if (values != null) {
2417             key_value.putAll(key, Arrays.asList(values.split(value_delimiter)));
2418         }
2419     }
2420 
showContents(Appendable pw, String... items)2421     public static void showContents(Appendable pw, String... items) {
2422         try {
2423             pw.append("</div>" + System.lineSeparator());
2424             pw.append("<h3>Contents</h3>" + System.lineSeparator());
2425             pw.append("<ol>" + System.lineSeparator());
2426             for (int i = 0; i < items.length; i += 2) {
2427                 pw.append("<li><a href='#" + items[i] + "'>" + items[i + 1] + "</a></li>" + System.lineSeparator());
2428             }
2429             pw.append("</ol><hr>" + System.lineSeparator());
2430 
2431             pw.append("<div align='center'>" + System.lineSeparator());
2432         } catch (IOException e) {
2433             throw new ICUUncheckedIOException(e);
2434         }
2435     }
2436 
2437 }
2438