• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collection;
9 import java.util.Collections;
10 import java.util.EnumMap;
11 import java.util.EnumSet;
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.Iterator;
15 import java.util.LinkedHashMap;
16 import java.util.LinkedHashSet;
17 import java.util.List;
18 import java.util.Locale;
19 import java.util.Map;
20 import java.util.Map.Entry;
21 import java.util.Set;
22 import java.util.TreeMap;
23 import java.util.TreeSet;
24 import java.util.regex.Matcher;
25 import java.util.stream.Collectors;
26 
27 import org.unicode.cldr.draft.FileUtilities;
28 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
29 import org.unicode.cldr.tool.Option.Options;
30 import org.unicode.cldr.util.CLDRConfig;
31 import org.unicode.cldr.util.CLDRFile;
32 import org.unicode.cldr.util.CLDRFile.DraftStatus;
33 import org.unicode.cldr.util.CLDRFile.Status;
34 import org.unicode.cldr.util.CLDRInfo.CandidateInfo;
35 import org.unicode.cldr.util.CLDRInfo.PathValueInfo;
36 import org.unicode.cldr.util.CLDRInfo.UserInfo;
37 import org.unicode.cldr.util.CLDRLocale;
38 import org.unicode.cldr.util.CLDRPaths;
39 import org.unicode.cldr.util.CldrUtility;
40 import org.unicode.cldr.util.CoreCoverageInfo;
41 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
42 import org.unicode.cldr.util.Counter;
43 import org.unicode.cldr.util.Counter2;
44 import org.unicode.cldr.util.CoverageInfo;
45 import org.unicode.cldr.util.DtdType;
46 import org.unicode.cldr.util.LanguageTagCanonicalizer;
47 import org.unicode.cldr.util.LanguageTagParser;
48 import org.unicode.cldr.util.Level;
49 import org.unicode.cldr.util.Organization;
50 import org.unicode.cldr.util.PathHeader;
51 import org.unicode.cldr.util.PathHeader.Factory;
52 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
53 import org.unicode.cldr.util.PathStarrer;
54 import org.unicode.cldr.util.PatternCache;
55 import org.unicode.cldr.util.RegexLookup;
56 import org.unicode.cldr.util.RegexLookup.LookupType;
57 import org.unicode.cldr.util.SimpleFactory;
58 import org.unicode.cldr.util.StandardCodes;
59 import org.unicode.cldr.util.SupplementalDataInfo;
60 import org.unicode.cldr.util.VettingViewer;
61 import org.unicode.cldr.util.VettingViewer.MissingStatus;
62 import org.unicode.cldr.util.VoteResolver.VoterInfo;
63 
64 import com.google.common.base.Joiner;
65 import com.google.common.collect.ImmutableList;
66 import com.google.common.collect.ImmutableMultimap;
67 import com.google.common.collect.ImmutableSet;
68 import com.google.common.collect.Lists;
69 import com.google.common.collect.Multimap;
70 import com.google.common.collect.Ordering;
71 import com.google.common.collect.TreeMultimap;
72 import com.ibm.icu.impl.Relation;
73 import com.ibm.icu.text.NumberFormat;
74 import com.ibm.icu.util.ULocale;
75 import com.ibm.icu.util.VersionInfo;
76 
77 public class ShowLocaleCoverage {
78     // thresholds for measuring Level attainment
79     private static final double BASIC_THRESHOLD = 1;
80     private static final double MODERATE_THRESHOLD = 0.995;
81     private static final double MODERN_THRESHOLD = 0.995;
82 
83     // used to show higher level in properties file
84     private static final double THRESHOLD_HIGHER = 0.90d;
85 
86     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
87     private static final String TSV_MISSING_SUMMARY_HEADER =
88         "#Path Level"
89             + "\t#Locales"
90             + "\tLocales"
91             + "\tSection"
92             + "\tPage"
93             + "\tHeader"
94             + "\tCode"
95             ;
96 
97     private static final String TSV_MISSING_HEADER =
98         "#LCode"
99             + "\tEnglish Name"
100             + "\tScript"
101             + "\tLocale Level"
102             + "\tPath Level"
103             + "\tSTStatus"
104             + "\tBailey"
105             + "\tSection"
106             + "\tPage"
107             + "\tHeader"
108             + "\tCode"
109             + "\tST Link"
110             ;
111 
112     private static final String PROPERTIES_HEADER =
113         // Extra tabs are for github table formatting
114         "#\tcoverageLevels.txt\t\n"
115         + "#\tCopyright © 2022 Unicode, Inc.\n"
116         + "#\tCLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)\n"
117         + "#\tFor terms of use, see http://www.unicode.org/copyright.html\n"
118         + "#\t\n"
119         + "#\tProvides the Coverage Level of locales at Basic or Above.\n"
120         + "#\tFor more info,see the Locale Coverage Chart for this version.\n"
121         + "#\tGenerated by ShowLocaleCoverage.\n"
122         + "#\t\n"
123         + "#Locale"
124         + " ;\tLevel"
125 //            + " ;\t% of Higher"
126 //            + " ;\tHigher Level"
127 ;
128 
129     private static final String TSV_MISSING_BASIC_HEADER = "#Locale\tProv.\tUnconf.\tMissing\tPath*";
130     private static final String TSV_MISSING_COUNTS_HEADER = "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing";
131 
132     private static final boolean DEBUG = true;
133     private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter
134 
135     private static final String LATEST = ToolConstants.CHART_VERSION;
136     public static CLDRConfig testInfo = ToolConfig.getToolInstance();
137     private static final StandardCodes SC = StandardCodes.make();
138     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
139     private static final StandardCodes STANDARD_CODES = SC;
140 
141     static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
142     private static final CLDRFile ENGLISH = factory.make("en", true);
143 
144     // added info using pattern in VettingViewer.
145 
146     static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath)
147         .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true)
148         .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true)
149         .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true)
150         .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true)
151         .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true)
152         .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true);
153 
154     //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY;
155 
156     final static Options myOptions = new Options();
157 
158     enum MyOptions {
159         filter(".+", ".*", "Filter the information based on id, using a regex argument."),
160         //        draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."),
161         chart(null, null, "chart only"),
162         growth("true", "true", "Compute growth data"),
163         organization(".+", null, "Only locales for organization"),
164         version(".+",
165             LATEST, "To get different versions"),
166         rawData(null, null, "Output the raw data from all coverage levels"),
167         targetDir(".*",
168             CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."),
169         directories("(.*:)?[a-z]+(,[a-z]+)*", "common",
170             "Space-delimited list of main source directories: common,seed,exemplar.\n" +
171             "Optional, <baseDir>:common,seed"),;
172 
173         // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."),
174         // layouts(null, null, "Only create html files for keyboard layouts"),
175         // repertoire(null, null, "Only create html files for repertoire"), ;
176         // boilerplate
177         final Option option;
178 
MyOptions(String argumentPattern, String defaultArgument, String helpText)179         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
180             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
181         }
182     }
183 
184     static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>()
185         .add("\\[@alt=\"accounting\"]", true)
186         .add("\\[@alt=\"variant\"]", true)
187         .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true)
188         .add("^//ldml/localeDisplayNames/languages/language.*_", true)
189         .add("^//ldml/numbers/currencies/currency.*/symbol", true)
190         .add("^//ldml/characters/exemplarCharacters", true);
191 
192     static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed;
193     static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH);
194 
195     static boolean RAW_DATA = true;
196     private static Set<String> COMMON_LOCALES;
197 
198 
199     static class StatusData {
200         int missing;
201         int provisional;
202         int unconfirmed;
203     }
204     static class StatusCounter {
205         PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
206         Map<String, StatusData> starredPathToData = new TreeMap<>();
207         int missingTotal;
208         int provisionalTotal;
209         int unconfirmedTotal;
210 
gatherStarred(String path, DraftStatus draftStatus)211         public void gatherStarred(String path, DraftStatus draftStatus) {
212             String starredPath = pathStarrer.set(path);
213             StatusData statusData = starredPathToData.get(starredPath);
214             if (statusData == null) {
215                 starredPathToData.put(starredPath, statusData = new StatusData());
216             }
217             if (draftStatus == null) {
218                 ++statusData.missing;
219                 ++missingTotal;
220             } else switch(draftStatus) {
221             case unconfirmed:
222                 ++statusData.unconfirmed;
223                 ++unconfirmedTotal;
224                 break;
225             case provisional:
226                 ++statusData.provisional;
227                 ++provisionalTotal;
228                 break;
229             default:
230                 break;
231             }
232         }
233     }
234 
main(String[] args)235     public static void main(String[] args) throws IOException {
236         myOptions.parse(MyOptions.filter, args, true);
237 
238         Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher("");
239 
240         if (MyOptions.chart.option.doesOccur()) {
241             showCoverage(null, matcher);
242             return;
243         }
244 
245 
246         if (MyOptions.growth.option.doesOccur()) {
247             try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) {
248                 doGrowth(matcher, out);
249                 return;
250             }
251         }
252 
253         Set<String> locales = null;
254         String organization = MyOptions.organization.option.getValue();
255         boolean useOrgLevel = MyOptions.organization.option.doesOccur();
256         if (useOrgLevel) {
257             locales = STANDARD_CODES.getLocaleCoverageLocales(organization);
258         }
259 
260         if (MyOptions.version.option.doesOccur()) {
261             String number = MyOptions.version.option.getValue().trim();
262             if (!number.contains(".")) {
263                 number += ".0";
264             }
265             factory = org.unicode.cldr.util.Factory.make(
266                 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*");
267         } else {
268             if (MyOptions.directories.option.doesOccur()) {
269                 String directories = MyOptions.directories.option.getValue().trim();
270                 CLDRConfig cldrConfig = CONFIG;
271                 String base = null;
272                 int colonPos = directories.indexOf(':');
273                 if (colonPos >= 0) {
274                     base = directories.substring(0, colonPos).trim();
275                     directories = directories.substring(colonPos + 1).trim();
276                 } else {
277                     base = cldrConfig.getCldrBaseDirectory().toString();
278                 }
279                 String[] items = directories.split(",\\s*");
280                 File[] fullDirectories = new File[items.length];
281                 int i = 0;
282                 for (String item : items) {
283                     fullDirectories[i++] = new File(base + "/" + item + "/main");
284                 }
285                 factory = SimpleFactory.make(fullDirectories, ".*");
286                 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages();
287             }
288         }
289         fixCommonLocales();
290 
291         RAW_DATA = MyOptions.rawData.option.doesOccur();
292 
293         //showEnglish();
294 
295         showCoverage(null, matcher, locales, useOrgLevel);
296     }
297 
fixCommonLocales()298     public static void fixCommonLocales() {
299         if (COMMON_LOCALES == null) {
300             COMMON_LOCALES = factory.getAvailableLanguages();
301         }
302     }
303 
doGrowth(Matcher matcher, PrintWriter out)304     private static void doGrowth(Matcher matcher, PrintWriter out) {
305         TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending
306 //        if (DEBUG) {
307 //            for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) {
308 //                if (!dir.startsWith("cldr")) {
309 //                    continue;
310 //                }
311 //                String version = getNormalizedVersion(dir);
312 //                if (version == null) {
313 //                    continue;
314 //                }
315 //                org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make(
316 //                    CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*");
317 //                System.out.println("Reading: " + version);
318 //                Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher);
319 //                System.out.println("Read: " + version + "\t" + currentData);
320 //                break;
321 //            }
322 //        }
323         Map<String, FoundAndTotal> latestData = null;
324         for (ReleaseInfo versionNormalizedVersionAndYear : versionToYear) {
325             VersionInfo version = versionNormalizedVersionAndYear.version;
326             int year = versionNormalizedVersionAndYear.year;
327             String dir = ToolConstants.getBaseDirectory(version.getVersionString(2, 3));
328             Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false);
329             long found = 0;
330             long total = 0;
331             for (Entry<String, FoundAndTotal> entry : currentData.entrySet()) {
332                 found += entry.getValue().found;
333                 total += entry.getValue().total;
334             }
335             System.out.println("year\t" + year
336                 + "\tversion\t" + version
337                 + "\tlocales\t" + currentData.size()
338                 + "\tfound\t" + found
339                 + "\ttotal\t" + total
340                 + "\tdetails\t" + currentData
341                 );
342             out.flush();
343             if (latestData == null) {
344                 latestData = currentData;
345             }
346             Counter2<String> completionData = getCompletion(latestData, currentData);
347             addCompletionList(year+"", completionData, growthData);
348             if (DEBUG) System.out.println(currentData);
349         }
350         boolean first = true;
351         for (Entry<String, List<Double>> entry : growthData.entrySet()) {
352             if (first) {
353                 for (int i = 0; i < entry.getValue().size(); ++i) {
354                     out.print("\t" + i);
355                 }
356                 out.println();
357                 first = false;
358             }
359             out.println(entry.getKey() + "\t" + Joiner.on("\t").join(entry.getValue()));
360         }
361     }
362 
363     static final class ReleaseInfo {
ReleaseInfo(VersionInfo versionInfo, int year)364         public ReleaseInfo(VersionInfo versionInfo, int year) {
365             this.version = versionInfo;
366             this.year = year;
367         }
368         VersionInfo version;
369         int year;
370     }
371 
372     // TODO merge this into ToolConstants, and have the version expressed as VersionInfo.
373     static final List<ReleaseInfo> versionToYear;
374     static {
375         Object[][] mapping = {
376             { VersionInfo.getInstance(42), 2022 },
377             { VersionInfo.getInstance(40), 2021 },
378             { VersionInfo.getInstance(38), 2020 },
379             { VersionInfo.getInstance(36), 2019 },
380             { VersionInfo.getInstance(34), 2018 },
381             { VersionInfo.getInstance(32), 2017 },
382             { VersionInfo.getInstance(30), 2016 },
383             { VersionInfo.getInstance(28), 2015 },
384             { VersionInfo.getInstance(26), 2014 },
385             { VersionInfo.getInstance(24), 2013 },
386             { VersionInfo.getInstance(22,1), 2012 },
387             { VersionInfo.getInstance(2,0,1), 2011 },
388             { VersionInfo.getInstance(1,9,1), 2010 },
389             { VersionInfo.getInstance(1,7,2), 2009 },
390             { VersionInfo.getInstance(1,6,1), 2008 },
391             { VersionInfo.getInstance(1,5,1), 2007 },
392             { VersionInfo.getInstance(1,4,1), 2006 },
393             { VersionInfo.getInstance(1,3), 2005 },
394             { VersionInfo.getInstance(1,2), 2004 },
395             { VersionInfo.getInstance(1,1,1), 2003 },
396         };
397         List<ReleaseInfo> _versionToYear = new ArrayList<>();
398         for (Object[] row : mapping) {
_versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]))399             _versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]));
400         }
401         versionToYear = ImmutableList.copyOf(_versionToYear);
402     }
403 
404 //    public static String getNormalizedVersion(String dir) {
405 //        String rawVersion = dir.substring(dir.indexOf('-') + 1);
406 //        int firstDot = rawVersion.indexOf('.');
407 //        int secondDot = rawVersion.indexOf('.', firstDot + 1);
408 //        if (secondDot > 0) {
409 //            rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot);
410 //        } else {
411 //            rawVersion = rawVersion.substring(0, firstDot);
412 //        }
413 //        String result = getYearFromVersion(rawVersion, true);
414 //        return result == null ? null : result.toString();
415 //    }
416 
417 //    private static String getYearFromVersion(String version, boolean allowNull) {
418 //        String result = versionToYear.get(version);
419 //        if (!allowNull && result == null) {
420 //            throw new IllegalArgumentException("No year for version: " + version);
421 //        }
422 //        return result;
423 //    }
424 //
425 //    private static String getVersionFromYear(String year, boolean allowNull) {
426 //        String result = versionToYear.inverse().get(year);
427 //        if (!allowNull && result == null) {
428 //            throw new IllegalArgumentException("No version for year: " + year);
429 //        }
430 //        return result;
431 //    }
432 
addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)433     public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) {
434         List<Double> x = new ArrayList<>();
435         for (String key : completionData.getKeysetSortedByCount(false)) {
436             x.add(completionData.getCount(key));
437         }
438         growthData.put(version, x);
439         System.out.println(version + "\t" + x.size());
440     }
441 
getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)442     public static Counter2<String> getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData) {
443         Counter2<String> completionData = new Counter2<>();
444         for (Entry<String, FoundAndTotal> entry : latestData.entrySet()) {
445             final String locale = entry.getKey();
446             final FoundAndTotal currentRecord = currentData.get(locale);
447             if (currentRecord == null) {
448                 continue;
449             }
450             double total = entry.getValue().total;
451             if (total == 0) {
452                 continue;
453             }
454             double completion = currentRecord.found / total;
455             completionData.add(locale, completion);
456         }
457         return completionData;
458     }
459 
460     static class FoundAndTotal {
461         final int found;
462         final int total;
463 
464         @SafeVarargs
FoundAndTotal(Counter<Level>.... counters)465         public FoundAndTotal(Counter<Level>... counters) {
466             final int[] count = { 0, 0, 0 };
467             for (Level level : Level.values()) {
468                 if (level == Level.COMPREHENSIVE) {
469                     continue;
470                 }
471                 int i = 0;
472                 for (Counter<Level> counter : counters) {
473                     count[i++] += counter.get(level);
474                 }
475             }
476             found = count[0];
477             total = found + count[1] + count[2];
478         }
479 
480         @Override
toString()481         public String toString() {
482             return found + "/" + total;
483         }
484     }
485 
addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)486     private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) {
487         final File mainDir = new File(dir + "/common/main/");
488         final File annotationDir = new File(dir + "/common/annotations/");
489         File[] paths = annotationDir.exists() ? new File[] {mainDir, annotationDir} : new File[] {mainDir};
490         org.unicode.cldr.util.Factory newFactory;
491         try {
492             newFactory = SimpleFactory.make(paths, ".*");
493         } catch (RuntimeException e1) {
494             throw e1;
495         }
496         Map<String, FoundAndTotal> data = new HashMap<>();
497         char c = 0;
498         Set<String> latestAvailable = newFactory.getAvailableLanguages();
499         for (String locale : newFactory.getAvailableLanguages()) {
500             if (!matcher.reset(locale).matches()) {
501                 continue;
502             }
503             if (!latestAvailable.contains(locale)) {
504                 continue;
505             }
506             if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale)
507                 || locale.equals("root")
508                 || locale.equals("und")
509                 || locale.equals("supplementalData")) {
510                 continue;
511             }
512             char nc = locale.charAt(0);
513             if (nc != c) {
514                 System.out.println("\t" + locale);
515                 c = nc;
516             }
517             if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) {
518                 continue;
519             }
520             CLDRFile latestFile = null;
521             try {
522                 latestFile = latestFactory.make(locale, true);
523             } catch (Exception e2) {
524                 System.out.println("Can't make latest CLDRFile for: " + locale + "\tpast: " + mainDir + "\tlatest: " + Arrays.asList(latestFactory.getSourceDirectories()));
525                 continue;
526             }
527             CLDRFile file = null;
528             try {
529                 file = newFactory.make(locale, true);
530             } catch (Exception e2) {
531                 System.out.println("Can't make CLDRFile for: " + locale + "\tpast: " + mainDir);
532                 continue;
533             }
534             // HACK check bogus
535 //            Collection<String> extra = file.getExtraPaths();
536 //
537 //            final Iterable<String> fullIterable = file.fullIterable();
538 //            for (String path : fullIterable) {
539 //                if (path.contains("\"one[@")) {
540 //                    boolean inside = extra.contains(path);
541 //                    Status status = new Status();
542 //                    String loc = file.getSourceLocaleID(path, status );
543 //                    int debug = 0;
544 //                }
545 //            }
546             // END HACK
547             Counter<Level> foundCounter = new Counter<>();
548             Counter<Level> unconfirmedCounter = new Counter<>();
549             Counter<Level> missingCounter = new Counter<>();
550             Set<String> unconfirmedPaths = null;
551             Relation<MissingStatus, String> missingPaths = null;
552             unconfirmedPaths = new LinkedHashSet<>();
553             missingPaths = Relation.of(new LinkedHashMap<MissingStatus, Set<String>>(), LinkedHashSet.class);
554             VettingViewer.getStatus(latestFile.fullIterable(), file,
555                 pathHeaderFactory, foundCounter, unconfirmedCounter,
556                 missingCounter, missingPaths, unconfirmedPaths);
557 
558             // HACK
559             Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>();
560             for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
561                 if (e.getKey() == MissingStatus.ABSENT) {
562                     final String path = e.getValue();
563                     if (HACK.get(path) != null) {
564                         missingRemovals.add(e);
565                         missingCounter.add(Level.MODERN, -1);
566                         foundCounter.add(Level.MODERN, 1);
567                     } else {
568                         Status status = new Status();
569                         String loc = file.getSourceLocaleID(path, status);
570                         int debug = 0;
571                     }
572                 }
573             }
574             for (Entry<MissingStatus, String> e : missingRemovals) {
575                 missingPaths.remove(e.getKey(), e.getValue());
576             }
577             // END HACK
578 
579             if (showMissing) {
580                 int count = 0;
581                 for (String s : unconfirmedPaths) {
582                     System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s);
583                 }
584                 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) {
585                     String path = e.getValue();
586                     Status status = new Status();
587                     String loc = file.getSourceLocaleID(path, status);
588                     int debug = 0;
589 
590                     System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e));
591                 }
592                 int debug = 0;
593             }
594 
595             data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter));
596         }
597         return Collections.unmodifiableMap(data);
598     }
599 
showCoverage(Anchors anchors, Matcher matcher)600     public static void showCoverage(Anchors anchors, Matcher matcher) throws IOException {
601         showCoverage(anchors, matcher, null, false);
602     }
603 
showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)604     public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException {
605         final String title = "Locale Coverage";
606         try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors));
607             PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv");
608             PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv");
609             PrintWriter tsv_missing_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv");
610             PrintWriter tsv_missing_basic = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv");
611             PrintWriter tsv_missing_counts = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv");
612             PrintWriter propertiesCoverage = FileUtilities.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY + "properties/", "coverageLevels.txt");
613             ){
614             tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER);
615             tsv_missing.println(TSV_MISSING_HEADER);
616             tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER);
617             tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER);
618 
619             propertiesCoverage.println(PROPERTIES_HEADER);
620 
621             Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN));
622             Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages());
623             availableLanguages.addAll(checkModernLocales);
624 
625             Multimap<String, String> languageToRegion = TreeMultimap.create();
626             LanguageTagParser ltp = new LanguageTagParser();
627             LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true);
628             for (String locale : factory.getAvailable()) {
629                 String country = ltp.set(locale).getRegion();
630                 if (!country.isEmpty()) {
631                     languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country);
632                 }
633             }
634             languageToRegion = ImmutableMultimap.copyOf(languageToRegion);
635 
636             fixCommonLocales();
637 
638             System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet()));
639 
640             System.out.println("# Checking: " + availableLanguages);
641 
642             NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH);
643             percentFormat.setMaximumFractionDigits(1);
644 
645             pw.println("<p style='text-align: left'>This chart shows the coverage levels in this release. "
646                 + "Totals are listed after the main chart.</p>\n"
647                 + "<blockquote><ul>\n"
648                 + "<li><a href='#main_table'>Main Table</a></li>\n"
649                 + "<li><a href='#level_counts'>Level Counts</a></li>\n"
650                 + "</ul></blockquote>\n"
651                 + "<h3>Column Key</h3>\n"
652                 + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n"
653                 + "<tr><th>Direct.</th><td>The CLDR source directory</td></tr>\n"
654                 + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n"
655                 + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n"
656                 + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. "
657                 + "Particular organizations may have different target levels. "
658                 + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n"
659                 + "<tr><th>≟</th><td>Indicates whether the Computed Level equals the CLDR Target or not.</td></tr>\n"
660                 + "<tr><th>Computed Level</th><td>Computed from the percentage values, "
661                 + "taking the first level that meets a threshold (currently �� "
662                 + percentFormat.format(MODERN_THRESHOLD)
663                 + ", ⓜ "
664                 + percentFormat.format(MODERATE_THRESHOLD)
665                 + ", ⓑ "
666                 + percentFormat.format(BASIC_THRESHOLD)
667                 + ").</td></tr>\n"
668                 + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n"
669                 + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. "
670                 + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n"
671                 + "<tr><th>��%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: �� = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. "
672                 + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. "
673                 + "A high-level summary of the meaning of the coverage values is at "
674                 + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. "
675                 + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. "
676                 + "</td></tr>\n"
677                 + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. "
678                 + "They are listed if missing at the computed level.<br>"
679                 + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>"
680                 + "<i>Except for Core, these are not accounted for in the percent values.</i></td></tr>\n"
681                 + "<tr><th><a href='https://github.com/unicode-org/cldr-staging/tree/main/docs/charts/42/tsv'>TSV Files</a>:</th><td>\n"
682                 + "<ul><li>locale-coverage.tsv — A version of this file, suitable for loading into a spreadsheet.</li>\n"
683                 + "<li>locale-missing.tsv — Missing items for the CLDR target locales.</li>\n"
684                 + "<li>locale-missing-summary.tsv — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n"
685                 + "<li>locale-missing-basic.tsv — Missing items that keep locales from reaching the Basic level.</li></td></tr>\n"
686                 + "<li>locale-missing-count.tsv — Counts of items per locale that are found, unconfirmed, or missing, at the target level. "
687                 + "(Or at *basic, if there is no target level.)</li></td></tr>\n"
688                 + "</table>\n"
689                 );
690 
691             Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>(
692                 MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml));
693             Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
694 
695             Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
696 
697             Counter<Level> foundCounter = new Counter<>();
698             Counter<Level> unconfirmedCounter = new Counter<>();
699             Counter<Level> missingCounter = new Counter<>();
700 
701             List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class));
702             levelsToShow.remove(Level.COMPREHENSIVE);
703             levelsToShow.remove(Level.UNDETERMINED);
704             levelsToShow = ImmutableList.copyOf(levelsToShow);
705             List<Level> reversedLevels = new ArrayList<>(levelsToShow);
706             Collections.reverse(reversedLevels);
707             reversedLevels = ImmutableList.copyOf(reversedLevels);
708 
709             int localeCount = 0;
710 
711             final TablePrinter tablePrinter = new TablePrinter()
712                 .addColumn("Direct.", "class='source'", null, "class='source'", true)
713                 .setBreakSpans(true).setSpanRows(false)
714                 .addColumn("Language", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
715                 .setBreakSpans(true)
716                 .addColumn("English Name", "class='source'", null, "class='source'", true)
717                 .setBreakSpans(true)
718                 .addColumn("Native Name", "class='source'", null, "class='source'", true)
719                 .setBreakSpans(true)
720                 .addColumn("Script", "class='source'", null, "class='source'", true)
721                 .setBreakSpans(true)
722                 .addColumn("Default Region", "class='source'", null, "class='source'", true)
723                 .setBreakSpans(true)
724                 .addColumn("№ Locales", "class='source'", null, "class='targetRight'", true)
725                 .setBreakSpans(true).setCellPattern("{0,number}")
726                 .addColumn("Target Level", "class='source'", null, "class='source'", true)
727                 .setBreakSpans(true)
728                 .addColumn("≟", "class='target'", null, "class='target'", true)
729                 .setBreakSpans(true).setSortPriority(1).setSortAscending(false)
730                 .addColumn("Computed Level", "class='target'", null, "class='target'", true)
731                 .setBreakSpans(true).setSortPriority(0).setSortAscending(false)
732                 .addColumn("ICU", "class='target'", null, "class='target'", true)
733                 .setBreakSpans(true)
734                 .addColumn("Confirmed", "class='target'", null, "class='targetRight' style='color:gray'", true)
735                 .setBreakSpans(true).setCellPattern("{0,number,0.0%}")
736                 ;
737 
738             NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH);
739             tsvPercent.setMaximumFractionDigits(2);
740 
741             for (Level level : reversedLevels) {
742                 String titleLevel = level.getAbbreviation() + "%";
743                 tablePrinter.addColumn(titleLevel, "class='target'", null, "class='targetRight'", true)
744                 .setCellPattern("{0,number,0.0%}")
745                 .setBreakSpans(true);
746 
747                 switch(level) {
748                 default:
749                     tablePrinter.setSortPriority(2).setSortAscending(false);
750                     break;
751                 case BASIC:
752                     tablePrinter.setSortPriority(3).setSortAscending(false);
753                     break;
754                 case MODERATE:
755                     tablePrinter.setSortPriority(4).setSortAscending(false);
756                     break;
757                 case MODERN:
758                     tablePrinter.setSortPriority(5).setSortAscending(false);
759                     break;
760                 }
761             }
762             tablePrinter
763             .addColumn("Missing Features", "class='target'", null, "class='target'", true)
764             .setBreakSpans(true)
765             ;
766 
767             long start = System.currentTimeMillis();
768             LikelySubtags likelySubtags = new LikelySubtags();
769 
770             EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class);
771             targetLevel.put(Level.CORE, 2 / 100d);
772             targetLevel.put(Level.BASIC, 16 / 100d);
773             targetLevel.put(Level.MODERATE, 33 / 100d);
774             targetLevel.put(Level.MODERN, 100 / 100d);
775 
776             Multimap<String, String> pathToLocale = TreeMultimap.create();
777 
778             Counter<Level> computedLevels = new Counter<>();
779             Counter<Level> computedSublocaleLevels = new Counter<>();
780 
781             for (String locale : availableLanguages) {
782                 try {
783                     if (locale.contains("supplemental") // for old versionsl
784                         || locale.startsWith("sr_Latn")) {
785                         continue;
786                     }
787                     if (locales != null && !locales.contains(locale)) {
788                         String base = CLDRLocale.getInstance(locale).getLanguage();
789                         if (!locales.contains(base)) {
790                             continue;
791                         }
792                     }
793                     if (matcher != null && !matcher.reset(locale).matches()) {
794                         continue;
795                     }
796                     if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) {
797                         continue;
798                     }
799 
800                     tsv_missing_summary.flush();
801                     tsv_missing.flush();
802                     tsv_missing_basic.flush();
803                     tsv_missing_counts.flush();
804 
805                     boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists();
806 
807                     String region = ltp.set(locale).getRegion();
808                     if (!region.isEmpty()) continue; // skip regions
809 
810                     final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr, locale);
811                     final String specialFlag = getSpecialFlag(locale);
812 
813                     final boolean cldrLevelGoalBasicToModern = Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal);
814 
815                     String max = likelySubtags.maximize(locale);
816                     final String script = ltp.set(max).getScript();
817                     final String defRegion = ltp.getRegion();
818 
819                     final String language = likelySubtags.minimize(locale);
820 
821                     missingPaths.clear();
822                     unconfirmed.clear();
823 
824                     final CLDRFile file = factory.make(locale, true, minimumDraftStatus);
825 
826                     if (locale.equals("af")) {
827                         int debug = 0;
828                     }
829 
830                     Iterable<String> pathSource = new IterableFilter(file.fullIterable());
831 
832                     VettingViewer.getStatus(pathSource, file,
833                         pathHeaderFactory, foundCounter, unconfirmedCounter,
834                         missingCounter, missingPaths, unconfirmed);
835 
836                     {
837                         long found = 0;
838                         long unconfirmedc = 0;
839                         long missing = 0;
840                         Level adjustedGoal = cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0 ? Level.BASIC : cldrLocaleLevelGoal;
841                         for (Level level : Level.values()) {
842                             if (level.compareTo(adjustedGoal) <= 0) {
843                                 found += foundCounter.get(level);
844                                 unconfirmedc += unconfirmedCounter.get(level);
845                                 missing += missingCounter.get(level);
846                             }
847                         }
848                         String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*";
849                         tsv_missing_counts.println(specialFlag + locale + "\t" + goalFlag + adjustedGoal + "\t" + found + "\t" + unconfirmedc + "\t" + missing);
850                     }
851 
852                     Collection<String> sublocales = languageToRegion.asMap().get(language);
853                     if (sublocales == null) {
854                         sublocales = Collections.emptySet();
855                     }
856                     sublocales = ImmutableSet.copyOf(sublocales);
857 
858                     final String seedString = isSeed ? "seed" : "common";
859 
860                     // get the totals
861 
862                     EnumMap<Level, Integer> totals = new EnumMap<>(Level.class);
863                     EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class);
864                     Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class);
865 
866                     StatusCounter starredCounter = new StatusCounter();
867 
868                     {
869                         Multimap<CoreItems, String> detailedErrors = TreeMultimap.create();
870                         Set<CoreItems> coverage = CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors);
871                         for (CoreItems item : coverage) {
872                             foundCounter.add(item.desiredLevel, 1);
873                         }
874                         for (Entry<CoreItems, String> entry : detailedErrors.entries()) {
875                             CoreItems coreItem = entry.getKey();
876                             String path = entry.getValue();
877                             specialMissingPaths.add(coreItem);
878                             // if goal (eg modern) >= itemLevel, indicate it is missing
879                             if (coreItem.desiredLevel == Level.BASIC) {
880                                 starredCounter.gatherStarred(path, null);
881                             }
882                             missingCounter.add(coreItem.desiredLevel, 1);
883                         }
884                     }
885 
886                     if (cldrLevelGoalBasicToModern) {
887                         Level goalLevel = cldrLocaleLevelGoal;
888                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
889                             String path = entry.getValue();
890                             String status = entry.getKey().toString();
891                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
892                             if (goalLevel.compareTo(foundLevel) >= 0) {
893                                 String line = spreadsheetLine(locale, language, script, specialFlag, file.getStringValue(path), goalLevel, foundLevel, status, path, file, pathToLocale);
894                                 tsv_missing.println(line);
895                             }
896                         }
897                         for (String path : unconfirmed) {
898                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
899                             if (goalLevel.compareTo(foundLevel) >= 0) {
900                                 String line = spreadsheetLine(locale, language, script, specialFlag, file.getStringValue(path), goalLevel, foundLevel, "n/a", path, file, pathToLocale);
901                                 tsv_missing.println(line);
902                             }
903                         }
904                     } else {
905                         Level goalLevel = Level.BASIC;
906                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
907                             String path = entry.getValue();
908                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
909                             if (goalLevel.compareTo(foundLevel) >= 0) {
910                                 starredCounter.gatherStarred(path, null);
911                             }
912                         }
913                         for (String path : unconfirmed) {
914                             String fullPath = file.getFullXPath(path);
915                             DraftStatus draftStatus = fullPath.contains("unconfirmed") ? DraftStatus.unconfirmed : DraftStatus.provisional;
916 
917                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
918                             if (goalLevel.compareTo(foundLevel) >= 0) {
919                                 starredCounter.gatherStarred(path, draftStatus);
920                             }
921                         }
922                     }
923 
924                     if (!starredCounter.starredPathToData.isEmpty()) {
925                         for (Entry<String, StatusData> starred : starredCounter.starredPathToData.entrySet()) {
926                             String starredPath = starred.getKey();
927                             StatusData statusData = starred.getValue();
928                             tsv_missing_basic.println(specialFlag + locale //
929                                 + "\t" + statusData.missing //
930                                 + "\t" + statusData.provisional //
931                                 + "\t" + statusData.unconfirmed //
932                                 + "\t" + starredPath.replace("\"*\"", "'*'"));
933                         }
934                         tsv_missing_basic.println(specialFlag + locale  //
935                             + "\t" + starredCounter.missingTotal //
936                             + "\t" + starredCounter.provisionalTotal //
937                             + "\t" + starredCounter.unconfirmedTotal //
938                             + "\tTotals");
939                         tsv_missing_basic.println("\t\t\t"); // for a proper table in github
940                     }
941 
942                     int sumFound = 0;
943                     int sumMissing = 0;
944                     int sumUnconfirmed = 0;
945 
946                     for (Level level : levelsToShow) {
947                         long foundCount = foundCounter.get(level);
948                         long unconfirmedCount = unconfirmedCounter.get(level);
949                         long missingCount = missingCounter.get(level);
950 
951                         sumFound += foundCount;
952                         sumUnconfirmed += unconfirmedCount;
953                         sumMissing += missingCount;
954 
955                         confirmed.put(level, sumFound);
956                         totals.put(level, sumFound + sumUnconfirmed + sumMissing);
957                     }
958 
959                     // double modernTotal = totals.get(Level.MODERN);
960 
961 
962                     // first get the accumulated values
963                     EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class);
964                     EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class);
965                     int currTotals = 0;
966                     int currConfirmed = 0;
967                     for (Level level : levelsToShow) {
968                         currTotals += totals.get(level);
969                         currConfirmed += confirmed.get(level);
970                         accumConfirmed.put(level, currConfirmed);
971                         accumTotals.put(level, currTotals);
972                     }
973 
974                     // print the totals
975 
976                     Level computed = Level.UNDETERMINED;
977                     Map<Level, Double> levelToProportion = new EnumMap<>(Level.class);
978 
979                     for (Level level : reversedLevels) {
980                         int confirmedCoverage = accumConfirmed.get(level);
981                         double total = accumTotals.get(level);
982 
983                         final double proportion = confirmedCoverage / total;
984                         levelToProportion.put(level, proportion);
985 
986                         if (computed == Level.UNDETERMINED) {
987                             switch (level) {
988                             case MODERN:
989                                 if (proportion >= MODERN_THRESHOLD) {
990                                     computed = level;
991                                 }
992                                 break;
993                             case MODERATE:
994                                 if (proportion >= MODERATE_THRESHOLD) {
995                                     computed = level;
996                                 }
997                                 break;
998                             case BASIC:
999                                 if (proportion >= BASIC_THRESHOLD) {
1000                                     computed = level;
1001                                 }
1002                                 break;
1003                             default:
1004                                 break;
1005                             }
1006                         }
1007                     }
1008 
1009                     Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class);
1010                     Level computedWithCore = computed == Level.UNDETERMINED ? Level.CORE : computed;
1011                     for (CoreItems item : specialMissingPaths) {
1012                         if (item.desiredLevel.compareTo(computedWithCore) <= 0) {
1013                             shownMissingPaths.add(item);
1014                         } else {
1015                             int debug = 0;
1016                         }
1017                     }
1018                     final String coreMissingString = Joiner.on(", ").join(shownMissingPaths);
1019 
1020                     String visibleComputed = computed == Level.UNDETERMINED ? "" : computed.toString();
1021                     computedLevels.add(computed, 1);
1022                     computedSublocaleLevels.add(computed, sublocales.size());
1023 
1024                     tablePrinter.addRow()
1025                     .addCell(seedString)
1026                     .addCell(language)
1027                     .addCell(ENGLISH.getName(language))
1028                     .addCell(file.getName(language))
1029                     .addCell(script)
1030                     .addCell(defRegion)
1031                     .addCell(sublocales.size())
1032                     .addCell(cldrLocaleLevelGoal == Level.UNDETERMINED ? "" : specialFlag + cldrLocaleLevelGoal.toString())
1033                     .addCell(computed == cldrLocaleLevelGoal ? " ≡" : " ≠")
1034                     .addCell(visibleComputed)
1035                     .addCell(getIcuValue(language))
1036                     .addCell(sumFound/(double)(sumFound+sumUnconfirmed))
1037                     ;
1038 
1039                     // print the totals
1040                     for (Level level : reversedLevels) {
1041                         tablePrinter.addCell(levelToProportion.get(level));
1042                     }
1043 
1044                     tablePrinter
1045                     .addCell(coreMissingString)
1046                     .finishRow();
1047 
1048                     // now write properties file line
1049 
1050                     if (computed != Level.UNDETERMINED) {
1051                         propertiesCoverage.println(locale
1052                             + " ;\t" + visibleComputed);
1053 //                        Level higher = Level.UNDETERMINED;
1054 //                        switch (computed) {
1055 //                        default:
1056 //                            higher = Level.UNDETERMINED;
1057 //                            break;
1058 //                        case MODERATE:
1059 //                            higher = Level.MODERN;
1060 //                            break;
1061 //                        case BASIC:
1062 //                            higher = Level.MODERATE;
1063 //                            break;
1064 //                        }
1065 //                        double higherProportion = higher == Level.UNDETERMINED ? 0d : levelToProportion.get(higher);
1066 //
1067 //                        if (higherProportion >= THRESHOLD_HIGHER) {
1068 //                            propertiesCoverage.println(
1069 //                                " ;\t" + tsvPercent.format(higherProportion) +
1070 //                                " ;\t" + higher
1071 //                                );
1072 //                        } else {
1073 //                            propertiesCoverage.println(" ;\t" + "" + " ;\t" + "");
1074 //                        }
1075                     }
1076                     localeCount++;
1077                 } catch (Exception e) {
1078                     throw new IllegalArgumentException(e);
1079                 }
1080             }
1081 
1082             propertiesCoverage.println("#EOF\t"); // needs extra tabs to look right in github
1083 
1084             pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>");
1085             pw.println(tablePrinter.toTable());
1086 
1087             pw.println(
1088                 "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n"
1089                     + "<table class='subtle'><tr>\n"
1090                     + "<th style='text-align:left'>" + "Level" + "</th>"
1091                     + "<th style='text-align:left'>" + "Languages" + "</th>"
1092                     + "<th style='text-align:left'>" + "Locales" + "</th>"
1093                     + "</tr>"
1094                 );
1095             long totalCount = 0;
1096             long totalLocaleCount = 0;
1097             for (Level level : Lists.reverse(Arrays.asList(Level.values()))) {
1098                 final long count = computedLevels.get(level);
1099                 final long localesCount = computedSublocaleLevels.get(level);
1100                 if (count == 0 || level == Level.UNDETERMINED) {
1101                     continue;
1102                 }
1103                 totalCount += count;
1104                 totalLocaleCount += localesCount;
1105                 String visibleImputed = level == Level.UNDETERMINED ? "<" + Level.BASIC.toString() : level.toString();
1106                 pw.println("<tr>"
1107                     + "<th style='text-align:left'>" + visibleImputed + "</th>"
1108                     + "<td style='text-align:right'>" + count + "</td>"
1109                     + "<td style='text-align:right'>" + localesCount + "</td>"
1110                     + "</tr>");
1111             }
1112             pw.println("<tr>"
1113                 + "<th style='text-align:left'>" + "Total" + "</th>"
1114                 + "<td style='text-align:right'>" + totalCount + "</td>"
1115                 + "<td style='text-align:right'>" + totalLocaleCount + "</td>"
1116                 + "</tr>\n"
1117                 );
1118 
1119             pw.println("<tr>"
1120                 + "<th style='text-align:left'>" + "in dev." + "</th>"
1121                 + "<td style='text-align:right'>" + computedLevels.get(Level.UNDETERMINED) + "</td>"
1122                 + "<td style='text-align:right'>" + computedSublocaleLevels.get(Level.UNDETERMINED) + "</td>"
1123                 + "</tr>\n"
1124                 + "</table>"
1125                 );
1126 
1127 
1128             Multimap<Level, String> levelToLocales = TreeMultimap.create();
1129 
1130             for ( Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) {
1131                 String path = entry.getKey();
1132                 Collection<String> localeSet = entry.getValue();
1133                 levelToLocales.clear();
1134                 for (String locale : localeSet) {
1135                     Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
1136                     levelToLocales.put(foundLevel, locale);
1137                 }
1138                 String phString = "n/a\tn/a\tn/a\tn/a";
1139                 try {
1140                     PathHeader ph = pathHeaderFactory.fromPath(path);
1141                     phString = ph.toString();
1142                 } catch (Exception e) {
1143                 }
1144                 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) {
1145                     Level level = entry2.getKey();
1146                     localeSet = entry2.getValue();
1147                     tsv_missing_summary.println(
1148                         level
1149                         + "\t" + localeSet.size()
1150                         + "\t" + Joiner.on(" ").join(localeSet.stream().map(x -> x + getSpecialFlag(x)).collect(Collectors.toSet()))
1151                         + "\t" + phString
1152                         );
1153                 }
1154             }
1155             tablePrinter.toTsv(tsv_summary);
1156             long end = System.currentTimeMillis();
1157             System.out.println((end - start) + " millis = "
1158                 + ((end - start) / localeCount) + " millis/locale");
1159             ShowPlurals.appendBlanksForScrolling(pw);
1160         }
1161     }
1162 
getSpecialFlag(String locale)1163     public static String getSpecialFlag(String locale) {
1164         return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED ? "" : "‡";
1165     }
1166 
1167     private static class IterableFilter implements Iterable<String> {
1168         private Iterable<String> source;
1169 
IterableFilter(Iterable<String> source)1170         IterableFilter(Iterable<String> source) {
1171             this.source = source;
1172         }
1173 
1174         /**
1175          * When some paths are defined after submission, we need to change them to COMPREHENSIVE in computing the vetting status.
1176          */
1177 
1178         static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of(
1179             "//ldml/personNames/nameOrderLocales[@order=\"givenFirst\"]",
1180             "//ldml/personNames/nameOrderLocales[@order=\"surnameFirst\"]",
1181             "//ldml/personNames/foreignSpaceReplacement[@xml:space=\"preserve\"]",
1182             "//ldml/personNames/initialPattern[@type=\"initial\"]",
1183             "//ldml/personNames/initialPattern[@type=\"initialSequence\"]",
1184             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern",
1185             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1186             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1187             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1188             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"formal\"]/namePattern",
1189             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1190             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1191             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1192             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1193             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1194             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1195             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1196             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1197             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1198             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1199             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1200             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1201             "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1202             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern",
1203             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1204             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1205             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1206             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"formal\"]/namePattern",
1207             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1208             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1209             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1210             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1211             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1212             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1213             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1214             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1215             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1216             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1217             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]",
1218             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1219             "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]",
1220             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern",
1221             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]",
1222             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]",
1223             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]",
1224             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]",
1225             "//ldml/personNames/personName[@order=\"sorting\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]",
1226             "//ldml/personNames/sampleName[@item=\"givenOnly\"]/nameField[@type=\"given\"]",
1227             "//ldml/personNames/sampleName[@item=\"givenSurnameOnly\"]/nameField[@type=\"given\"]",
1228             "//ldml/personNames/sampleName[@item=\"givenSurnameOnly\"]/nameField[@type=\"surname\"]",
1229             "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"given\"]",
1230             "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"given2\"]",
1231             "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"surname\"]",
1232             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"prefix\"]",
1233             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given\"]",
1234             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given-informal\"]",
1235             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given2\"]",
1236             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname-prefix\"]",
1237             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname-core\"]",
1238             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname2\"]",
1239             "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"suffix\"]"
1240             );
1241         @Override
iterator()1242         public Iterator<String> iterator() {
1243             return new IteratorFilter(source.iterator());
1244         }
1245 
1246         static class IteratorFilter implements Iterator<String> {
1247             Iterator<String> source;
1248             String peek;
1249 
IteratorFilter(Iterator<String> source)1250             public IteratorFilter(Iterator<String> source) {
1251                 this.source = source;
1252                 fillPeek();
1253             }
1254             @Override
hasNext()1255             public boolean hasNext() {
1256                 return peek != null;
1257             }
1258             @Override
next()1259             public String next() {
1260                 String result = peek;
1261                 fillPeek();
1262                 return result;
1263             }
1264 
fillPeek()1265             private void fillPeek() {
1266                 peek = null;
1267                 while (source.hasNext()) {
1268                     peek = source.next();
1269                     // if it is ok to assess, then break
1270                     if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek)
1271                         && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) {
1272                         break;
1273                     }
1274                     peek = null;
1275                 }
1276             }
1277         }
1278 
1279     }
1280     static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO);
1281 
1282 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc)
1283     static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename");
1284 
1285     static final UserInfo dummyUserInfo = new UserInfo() {
1286         @Override
1287         public VoterInfo getVoterInfo() {
1288             return dummyVoterInfo;
1289         }
1290     };
1291     static final PathValueInfo dummyPathValueInfo = new PathValueInfo() {
1292         // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE)
1293         @Override
1294         public Collection<? extends CandidateInfo> getValues() {
1295             throw new UnsupportedOperationException();
1296         }
1297         @Override
1298         public CandidateInfo getCurrentItem() {
1299             throw new UnsupportedOperationException();
1300         }
1301         @Override
1302         public String getBaselineValue() {
1303             throw new UnsupportedOperationException();
1304         }
1305         @Override
1306         public Level getCoverageLevel() {
1307             return Level.MODERN;
1308         }
1309         @Override
1310         public boolean hadVotesSometimeThisRelease() {
1311             throw new UnsupportedOperationException();
1312         }
1313         @Override
1314         public CLDRLocale getLocale() {
1315             throw new UnsupportedOperationException();
1316         }
1317         @Override
1318         public String getXpath() {
1319             throw new UnsupportedOperationException();
1320         }
1321     };
1322 
spreadsheetLine(String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1323     private static String spreadsheetLine(String locale, String language, String script, String specialFlag,
1324         String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path,
1325         CLDRFile resolvedFile, Multimap<String, String> pathToLocale) {
1326         if (pathToLocale != null) {
1327             pathToLocale.put(path, locale);
1328         }
1329 //        String stLink = "n/a";
1330 //        String englishValue = "n/a";
1331 //        StatusAction action = null;
1332 //        String icuValue = getIcuValue(locale);
1333 
1334         SurveyToolStatus surveyToolStatus = null;
1335         String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path);
1336 
1337         String phString = "na\tn/a\tn/a\t" + path;
1338         try {
1339             PathHeader ph = pathHeaderFactory.fromPath(path);
1340             phString = ph.toString();
1341 //            stLink = URLS.forXpath(locale, path);
1342 //            englishValue = ENGLISH.getStringValue(path);
1343 //            action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo);
1344         } catch (Exception e) {
1345 
1346         }
1347 
1348         String line =
1349             specialFlag + language
1350             + "\t" + ENGLISH.getName(language)
1351             + "\t" + ENGLISH.getName("script", script)
1352             + "\t" + cldrLocaleLevelGoal
1353             + "\t" + itemLevel
1354             + "\t" + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString())
1355             + "\t" + bailey
1356             + "\t" + phString
1357             + "\t" + PathHeader.getUrlForLocalePath(locale, path)
1358             ;
1359         return line;
1360     }
1361 
1362 
1363 
getIcuValue(String locale)1364     private static String getIcuValue(String locale) {
1365         return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : "";
1366     }
1367 
1368     static final Set<ULocale> ICU_Locales = ImmutableSet.copyOf(ULocale.getAvailableLocales());
1369 }
1370