• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.Comparators;
6 import com.google.common.collect.ImmutableList;
7 import com.google.common.collect.ImmutableMultimap;
8 import com.google.common.collect.ImmutableSet;
9 import com.google.common.collect.Lists;
10 import com.google.common.collect.Multimap;
11 import com.google.common.collect.TreeMultimap;
12 import com.ibm.icu.impl.Relation;
13 import com.ibm.icu.text.NumberFormat;
14 import com.ibm.icu.util.ULocale;
15 import java.io.File;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.Comparator;
23 import java.util.EnumMap;
24 import java.util.EnumSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31 import java.util.TreeMap;
32 import java.util.TreeSet;
33 import java.util.regex.Matcher;
34 import java.util.stream.Collectors;
35 import org.unicode.cldr.draft.FileUtilities;
36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
37 import org.unicode.cldr.tool.Option.Options;
38 import org.unicode.cldr.util.CLDRConfig;
39 import org.unicode.cldr.util.CLDRFile;
40 import org.unicode.cldr.util.CLDRFile.DraftStatus;
41 import org.unicode.cldr.util.CLDRLocale;
42 import org.unicode.cldr.util.CLDRPaths;
43 import org.unicode.cldr.util.CLDRURLS;
44 import org.unicode.cldr.util.CldrUtility;
45 import org.unicode.cldr.util.CoreCoverageInfo;
46 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
47 import org.unicode.cldr.util.Counter;
48 import org.unicode.cldr.util.CoverageInfo;
49 import org.unicode.cldr.util.DtdType;
50 import org.unicode.cldr.util.LanguageTagCanonicalizer;
51 import org.unicode.cldr.util.LanguageTagParser;
52 import org.unicode.cldr.util.Level;
53 import org.unicode.cldr.util.LocaleNames;
54 import org.unicode.cldr.util.Organization;
55 import org.unicode.cldr.util.PathHeader;
56 import org.unicode.cldr.util.PathHeader.Factory;
57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
58 import org.unicode.cldr.util.PathStarrer;
59 import org.unicode.cldr.util.PatternCache;
60 import org.unicode.cldr.util.RegexLookup;
61 import org.unicode.cldr.util.SimpleFactory;
62 import org.unicode.cldr.util.StandardCodes;
63 import org.unicode.cldr.util.SupplementalDataInfo;
64 import org.unicode.cldr.util.TempPrintWriter;
65 import org.unicode.cldr.util.VettingViewer;
66 import org.unicode.cldr.util.VettingViewer.MissingStatus;
67 
68 public class ShowLocaleCoverage {
69 
    /**
     * Base location of the published TSV files: the staging-repo main URL, then
     * {@code docs/charts/<chart version>/tsv/}.
     */
    private static final String TSV_BASE =
            CLDRURLS.CLDR_STAGING_REPO_MAIN
                    + "docs/charts/"
                    + ToolConstants.CHART_VI.getVersionString(1, 2)
                    + "/tsv/";

    /** Splits text on line-feed characters. */
    public static final Splitter LF_SPLITTER = Splitter.on('\n');

    // thresholds for measuring Level attainment
    // (these are the values formatted as percentages in the chart's "Computed Level" key)
    private static final double BASIC_THRESHOLD = 1;
    private static final double MODERATE_THRESHOLD = 0.995;
    private static final double MODERN_THRESHOLD = 0.995;

    /** Shared CLDR configuration; main() reads the CLDR base directory from it. */
    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

    /** Header row written as the first line of locale-missing-summary.tsv. */
    private static final String TSV_MISSING_SUMMARY_HEADER =
            "#Path Level"
                    + "\t#Locales"
                    + "\tLocales"
                    + "\tSection"
                    + "\tPage"
                    + "\tHeader"
                    + "\tCode";

    /** Header row written as the first line of locale-missing.tsv. */
    private static final String TSV_MISSING_HEADER =
            "#LCode"
                    + "\tEnglish Name"
                    + "\tScript"
                    + "\tLocale Level"
                    + "\tPath Level"
                    + "\tSTStatus"
                    + "\tBailey"
                    + "\tSection"
                    + "\tPage"
                    + "\tHeader"
                    + "\tCode"
                    + "\tST Link";

    /** Comment header written at the top of the generated coverageLevels.txt properties file. */
    private static final String PROPERTIES_HEADER =
            "# coverageLevels.txt\n"
                    + "# Copyright © 2023 Unicode, Inc.\n"
                    + "# CLDR data files are interpreted according to the\n"
                    + "# LDML specification: http://unicode.org/reports/tr35/\n"
                    + "# For terms of use, see http://www.unicode.org/copyright.html\n"
                    + "#\n"
                    + "# For format and usage information, see:\n"
                    + "# https://cldr.unicode.org/index/cldr-spec/coverage-levels.\n"
                    + "\n";

    /** Header row written as the first line of locale-missing-basic.tsv. */
    private static final String TSV_MISSING_BASIC_HEADER =
            "#Locale\tProv.\tUnconf.\tMissing\tPath*\tAttributes";

    /** Header row written as the first line of locale-missing-counts.tsv. */
    private static final String TSV_MISSING_COUNTS_HEADER =
            "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing";

    // NOTE(review): the uses of DEBUG and DEBUG_FILTER are outside the visible part of this
    // file; confirm they are still needed.
    private static final boolean DEBUG = true;
    private static final char DEBUG_FILTER =
            0; // use letter to only load locales starting with that letter

    /** Chart version; used as the default argument of the {@code version} option. */
    private static final String LATEST = ToolConstants.CHART_VERSION;

    /** Tool configuration that supplies the supplemental data and the default file factory. */
    private static CLDRConfig testInfo = ToolConfig.getToolInstance();

    private static final StandardCodes SC = StandardCodes.make();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
            testInfo.getSupplementalDataInfo();

    /** Alias for {@link #SC}; both names refer to the same instance. */
    private static final StandardCodes STANDARD_CODES = SC;

    /**
     * Source of locale files. Deliberately not final: main() replaces it when the {@code version}
     * or {@code directories} option is given.
     */
    private static org.unicode.cldr.util.Factory factory =
            testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();

    /**
     * English file, built once from the initial {@link #factory} (before main() can replace it).
     * The {@code true} argument presumably requests the resolved file; confirm against
     * Factory.make.
     */
    private static final CLDRFile ENGLISH = factory.make("en", true);

    /** Registry that each {@link MyOptions} constant adds itself to; parsed in main(). */
    static final Options myOptions = new Options();
137 
138     enum MyOptions {
139         filter(".+", ".*", "Filter the information based on id, using a regex argument."),
140         //        draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft
141         // status."),
142         chart(null, null, "chart only"),
143         organization(".+", null, "Only locales for organization"),
144         version(".+", LATEST, "To get different versions"),
145         rawData(null, null, "Output the raw data from all coverage levels"),
146         targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."),
147         directories(
148                 "(.*:)?[a-z]+(,[a-z]+)*",
149                 "common",
150                 "Space-delimited list of main source directories: common,seed,exemplar.\n"
151                         + "Optional, <baseDir>:common,seed"),
152         ;
153 
154         // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target
155         // directory."),
156         // layouts(null, null, "Only create html files for keyboard layouts"),
157         // repertoire(null, null, "Only create html files for repertoire"), ;
158         // boilerplate
159         final Option option;
160 
MyOptions(String argumentPattern, String defaultArgument, String helpText)161         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
162             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
163         }
164     }
165 
    /**
     * Regex lookup that maps the listed path patterns (alt="accounting"/"variant" values,
     * short territory names, language codes containing '_', currency symbols, exemplar
     * characters) to {@code true}.
     *
     * <p>NOTE(review): the lookup's use is outside the visible part of this file; judging by the
     * name, matching paths may legitimately be empty and are suppressed from the report —
     * confirm against the caller.
     */
    private static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY =
            new RegexLookup<Boolean>()
                    .add("\\[@alt=\"accounting\"]", true)
                    .add("\\[@alt=\"variant\"]", true)
                    .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true)
                    .add("^//ldml/localeDisplayNames/languages/language.*_", true)
                    .add("^//ldml/numbers/currencies/currency.*/symbol", true)
                    .add("^//ldml/characters/exemplarCharacters", true);

    /** Draft status passed to the factory when each locale file is loaded in showCoverage. */
    private static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed;

    /** Path-header factory built from the English file; handed to VettingViewer.getStatus. */
    private static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH);

    /**
     * Languages available under common/main. Set in main() when the {@code directories} option
     * is given; otherwise filled lazily from the current factory by fixCommonLocales().
     */
    private static Set<String> COMMON_LOCALES;
179 
180     public static class StatusData {
181         int missing;
182         int provisional;
183         int unconfirmed;
184         Set<List<String>> values =
185                 new TreeSet<>(Comparators.lexicographical(Comparator.<String>naturalOrder()));
186     }
187 
188     public static class StatusCounter {
189         private static final Set<String> ATTRS_TO_REMOVE = Set.of("standard");
190         PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
191         Map<String, StatusData> starredPathToData = new TreeMap<>();
192         int missingTotal;
193         int provisionalTotal;
194         int unconfirmedTotal;
195 
gatherStarred(String path, DraftStatus draftStatus)196         public void gatherStarred(String path, DraftStatus draftStatus) {
197             String starredPath = pathStarrer.set(path);
198             StatusData statusData = starredPathToData.get(starredPath);
199             if (statusData == null) {
200                 starredPathToData.put(starredPath, statusData = new StatusData());
201             }
202             if (draftStatus == null) {
203                 ++statusData.missing;
204                 ++missingTotal;
205             } else {
206                 switch (draftStatus) {
207                     case unconfirmed:
208                         ++statusData.unconfirmed;
209                         ++unconfirmedTotal;
210                         break;
211                     case provisional:
212                         ++statusData.provisional;
213                         ++provisionalTotal;
214                         break;
215                     default:
216                         break;
217                 }
218             }
219             final List<String> attributes =
220                     CldrUtility.removeAll(
221                             new ArrayList<>(pathStarrer.getAttributes()), ATTRS_TO_REMOVE);
222             if (!attributes.isEmpty()) {
223                 statusData.values.add(attributes);
224             }
225         }
226     }
227 
main(String[] args)228     public static void main(String[] args) throws IOException {
229         myOptions.parse(MyOptions.filter, args, true);
230 
231         Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher("");
232 
233         if (MyOptions.chart.option.doesOccur()) {
234             showCoverage(null, matcher);
235             return;
236         }
237 
238         Set<String> locales = null;
239         String organization = MyOptions.organization.option.getValue();
240         boolean useOrgLevel = MyOptions.organization.option.doesOccur();
241         if (useOrgLevel) {
242             locales = STANDARD_CODES.getLocaleCoverageLocales(organization);
243         }
244 
245         if (MyOptions.version.option.doesOccur()) {
246             String number = MyOptions.version.option.getValue().trim();
247             if (!number.contains(".")) {
248                 number += ".0";
249             }
250             factory =
251                     org.unicode.cldr.util.Factory.make(
252                             CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*");
253         } else {
254             if (MyOptions.directories.option.doesOccur()) {
255                 String directories = MyOptions.directories.option.getValue().trim();
256                 CLDRConfig cldrConfig = CONFIG;
257                 String base = null;
258                 int colonPos = directories.indexOf(':');
259                 if (colonPos >= 0) {
260                     base = directories.substring(0, colonPos).trim();
261                     directories = directories.substring(colonPos + 1).trim();
262                 } else {
263                     base = cldrConfig.getCldrBaseDirectory().toString();
264                 }
265                 String[] items = directories.split(",\\s*");
266                 File[] fullDirectories = new File[items.length];
267                 int i = 0;
268                 for (String item : items) {
269                     fullDirectories[i++] = new File(base + "/" + item + "/main");
270                 }
271                 factory = SimpleFactory.make(fullDirectories, ".*");
272                 COMMON_LOCALES =
273                         SimpleFactory.make(base + "/" + "common" + "/main", ".*")
274                                 .getAvailableLanguages();
275             }
276         }
277         fixCommonLocales();
278 
279         showCoverage(null, matcher, locales, useOrgLevel);
280     }
281 
fixCommonLocales()282     private static void fixCommonLocales() {
283         if (COMMON_LOCALES == null) {
284             COMMON_LOCALES = factory.getAvailableLanguages();
285         }
286     }
287 
288     public static class FoundAndTotal {
289         final int found;
290         final int total;
291 
292         @SafeVarargs
FoundAndTotal(Counter<Level>.... counters)293         public FoundAndTotal(Counter<Level>... counters) {
294             final int[] count = {0, 0, 0};
295             for (Level level : Level.values()) {
296                 if (level == Level.COMPREHENSIVE) {
297                     continue;
298                 }
299                 int i = 0;
300                 for (Counter<Level> counter : counters) {
301                     count[i++] += counter.get(level);
302                 }
303             }
304             found = count[0];
305             total = found + count[1] + count[2];
306         }
307 
308         @Override
toString()309         public String toString() {
310             return found + "/" + total;
311         }
312     }
313 
showCoverage(Anchors anchors, Matcher matcher)314     static void showCoverage(Anchors anchors, Matcher matcher) throws IOException {
315         showCoverage(anchors, matcher, null, false);
316     }
317 
showCoverage( Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)318     private static void showCoverage(
319             Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)
320             throws IOException {
321         final String title = "Locale Coverage";
322         try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors));
323                 PrintWriter tsv_summary =
324                         FileUtilities.openUTF8Writer(
325                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv");
326                 PrintWriter tsv_missing =
327                         FileUtilities.openUTF8Writer(
328                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv");
329                 PrintWriter tsv_missing_summary =
330                         FileUtilities.openUTF8Writer(
331                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv");
332                 PrintWriter tsv_missing_basic =
333                         FileUtilities.openUTF8Writer(
334                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv");
335                 PrintWriter tsv_missing_counts =
336                         FileUtilities.openUTF8Writer(
337                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv");
338                 TempPrintWriter propertiesCoverage =
339                         TempPrintWriter.openUTF8Writer(
340                                 CLDRPaths.COMMON_DIRECTORY + "properties/",
341                                 "coverageLevels.txt"); ) {
342             tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER);
343             tsv_missing.println(TSV_MISSING_HEADER);
344             tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER);
345             tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER);
346 
347             final int propertiesCoverageTabCount = 2;
348             propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, PROPERTIES_HEADER);
349 
350             Set<String> checkModernLocales =
351                     STANDARD_CODES.getLocaleCoverageLocales(
352                             Organization.cldr, EnumSet.of(Level.MODERN));
353             Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages());
354             availableLanguages.addAll(checkModernLocales);
355 
356             Multimap<String, String> languageToRegion = TreeMultimap.create();
357             LanguageTagParser ltp = new LanguageTagParser();
358             LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true);
359             for (String locale : factory.getAvailable()) {
360                 String country = ltp.set(locale).getRegion();
361                 if (!country.isEmpty()) {
362                     languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country);
363                 }
364             }
365             languageToRegion = ImmutableMultimap.copyOf(languageToRegion);
366 
367             fixCommonLocales();
368 
369             System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet()));
370 
371             System.out.println("# Checking: " + availableLanguages);
372 
373             NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH);
374             percentFormat.setMaximumFractionDigits(1);
375 
376             pw.println(
377                     "<p style='text-align: left'>This chart shows the coverage levels in this release. "
378                             + "Totals are listed after the main chart.</p>\n"
379                             + "<blockquote><ul>\n"
380                             + "<li><a href='#main_table'>Main Table</a></li>\n"
381                             + "<li><a href='#level_counts'>Level Counts</a></li>\n"
382                             + "</ul></blockquote>\n"
383                             + "<h3>Column Key</h3>\n"
384                             + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n"
385                             + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n"
386                             + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n"
387                             + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. "
388                             + "Particular organizations may have different target levels. "
389                             + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n"
390                             + "<tr><th>≟</th><td>Indicates whether the CLDR Target is less than, equal to, or greater than the Computed Level.</td></tr>\n"
391                             + "<tr><th>Computed Level</th><td>Computed from the percentage values, "
392                             + "taking the first level that meets a threshold (currently �� "
393                             + percentFormat.format(MODERN_THRESHOLD)
394                             + ", ⓜ "
395                             + percentFormat.format(MODERATE_THRESHOLD)
396                             + ", ⓑ "
397                             + percentFormat.format(BASIC_THRESHOLD)
398                             + ").</td></tr>\n"
399                             + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n"
400                             + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. "
401                             + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n"
402                             + "<tr><th>��%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: �� = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. "
403                             + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. "
404                             + "A high-level summary of the meaning of the coverage values is at "
405                             + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. "
406                             + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. "
407                             + "</td></tr>\n"
408                             + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. "
409                             + "They are listed if missing at the computed level. For more information, see <a href='https://cldr.unicode.org/index/locale-coverage'>Missing Features</a><br>"
410                             + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>"
411                             + "<ul><li>"
412                             + "<i>Except for Core, these are not accounted for in the percent values.</i>"
413                             + "</li><li>"
414                             + "The information needs to be provided in tickets, not through the Survey Tool."
415                             + "</li></ul>"
416                             + "</td></tr>\n"
417                             + "<tr><th>"
418                             + linkTsv("", "TSVFiles")
419                             + ":</th><td>\n"
420                             + "<ul><li>"
421                             + linkTsv("locale-coverage.tsv")
422                             + " — A version of this file, suitable for loading into a spreadsheet.</li>\n"
423                             + "<li>"
424                             + linkTsv("locale-missing.tsv")
425                             + " — Missing items for the CLDR target locales.</li>\n"
426                             + "<li>"
427                             + linkTsv("locale-missing-summary.tsv")
428                             + " — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n"
429                             + "<li>"
430                             + linkTsv("locale-missing-basic.tsv")
431                             + " — Missing items that keep locales from reaching the Basic level.</li>\n"
432                             + "<li>"
433                             + linkTsv("locale-missing-counts.tsv")
434                             + " — Counts of items per locale that are found, unconfirmed, or missing, at the target level. "
435                             + "(Or at *basic, if there is no target level.)</li>\n"
436                             + "</td></tr>\n"
437                             + "</table>\n");
438 
439             Relation<MissingStatus, String> missingPaths =
440                     Relation.of(
441                             new EnumMap<MissingStatus, Set<String>>(MissingStatus.class),
442                             TreeSet.class,
443                             CLDRFile.getComparator(DtdType.ldml));
444             Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
445 
446             Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
447 
448             Counter<Level> foundCounter = new Counter<>();
449             Counter<Level> unconfirmedCounter = new Counter<>();
450             Counter<Level> missingCounter = new Counter<>();
451 
452             List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class));
453             levelsToShow.remove(Level.COMPREHENSIVE);
454             levelsToShow.remove(Level.UNDETERMINED);
455             levelsToShow = ImmutableList.copyOf(levelsToShow);
456             List<Level> reversedLevels = new ArrayList<>(levelsToShow);
457             Collections.reverse(reversedLevels);
458             reversedLevels = ImmutableList.copyOf(reversedLevels);
459 
460             int localeCount = 0;
461 
462             final TablePrinter tablePrinter =
463                     new TablePrinter()
464                             .addColumn(
465                                     "Language",
466                                     "class='source'",
467                                     CldrUtility.getDoubleLinkMsg(),
468                                     "class='source'",
469                                     true)
470                             .setBreakSpans(true)
471                             .addColumn(
472                                     "English Name", "class='source'", null, "class='source'", true)
473                             .setBreakSpans(true)
474                             .addColumn(
475                                     "Native Name", "class='source'", null, "class='source'", true)
476                             .setBreakSpans(true)
477                             .addColumn("Script", "class='source'", null, "class='source'", true)
478                             .setBreakSpans(true)
479                             .addColumn(
480                                     "Default Region",
481                                     "class='source'",
482                                     null,
483                                     "class='source'",
484                                     true)
485                             .setBreakSpans(true)
486                             .addColumn(
487                                     "№ Locales",
488                                     "class='source'",
489                                     null,
490                                     "class='targetRight'",
491                                     true)
492                             .setBreakSpans(true)
493                             .setCellPattern("{0,number}")
494                             .addColumn(
495                                     "Target Level", "class='source'", null, "class='source'", true)
496                             .setBreakSpans(true)
497                             .addColumn("≟", "class='target'", null, "class='target'", true)
498                             .setBreakSpans(true)
499                             .setSortPriority(1)
500                             .setSortAscending(false)
501                             .addColumn(
502                                     "Computed Level",
503                                     "class='target'",
504                                     null,
505                                     "class='target'",
506                                     true)
507                             .setBreakSpans(true)
508                             .setSortPriority(0)
509                             .setSortAscending(false)
510                             .addColumn("ICU", "class='target'", null, "class='target'", true)
511                             .setBreakSpans(true)
512                             .addColumn(
513                                     "Confirmed",
514                                     "class='target'",
515                                     null,
516                                     "class='targetRight' style='color:gray'",
517                                     true)
518                             .setBreakSpans(true)
519                             .setCellPattern("{0,number,0.0%}");
520 
521             NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH);
522             tsvPercent.setMaximumFractionDigits(2);
523 
524             for (Level level : reversedLevels) {
525                 String titleLevel = level.getAbbreviation() + "%";
526                 tablePrinter
527                         .addColumn(titleLevel, "class='target'", null, "class='targetRight'", true)
528                         .setCellPattern("{0,number,0.0%}")
529                         .setBreakSpans(true);
530 
531                 switch (level) {
532                     default:
533                         tablePrinter.setSortPriority(2).setSortAscending(false);
534                         break;
535                     case BASIC:
536                         tablePrinter.setSortPriority(3).setSortAscending(false);
537                         break;
538                     case MODERATE:
539                         tablePrinter.setSortPriority(4).setSortAscending(false);
540                         break;
541                     case MODERN:
542                         tablePrinter.setSortPriority(5).setSortAscending(false);
543                         break;
544                 }
545             }
546             tablePrinter
547                     .addColumn("Missing Features", "class='target'", null, "class='target'", true)
548                     .setBreakSpans(true);
549 
550             long start = System.currentTimeMillis();
551             LikelySubtags likelySubtags = new LikelySubtags();
552 
553             EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class);
554             targetLevel.put(Level.CORE, 2 / 100d);
555             targetLevel.put(Level.BASIC, 16 / 100d);
556             targetLevel.put(Level.MODERATE, 33 / 100d);
557             targetLevel.put(Level.MODERN, 100 / 100d);
558 
559             Multimap<String, String> pathToLocale = TreeMultimap.create();
560 
561             Counter<Level> computedLevels = new Counter<>();
562             Counter<Level> computedSublocaleLevels = new Counter<>();
563 
564             for (String locale : availableLanguages) {
565                 try {
566                     if (locale.contains("supplemental") // for old versionsl
567                     //                        || locale.startsWith("sr_Latn")
568                     ) {
569                         continue;
570                     }
571                     if (locales != null && !locales.contains(locale)) {
572                         String base = CLDRLocale.getInstance(locale).getLanguage();
573                         if (!locales.contains(base)) {
574                             continue;
575                         }
576                     }
577                     if (matcher != null && !matcher.reset(locale).matches()) {
578                         continue;
579                     }
580                     if (defaultContents.contains(locale)
581                             || LocaleNames.ROOT.equals(locale)
582                             || LocaleNames.UND.equals(locale)) {
583                         continue;
584                     }
585 
586                     tsv_missing_summary.flush();
587                     tsv_missing.flush();
588                     tsv_missing_basic.flush();
589                     tsv_missing_counts.flush();
590 
591                     boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists();
592 
593                     String region = ltp.set(locale).getRegion();
594                     if (!region.isEmpty()) continue; // skip regions
595 
596                     final Level cldrLocaleLevelGoal =
597                             SC.getLocaleCoverageLevel(Organization.cldr, locale);
598                     final String specialFlag = getSpecialFlag(locale);
599 
600                     final boolean cldrLevelGoalBasicToModern =
601                             Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal);
602 
603                     String max = likelySubtags.maximize(locale);
604                     final String script = ltp.set(max).getScript();
605                     final String defRegion = ltp.getRegion();
606 
607                     final String language = likelySubtags.minimize(locale);
608 
609                     missingPaths.clear();
610                     unconfirmed.clear();
611 
612                     final CLDRFile file = factory.make(locale, true, minimumDraftStatus);
613 
614                     if (locale.equals("af")) {
615                         int debug = 0;
616                     }
617 
618                     Iterable<String> pathSource = new IterableFilter(file.fullIterable());
619 
620                     VettingViewer.getStatus(
621                             pathSource,
622                             file,
623                             pathHeaderFactory,
624                             foundCounter,
625                             unconfirmedCounter,
626                             missingCounter,
627                             missingPaths,
628                             unconfirmed);
629 
630                     {
631                         long found = 0;
632                         long unconfirmedc = 0;
633                         long missing = 0;
634                         Level adjustedGoal =
635                                 cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0
636                                         ? Level.BASIC
637                                         : cldrLocaleLevelGoal;
638                         for (Level level : Level.values()) {
639                             if (level.compareTo(adjustedGoal) <= 0) {
640                                 found += foundCounter.get(level);
641                                 unconfirmedc += unconfirmedCounter.get(level);
642                                 missing += missingCounter.get(level);
643                             }
644                         }
645                         String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*";
646                         tsv_missing_counts.println(
647                                 specialFlag
648                                         + locale
649                                         + "\t"
650                                         + goalFlag
651                                         + adjustedGoal
652                                         + "\t"
653                                         + found
654                                         + "\t"
655                                         + unconfirmedc
656                                         + "\t"
657                                         + missing);
658                     }
659 
660                     Collection<String> sublocales = languageToRegion.asMap().get(language);
661                     if (sublocales == null) {
662                         sublocales = Collections.emptySet();
663                     }
664                     sublocales = ImmutableSet.copyOf(sublocales);
665 
666                     // get the totals
667 
668                     EnumMap<Level, Integer> totals = new EnumMap<>(Level.class);
669                     EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class);
670                     Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class);
671 
672                     StatusCounter starredCounter = new StatusCounter();
673 
674                     {
675                         Multimap<CoreItems, String> detailedErrors = TreeMultimap.create();
676                         Set<CoreItems> coverage =
677                                 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors);
678                         for (CoreItems item : coverage) {
679                             foundCounter.add(item.desiredLevel, 1);
680                         }
681                         for (Entry<CoreItems, String> entry : detailedErrors.entries()) {
682                             CoreItems coreItem = entry.getKey();
683                             String path = entry.getValue();
684                             specialMissingPaths.add(coreItem);
685                             // if goal (eg modern) >= itemLevel, indicate it is missing
686                             if (coreItem.desiredLevel == Level.BASIC) {
687                                 starredCounter.gatherStarred(path, null);
688                             }
689                             missingCounter.add(coreItem.desiredLevel, 1);
690                         }
691                     }
692 
693                     if (cldrLevelGoalBasicToModern) {
694                         Level goalLevel = cldrLocaleLevelGoal;
695                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
696                             String path = entry.getValue();
697                             String status = entry.getKey().toString();
698                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
699                             if (goalLevel.compareTo(foundLevel) >= 0) {
700                                 String line =
701                                         spreadsheetLine(
702                                                 locale,
703                                                 language,
704                                                 script,
705                                                 specialFlag,
706                                                 file.getStringValue(path),
707                                                 goalLevel,
708                                                 foundLevel,
709                                                 status,
710                                                 path,
711                                                 file,
712                                                 pathToLocale);
713                                 String lineToPrint1 = line;
714                                 tsv_missing.println(lineToPrint1);
715                             }
716                         }
717                         for (String path : unconfirmed) {
718                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
719                             if (goalLevel.compareTo(foundLevel) >= 0) {
720                                 String line =
721                                         spreadsheetLine(
722                                                 locale,
723                                                 language,
724                                                 script,
725                                                 specialFlag,
726                                                 file.getStringValue(path),
727                                                 goalLevel,
728                                                 foundLevel,
729                                                 "n/a",
730                                                 path,
731                                                 file,
732                                                 pathToLocale);
733                                 tsv_missing.println(line);
734                             }
735                         }
736                     } else {
737                         Level goalLevel = Level.BASIC;
738                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
739                             String path = entry.getValue();
740                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
741                             if (goalLevel.compareTo(foundLevel) >= 0) {
742                                 starredCounter.gatherStarred(path, null);
743                             }
744                         }
745                         for (String path : unconfirmed) {
746                             String fullPath = file.getFullXPath(path);
747                             DraftStatus draftStatus =
748                                     fullPath.contains("unconfirmed")
749                                             ? DraftStatus.unconfirmed
750                                             : DraftStatus.provisional;
751 
752                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
753                             if (goalLevel.compareTo(foundLevel) >= 0) {
754                                 starredCounter.gatherStarred(path, draftStatus);
755                             }
756                         }
757                     }
758 
759                     if (!starredCounter.starredPathToData.isEmpty()) {
760                         for (Entry<String, StatusData> starred :
761                                 starredCounter.starredPathToData.entrySet()) {
762                             String starredPath = starred.getKey();
763                             StatusData statusData = starred.getValue();
764                             String valueString =
765                                     statusData.values.stream()
766                                             .map(x -> Joiner.on(", ").join(x))
767                                             .collect(Collectors.joining("; "));
768 
769                             tsv_missing_basic.println(
770                                     specialFlag
771                                             + locale //
772                                             + "\t"
773                                             + statusData.missing //
774                                             + "\t"
775                                             + statusData.provisional //
776                                             + "\t"
777                                             + statusData.unconfirmed //
778                                             + "\t"
779                                             + starredPath.replace("\"*\"", "'*'")
780                                             + "\t"
781                                             + valueString
782                                     //
783                                     );
784                         }
785                         tsv_missing_basic.println(
786                                 specialFlag
787                                         + locale //
788                                         + "\t"
789                                         + starredCounter.missingTotal //
790                                         + "\t"
791                                         + starredCounter.provisionalTotal //
792                                         + "\t"
793                                         + starredCounter.unconfirmedTotal //
794                                         + "\tTotals\t");
795                         tsv_missing_basic.println("\t\t\t\t\t"); // for a proper table in github
796                     }
797 
798                     int sumFound = 0;
799                     int sumMissing = 0;
800                     int sumUnconfirmed = 0;
801 
802                     for (Level level : levelsToShow) {
803                         long foundCount = foundCounter.get(level);
804                         long unconfirmedCount = unconfirmedCounter.get(level);
805                         long missingCount = missingCounter.get(level);
806 
807                         sumFound += foundCount;
808                         sumUnconfirmed += unconfirmedCount;
809                         sumMissing += missingCount;
810 
811                         confirmed.put(level, sumFound);
812                         totals.put(level, sumFound + sumUnconfirmed + sumMissing);
813                     }
814 
815                     // double modernTotal = totals.get(Level.MODERN);
816 
817                     // first get the accumulated values
818                     EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class);
819                     EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class);
820                     int currTotals = 0;
821                     int currConfirmed = 0;
822                     for (Level level : levelsToShow) {
823                         currTotals += totals.get(level);
824                         currConfirmed += confirmed.get(level);
825                         accumConfirmed.put(level, currConfirmed);
826                         accumTotals.put(level, currTotals);
827                     }
828 
829                     // print the totals
830 
831                     Level computed = Level.UNDETERMINED;
832                     Map<Level, Double> levelToProportion = new EnumMap<>(Level.class);
833 
834                     for (Level level : reversedLevels) {
835                         int confirmedCoverage = accumConfirmed.get(level);
836                         double total = accumTotals.get(level);
837 
838                         final double proportion = confirmedCoverage / total;
839                         levelToProportion.put(level, proportion);
840 
841                         if (computed == Level.UNDETERMINED) {
842                             switch (level) {
843                                 case MODERN:
844                                     if (proportion >= MODERN_THRESHOLD) {
845                                         computed = level;
846                                     }
847                                     break;
848                                 case MODERATE:
849                                     if (proportion >= MODERATE_THRESHOLD) {
850                                         computed = level;
851                                     }
852                                     break;
853                                 case BASIC:
854                                     if (proportion >= BASIC_THRESHOLD) {
855                                         computed = level;
856                                     }
857                                     break;
858                                 default:
859                                     break;
860                             }
861                         }
862                     }
863 
864                     Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class);
865                     Level computedWithCore =
866                             computed == Level.UNDETERMINED ? Level.BASIC : computed;
867                     for (CoreItems item : specialMissingPaths) {
868                         if (item.desiredLevel.compareTo(computedWithCore) <= 0) {
869                             shownMissingPaths.add(item);
870                         } else {
871                             int debug = 0;
872                         }
873                     }
874                     computedLevels.add(computed, 1);
875                     computedSublocaleLevels.add(computed, sublocales.size());
876 
877                     final String coreMissingString = Joiner.on(", ").join(shownMissingPaths);
878                     final String visibleLevelComputed =
879                             computed == Level.UNDETERMINED ? "" : computed.toString();
880                     final String visibleLevelGoal =
881                             cldrLocaleLevelGoal == Level.UNDETERMINED
882                                     ? ""
883                                     : specialFlag + cldrLocaleLevelGoal.toString();
884                     final String goalComparedToComputed =
885                             computed == cldrLocaleLevelGoal
886                                     ? " ≡"
887                                     : cldrLocaleLevelGoal.compareTo(computed) < 0 ? " <" : " >";
888 
889                     tablePrinter
890                             .addRow()
891                             .addCell(language)
892                             .addCell(ENGLISH.getName(language, true, CLDRFile.SHORT_ALTS))
893                             .addCell(file.getName(language))
894                             .addCell(script)
895                             .addCell(defRegion)
896                             .addCell(sublocales.size())
897                             .addCell(visibleLevelGoal)
898                             .addCell(goalComparedToComputed)
899                             .addCell(visibleLevelComputed)
900                             .addCell(getIcuValue(language))
901                             .addCell(sumFound / (double) (sumFound + sumUnconfirmed));
902 
903                     // print the totals
904                     for (Level level : reversedLevels) {
905                         tablePrinter.addCell(levelToProportion.get(level));
906                     }
907 
908                     tablePrinter.addCell(coreMissingString).finishRow();
909 
910                     // now write properties file line
911 
912                     if (computed != Level.UNDETERMINED) {
913                         propertiesCoverage.printlnWithTabs(
914                                 propertiesCoverageTabCount,
915                                 locale
916                                         + " ;\t"
917                                         + visibleLevelComputed
918                                         + " ;\t"
919                                         + ENGLISH.getName(locale));
920                         // TODO decide whether to restore this
921                         //                        Level higher = Level.UNDETERMINED;
922                         //                        switch (computed) {
923                         //                        default:
924                         //                            higher = Level.UNDETERMINED;
925                         //                            break;
926                         //                        case MODERATE:
927                         //                            higher = Level.MODERN;
928                         //                            break;
929                         //                        case BASIC:
930                         //                            higher = Level.MODERATE;
931                         //                            break;
932                         //                        }
933                         //                        double higherProportion = higher ==
934                         // Level.UNDETERMINED ? 0d : levelToProportion.get(higher);
935                         //
936                         //                        if (higherProportion >= THRESHOLD_HIGHER) {
937                         //                            propertiesCoverage.println(
938                         //                                " ;\t" +
939                         // tsvPercent.format(higherProportion) +
940                         //                                " ;\t" + higher
941                         //                                );
942                         //                        } else {
943                         //                            propertiesCoverage.println(" ;\t" + "" + "
944                         // ;\t" + "");
945                         //                        }
946                     }
947                     localeCount++;
948                 } catch (Exception e) {
949                     throw new IllegalArgumentException(e);
950                 }
951             }
952             String lineToPrint = "\n#EOF";
953             propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, lineToPrint);
954 
955             pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>");
956             pw.println(tablePrinter.toTable());
957 
958             pw.println(
959                     "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n"
960                             + "<table class='subtle'><tr>\n"
961                             + "<th style='text-align:left'>"
962                             + "Level"
963                             + "</th>"
964                             + "<th style='text-align:left'>"
965                             + "Languages"
966                             + "</th>"
967                             + "<th style='text-align:left'>"
968                             + "Locales"
969                             + "</th>"
970                             + "</tr>");
971             long totalCount = 0;
972             long totalLocaleCount = 0;
973             for (Level level : Lists.reverse(Arrays.asList(Level.values()))) {
974                 final long count = computedLevels.get(level);
975                 final long localesCount = computedSublocaleLevels.get(level);
976                 if (count == 0 || level == Level.UNDETERMINED) {
977                     continue;
978                 }
979                 totalCount += count;
980                 totalLocaleCount += localesCount;
981                 String visibleImputed =
982                         level == Level.UNDETERMINED
983                                 ? "<" + Level.BASIC.toString()
984                                 : level.toString();
985                 pw.println(
986                         "<tr>"
987                                 + "<th style='text-align:left'>"
988                                 + visibleImputed
989                                 + "</th>"
990                                 + "<td style='text-align:right'>"
991                                 + count
992                                 + "</td>"
993                                 + "<td style='text-align:right'>"
994                                 + localesCount
995                                 + "</td>"
996                                 + "</tr>");
997             }
998             pw.println(
999                     "<tr>"
1000                             + "<th style='text-align:left'>"
1001                             + "Total"
1002                             + "</th>"
1003                             + "<td style='text-align:right'>"
1004                             + totalCount
1005                             + "</td>"
1006                             + "<td style='text-align:right'>"
1007                             + totalLocaleCount
1008                             + "</td>"
1009                             + "</tr>\n");
1010 
1011             pw.println(
1012                     "<tr>"
1013                             + "<th style='text-align:left'>"
1014                             + "in dev."
1015                             + "</th>"
1016                             + "<td style='text-align:right'>"
1017                             + computedLevels.get(Level.UNDETERMINED)
1018                             + "</td>"
1019                             + "<td style='text-align:right'>"
1020                             + computedSublocaleLevels.get(Level.UNDETERMINED)
1021                             + "</td>"
1022                             + "</tr>\n"
1023                             + "</table>");
1024 
1025             Multimap<Level, String> levelToLocales = TreeMultimap.create();
1026 
1027             for (Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) {
1028                 String path = entry.getKey();
1029                 Collection<String> localeSet = entry.getValue();
1030                 levelToLocales.clear();
1031                 for (String locale : localeSet) {
1032                     Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
1033                     levelToLocales.put(foundLevel, locale);
1034                 }
1035                 String phString = "n/a\tn/a\tn/a\tn/a";
1036                 try {
1037                     PathHeader ph = pathHeaderFactory.fromPath(path);
1038                     phString = ph.toString();
1039                 } catch (Exception e) {
1040                 }
1041                 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) {
1042                     Level level = entry2.getKey();
1043                     localeSet = entry2.getValue();
1044                     tsv_missing_summary.println(
1045                             level
1046                                     + "\t"
1047                                     + localeSet.size()
1048                                     + "\t"
1049                                     + Joiner.on(" ")
1050                                             .join(
1051                                                     localeSet.stream()
1052                                                             .map(x -> x + getSpecialFlag(x))
1053                                                             .collect(Collectors.toSet()))
1054                                     + "\t"
1055                                     + phString);
1056                 }
1057             }
1058             tablePrinter.toTsv(tsv_summary);
1059             long end = System.currentTimeMillis();
1060             System.out.println(
1061                     (end - start)
1062                             + " millis = "
1063                             + ((end - start) / localeCount)
1064                             + " millis/locale");
1065             ShowPlurals.appendBlanksForScrolling(pw);
1066         }
1067     }
1068 
linkTsv(String tsvFileName)1069     private static String linkTsv(String tsvFileName) {
1070         return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + tsvFileName + "</a>";
1071     }
1072 
linkTsv(String tsvFileName, String anchorText)1073     private static String linkTsv(String tsvFileName, String anchorText) {
1074         return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + anchorText + "</a>";
1075     }
1076 
getSpecialFlag(String locale)1077     private static String getSpecialFlag(String locale) {
1078         return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED
1079                 ? ""
1080                 : "‡";
1081     }
1082 
1083     private static class IterableFilter implements Iterable<String> {
1084         private Iterable<String> source;
1085 
IterableFilter(Iterable<String> source)1086         IterableFilter(Iterable<String> source) {
1087             this.source = source;
1088         }
1089 
1090         /**
1091          * When some paths are defined after submission, we need to change them to COMPREHENSIVE in
1092          * computing the vetting status.
1093          */
1094         private static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of();
1095 
1096         @Override
iterator()1097         public Iterator<String> iterator() {
1098             return new IteratorFilter(source.iterator());
1099         }
1100 
1101         private static class IteratorFilter implements Iterator<String> {
1102             Iterator<String> source;
1103             String peek;
1104 
IteratorFilter(Iterator<String> source)1105             public IteratorFilter(Iterator<String> source) {
1106                 this.source = source;
1107                 fillPeek();
1108             }
1109 
1110             @Override
hasNext()1111             public boolean hasNext() {
1112                 return peek != null;
1113             }
1114 
1115             @Override
next()1116             public String next() {
1117                 String result = peek;
1118                 fillPeek();
1119                 return result;
1120             }
1121 
fillPeek()1122             private void fillPeek() {
1123                 peek = null;
1124                 while (source.hasNext()) {
1125                     peek = source.next();
1126                     // if it is ok to assess, then break
1127                     if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek)
1128                             && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) {
1129                         break;
1130                     }
1131                     peek = null;
1132                 }
1133             }
1134         }
1135     }
1136 
1137     private static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO);
1138 
spreadsheetLine( String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1139     private static String spreadsheetLine(
1140             String locale,
1141             String language,
1142             String script,
1143             String specialFlag,
1144             String nativeValue,
1145             Level cldrLocaleLevelGoal,
1146             Level itemLevel,
1147             String status,
1148             String path,
1149             CLDRFile resolvedFile,
1150             Multimap<String, String> pathToLocale) {
1151         if (pathToLocale != null) {
1152             pathToLocale.put(path, locale);
1153         }
1154         //        String stLink = "n/a";
1155         //        String englishValue = "n/a";
1156         //        StatusAction action = null;
1157         //        String icuValue = getIcuValue(locale);
1158 
1159         SurveyToolStatus surveyToolStatus = null;
1160         String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path);
1161 
1162         String phString = "na\tn/a\tn/a\t" + path;
1163         try {
1164             PathHeader ph = pathHeaderFactory.fromPath(path);
1165             phString = ph.toString();
1166             //            stLink = URLS.forXpath(locale, path);
1167             //            englishValue = ENGLISH.getStringValue(path);
1168             //            action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo,
1169             // InputMethod.DIRECT, ph, dummyUserInfo);
1170         } catch (Exception e) {
1171 
1172         }
1173 
1174         String line =
1175                 specialFlag
1176                         + language
1177                         + "\t"
1178                         + ENGLISH.getName(language)
1179                         + "\t"
1180                         + ENGLISH.getName("script", script)
1181                         + "\t"
1182                         + cldrLocaleLevelGoal
1183                         + "\t"
1184                         + itemLevel
1185                         + "\t"
1186                         + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString())
1187                         + "\t"
1188                         + bailey
1189                         + "\t"
1190                         + phString
1191                         + "\t"
1192                         + PathHeader.getUrlForLocalePath(locale, path);
1193         return line;
1194     }
1195 
getIcuValue(String locale)1196     private static String getIcuValue(String locale) {
1197         return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : "";
1198     }
1199 
1200     private static final Set<ULocale> ICU_Locales =
1201             ImmutableSet.copyOf(ULocale.getAvailableLocales());
1202 }
1203