1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.Comparators; 6 import com.google.common.collect.ImmutableList; 7 import com.google.common.collect.ImmutableMultimap; 8 import com.google.common.collect.ImmutableSet; 9 import com.google.common.collect.Lists; 10 import com.google.common.collect.Multimap; 11 import com.google.common.collect.TreeMultimap; 12 import com.ibm.icu.impl.Relation; 13 import com.ibm.icu.text.NumberFormat; 14 import com.ibm.icu.util.ULocale; 15 import java.io.File; 16 import java.io.IOException; 17 import java.io.PrintWriter; 18 import java.util.ArrayList; 19 import java.util.Arrays; 20 import java.util.Collection; 21 import java.util.Collections; 22 import java.util.Comparator; 23 import java.util.EnumMap; 24 import java.util.EnumSet; 25 import java.util.Iterator; 26 import java.util.List; 27 import java.util.Locale; 28 import java.util.Map; 29 import java.util.Map.Entry; 30 import java.util.Set; 31 import java.util.TreeMap; 32 import java.util.TreeSet; 33 import java.util.regex.Matcher; 34 import java.util.stream.Collectors; 35 import org.unicode.cldr.draft.FileUtilities; 36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 37 import org.unicode.cldr.tool.Option.Options; 38 import org.unicode.cldr.util.CLDRConfig; 39 import org.unicode.cldr.util.CLDRFile; 40 import org.unicode.cldr.util.CLDRFile.DraftStatus; 41 import org.unicode.cldr.util.CLDRLocale; 42 import org.unicode.cldr.util.CLDRPaths; 43 import org.unicode.cldr.util.CLDRURLS; 44 import org.unicode.cldr.util.CldrUtility; 45 import org.unicode.cldr.util.CoreCoverageInfo; 46 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems; 47 import org.unicode.cldr.util.Counter; 48 import org.unicode.cldr.util.CoverageInfo; 49 import org.unicode.cldr.util.DtdType; 50 import org.unicode.cldr.util.LanguageTagCanonicalizer; 51 import org.unicode.cldr.util.LanguageTagParser; 52 import org.unicode.cldr.util.Level; 53 import org.unicode.cldr.util.LocaleNames; 54 import org.unicode.cldr.util.Organization; 55 import org.unicode.cldr.util.PathHeader; 56 import org.unicode.cldr.util.PathHeader.Factory; 57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 58 import org.unicode.cldr.util.PathStarrer; 59 import org.unicode.cldr.util.PatternCache; 60 import org.unicode.cldr.util.RegexLookup; 61 import org.unicode.cldr.util.SimpleFactory; 62 import org.unicode.cldr.util.StandardCodes; 63 import org.unicode.cldr.util.SupplementalDataInfo; 64 import org.unicode.cldr.util.TempPrintWriter; 65 import org.unicode.cldr.util.VettingViewer; 66 import org.unicode.cldr.util.VettingViewer.MissingStatus; 67 68 public class ShowLocaleCoverage { 69 70 private static final String TSV_BASE = 71 CLDRURLS.CLDR_STAGING_REPO_MAIN 72 + "docs/charts/" 73 + ToolConstants.CHART_VI.getVersionString(1, 2) 74 + "/tsv/"; 75 public static final Splitter LF_SPLITTER = Splitter.on('\n'); 76 77 // thresholds for measuring Level attainment 78 private static final double BASIC_THRESHOLD = 1; 79 private static final double MODERATE_THRESHOLD = 0.995; 80 private static final double MODERN_THRESHOLD = 0.995; 81 82 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 83 private static final String TSV_MISSING_SUMMARY_HEADER = 84 "#Path Level" 85 + "\t#Locales" 86 + "\tLocales" 87 + "\tSection" 88 + "\tPage" 89 + "\tHeader" 90 + "\tCode"; 91 92 private static final String TSV_MISSING_HEADER = 93 "#LCode" 94 + "\tEnglish Name" 95 + "\tScript" 96 + "\tLocale Level" 97 + "\tPath Level" 98 + "\tSTStatus" 99 + "\tBailey" 100 + "\tSection" 101 + "\tPage" 102 + "\tHeader" 103 + "\tCode" 104 + "\tST Link"; 105 106 private static final String PROPERTIES_HEADER = 107 "# coverageLevels.txt\n" 108 + "# Copyright © 2023 Unicode, Inc.\n" 109 + "# CLDR data files are interpreted according to the\n" 110 + "# LDML specification: http://unicode.org/reports/tr35/\n" 111 + "# For terms of use, see http://www.unicode.org/copyright.html\n" 112 + "#\n" 113 + "# For format and usage information, see:\n" 114 + "# https://cldr.unicode.org/index/cldr-spec/coverage-levels.\n" 115 + "\n"; 116 private static final String TSV_MISSING_BASIC_HEADER = 117 "#Locale\tProv.\tUnconf.\tMissing\tPath*\tAttributes"; 118 private static final String TSV_MISSING_COUNTS_HEADER = 119 "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing"; 120 121 private static final boolean DEBUG = true; 122 private static final char DEBUG_FILTER = 123 0; // use letter to only load locales starting with that letter 124 125 private static final String LATEST = ToolConstants.CHART_VERSION; 126 private static CLDRConfig testInfo = ToolConfig.getToolInstance(); 127 private static final StandardCodes SC = StandardCodes.make(); 128 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 129 testInfo.getSupplementalDataInfo(); 130 private static final StandardCodes STANDARD_CODES = SC; 131 132 private static org.unicode.cldr.util.Factory factory = 133 testInfo.getCommonAndSeedAndMainAndAnnotationsFactory(); 134 private static final CLDRFile ENGLISH = factory.make("en", true); 135 136 static final Options myOptions = new Options(); 137 138 enum MyOptions { 139 filter(".+", ".*", "Filter the information based on id, using a regex argument."), 140 // draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft 141 // status."), 142 chart(null, null, "chart only"), 143 organization(".+", null, "Only locales for organization"), 144 version(".+", LATEST, "To get different versions"), 145 rawData(null, null, "Output the raw data from all coverage levels"), 146 targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."), 147 directories( 148 "(.*:)?[a-z]+(,[a-z]+)*", 149 "common", 150 "Space-delimited list of main source directories: common,seed,exemplar.\n" 151 + "Optional, <baseDir>:common,seed"), 152 ; 153 154 // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target 155 // directory."), 156 // layouts(null, null, "Only create html files for keyboard layouts"), 157 // repertoire(null, null, "Only create html files for repertoire"), ; 158 // boilerplate 159 final Option option; 160 MyOptions(String argumentPattern, String defaultArgument, String helpText)161 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 162 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 163 } 164 } 165 166 private static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = 167 new RegexLookup<Boolean>() 168 .add("\\[@alt=\"accounting\"]", true) 169 .add("\\[@alt=\"variant\"]", true) 170 .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true) 171 .add("^//ldml/localeDisplayNames/languages/language.*_", true) 172 .add("^//ldml/numbers/currencies/currency.*/symbol", true) 173 .add("^//ldml/characters/exemplarCharacters", true); 174 175 private static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed; 176 private static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH); 177 178 private static Set<String> COMMON_LOCALES; 179 180 public static class StatusData { 181 int missing; 182 int provisional; 183 int unconfirmed; 184 Set<List<String>> values = 185 new TreeSet<>(Comparators.lexicographical(Comparator.<String>naturalOrder())); 186 } 187 188 public static class StatusCounter { 189 private static final Set<String> ATTRS_TO_REMOVE = Set.of("standard"); 190 PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 191 Map<String, StatusData> starredPathToData = new TreeMap<>(); 192 int missingTotal; 193 int provisionalTotal; 194 int unconfirmedTotal; 195 gatherStarred(String path, DraftStatus draftStatus)196 public void gatherStarred(String path, DraftStatus draftStatus) { 197 String starredPath = pathStarrer.set(path); 198 StatusData statusData = starredPathToData.get(starredPath); 199 if (statusData == null) { 200 starredPathToData.put(starredPath, statusData = new StatusData()); 201 } 202 if (draftStatus == null) { 203 ++statusData.missing; 204 ++missingTotal; 205 } else { 206 switch (draftStatus) { 207 case unconfirmed: 208 ++statusData.unconfirmed; 209 ++unconfirmedTotal; 210 break; 211 case provisional: 212 ++statusData.provisional; 213 ++provisionalTotal; 214 break; 215 default: 216 break; 217 } 218 } 219 final List<String> attributes = 220 CldrUtility.removeAll( 221 new ArrayList<>(pathStarrer.getAttributes()), ATTRS_TO_REMOVE); 222 if (!attributes.isEmpty()) { 223 statusData.values.add(attributes); 224 } 225 } 226 } 227 main(String[] args)228 public static void main(String[] args) throws IOException { 229 myOptions.parse(MyOptions.filter, args, true); 230 231 Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher(""); 232 233 if (MyOptions.chart.option.doesOccur()) { 234 showCoverage(null, matcher); 235 return; 236 } 237 238 Set<String> locales = null; 239 String organization = MyOptions.organization.option.getValue(); 240 boolean useOrgLevel = MyOptions.organization.option.doesOccur(); 241 if (useOrgLevel) { 242 locales = STANDARD_CODES.getLocaleCoverageLocales(organization); 243 } 244 245 if (MyOptions.version.option.doesOccur()) { 246 String number = MyOptions.version.option.getValue().trim(); 247 if (!number.contains(".")) { 248 number += ".0"; 249 } 250 factory = 251 org.unicode.cldr.util.Factory.make( 252 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*"); 253 } else { 254 if (MyOptions.directories.option.doesOccur()) { 255 String directories = MyOptions.directories.option.getValue().trim(); 256 CLDRConfig cldrConfig = CONFIG; 257 String base = null; 258 int colonPos = directories.indexOf(':'); 259 if (colonPos >= 0) { 260 base = directories.substring(0, colonPos).trim(); 261 directories = directories.substring(colonPos + 1).trim(); 262 } else { 263 base = cldrConfig.getCldrBaseDirectory().toString(); 264 } 265 String[] items = directories.split(",\\s*"); 266 File[] fullDirectories = new File[items.length]; 267 int i = 0; 268 for (String item : items) { 269 fullDirectories[i++] = new File(base + "/" + item + "/main"); 270 } 271 factory = SimpleFactory.make(fullDirectories, ".*"); 272 COMMON_LOCALES = 273 SimpleFactory.make(base + "/" + "common" + "/main", ".*") 274 .getAvailableLanguages(); 275 } 276 } 277 fixCommonLocales(); 278 279 showCoverage(null, matcher, locales, useOrgLevel); 280 } 281 fixCommonLocales()282 private static void fixCommonLocales() { 283 if (COMMON_LOCALES == null) { 284 COMMON_LOCALES = factory.getAvailableLanguages(); 285 } 286 } 287 288 public static class FoundAndTotal { 289 final int found; 290 final int total; 291 292 @SafeVarargs FoundAndTotal(Counter<Level>.... counters)293 public FoundAndTotal(Counter<Level>... counters) { 294 final int[] count = {0, 0, 0}; 295 for (Level level : Level.values()) { 296 if (level == Level.COMPREHENSIVE) { 297 continue; 298 } 299 int i = 0; 300 for (Counter<Level> counter : counters) { 301 count[i++] += counter.get(level); 302 } 303 } 304 found = count[0]; 305 total = found + count[1] + count[2]; 306 } 307 308 @Override toString()309 public String toString() { 310 return found + "/" + total; 311 } 312 } 313 showCoverage(Anchors anchors, Matcher matcher)314 static void showCoverage(Anchors anchors, Matcher matcher) throws IOException { 315 showCoverage(anchors, matcher, null, false); 316 } 317 showCoverage( Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)318 private static void showCoverage( 319 Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) 320 throws IOException { 321 final String title = "Locale Coverage"; 322 try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors)); 323 PrintWriter tsv_summary = 324 FileUtilities.openUTF8Writer( 325 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv"); 326 PrintWriter tsv_missing = 327 FileUtilities.openUTF8Writer( 328 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv"); 329 PrintWriter tsv_missing_summary = 330 FileUtilities.openUTF8Writer( 331 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv"); 332 PrintWriter tsv_missing_basic = 333 FileUtilities.openUTF8Writer( 334 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv"); 335 PrintWriter tsv_missing_counts = 336 FileUtilities.openUTF8Writer( 337 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv"); 338 TempPrintWriter propertiesCoverage = 339 TempPrintWriter.openUTF8Writer( 340 CLDRPaths.COMMON_DIRECTORY + "properties/", 341 "coverageLevels.txt"); ) { 342 tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER); 343 tsv_missing.println(TSV_MISSING_HEADER); 344 tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER); 345 tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER); 346 347 final int propertiesCoverageTabCount = 2; 348 propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, PROPERTIES_HEADER); 349 350 Set<String> checkModernLocales = 351 STANDARD_CODES.getLocaleCoverageLocales( 352 Organization.cldr, EnumSet.of(Level.MODERN)); 353 Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages()); 354 availableLanguages.addAll(checkModernLocales); 355 356 Multimap<String, String> languageToRegion = TreeMultimap.create(); 357 LanguageTagParser ltp = new LanguageTagParser(); 358 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true); 359 for (String locale : factory.getAvailable()) { 360 String country = ltp.set(locale).getRegion(); 361 if (!country.isEmpty()) { 362 languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country); 363 } 364 } 365 languageToRegion = ImmutableMultimap.copyOf(languageToRegion); 366 367 fixCommonLocales(); 368 369 System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet())); 370 371 System.out.println("# Checking: " + availableLanguages); 372 373 NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH); 374 percentFormat.setMaximumFractionDigits(1); 375 376 pw.println( 377 "<p style='text-align: left'>This chart shows the coverage levels in this release. " 378 + "Totals are listed after the main chart.</p>\n" 379 + "<blockquote><ul>\n" 380 + "<li><a href='#main_table'>Main Table</a></li>\n" 381 + "<li><a href='#level_counts'>Level Counts</a></li>\n" 382 + "</ul></blockquote>\n" 383 + "<h3>Column Key</h3>\n" 384 + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n" 385 + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n" 386 + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n" 387 + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. " 388 + "Particular organizations may have different target levels. " 389 + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n" 390 + "<tr><th>≟</th><td>Indicates whether the CLDR Target is less than, equal to, or greater than the Computed Level.</td></tr>\n" 391 + "<tr><th>Computed Level</th><td>Computed from the percentage values, " 392 + "taking the first level that meets a threshold (currently " 393 + percentFormat.format(MODERN_THRESHOLD) 394 + ", ⓜ " 395 + percentFormat.format(MODERATE_THRESHOLD) 396 + ", ⓑ " 397 + percentFormat.format(BASIC_THRESHOLD) 398 + ").</td></tr>\n" 399 + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n" 400 + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. " 401 + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n" 402 + "<tr><th>%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. " 403 + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. " 404 + "A high-level summary of the meaning of the coverage values is at " 405 + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " 406 + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. " 407 + "</td></tr>\n" 408 + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. " 409 + "They are listed if missing at the computed level. For more information, see <a href='https://cldr.unicode.org/index/locale-coverage'>Missing Features</a><br>" 410 + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>" 411 + "<ul><li>" 412 + "<i>Except for Core, these are not accounted for in the percent values.</i>" 413 + "</li><li>" 414 + "The information needs to be provided in tickets, not through the Survey Tool." 415 + "</li></ul>" 416 + "</td></tr>\n" 417 + "<tr><th>" 418 + linkTsv("", "TSVFiles") 419 + ":</th><td>\n" 420 + "<ul><li>" 421 + linkTsv("locale-coverage.tsv") 422 + " — A version of this file, suitable for loading into a spreadsheet.</li>\n" 423 + "<li>" 424 + linkTsv("locale-missing.tsv") 425 + " — Missing items for the CLDR target locales.</li>\n" 426 + "<li>" 427 + linkTsv("locale-missing-summary.tsv") 428 + " — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n" 429 + "<li>" 430 + linkTsv("locale-missing-basic.tsv") 431 + " — Missing items that keep locales from reaching the Basic level.</li>\n" 432 + "<li>" 433 + linkTsv("locale-missing-counts.tsv") 434 + " — Counts of items per locale that are found, unconfirmed, or missing, at the target level. " 435 + "(Or at *basic, if there is no target level.)</li>\n" 436 + "</td></tr>\n" 437 + "</table>\n"); 438 439 Relation<MissingStatus, String> missingPaths = 440 Relation.of( 441 new EnumMap<MissingStatus, Set<String>>(MissingStatus.class), 442 TreeSet.class, 443 CLDRFile.getComparator(DtdType.ldml)); 444 Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml)); 445 446 Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 447 448 Counter<Level> foundCounter = new Counter<>(); 449 Counter<Level> unconfirmedCounter = new Counter<>(); 450 Counter<Level> missingCounter = new Counter<>(); 451 452 List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class)); 453 levelsToShow.remove(Level.COMPREHENSIVE); 454 levelsToShow.remove(Level.UNDETERMINED); 455 levelsToShow = ImmutableList.copyOf(levelsToShow); 456 List<Level> reversedLevels = new ArrayList<>(levelsToShow); 457 Collections.reverse(reversedLevels); 458 reversedLevels = ImmutableList.copyOf(reversedLevels); 459 460 int localeCount = 0; 461 462 final TablePrinter tablePrinter = 463 new TablePrinter() 464 .addColumn( 465 "Language", 466 "class='source'", 467 CldrUtility.getDoubleLinkMsg(), 468 "class='source'", 469 true) 470 .setBreakSpans(true) 471 .addColumn( 472 "English Name", "class='source'", null, "class='source'", true) 473 .setBreakSpans(true) 474 .addColumn( 475 "Native Name", "class='source'", null, "class='source'", true) 476 .setBreakSpans(true) 477 .addColumn("Script", "class='source'", null, "class='source'", true) 478 .setBreakSpans(true) 479 .addColumn( 480 "Default Region", 481 "class='source'", 482 null, 483 "class='source'", 484 true) 485 .setBreakSpans(true) 486 .addColumn( 487 "№ Locales", 488 "class='source'", 489 null, 490 "class='targetRight'", 491 true) 492 .setBreakSpans(true) 493 .setCellPattern("{0,number}") 494 .addColumn( 495 "Target Level", "class='source'", null, "class='source'", true) 496 .setBreakSpans(true) 497 .addColumn("≟", "class='target'", null, "class='target'", true) 498 .setBreakSpans(true) 499 .setSortPriority(1) 500 .setSortAscending(false) 501 .addColumn( 502 "Computed Level", 503 "class='target'", 504 null, 505 "class='target'", 506 true) 507 .setBreakSpans(true) 508 .setSortPriority(0) 509 .setSortAscending(false) 510 .addColumn("ICU", "class='target'", null, "class='target'", true) 511 .setBreakSpans(true) 512 .addColumn( 513 "Confirmed", 514 "class='target'", 515 null, 516 "class='targetRight' style='color:gray'", 517 true) 518 .setBreakSpans(true) 519 .setCellPattern("{0,number,0.0%}"); 520 521 NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH); 522 tsvPercent.setMaximumFractionDigits(2); 523 524 for (Level level : reversedLevels) { 525 String titleLevel = level.getAbbreviation() + "%"; 526 tablePrinter 527 .addColumn(titleLevel, "class='target'", null, "class='targetRight'", true) 528 .setCellPattern("{0,number,0.0%}") 529 .setBreakSpans(true); 530 531 switch (level) { 532 default: 533 tablePrinter.setSortPriority(2).setSortAscending(false); 534 break; 535 case BASIC: 536 tablePrinter.setSortPriority(3).setSortAscending(false); 537 break; 538 case MODERATE: 539 tablePrinter.setSortPriority(4).setSortAscending(false); 540 break; 541 case MODERN: 542 tablePrinter.setSortPriority(5).setSortAscending(false); 543 break; 544 } 545 } 546 tablePrinter 547 .addColumn("Missing Features", "class='target'", null, "class='target'", true) 548 .setBreakSpans(true); 549 550 long start = System.currentTimeMillis(); 551 LikelySubtags likelySubtags = new LikelySubtags(); 552 553 EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class); 554 targetLevel.put(Level.CORE, 2 / 100d); 555 targetLevel.put(Level.BASIC, 16 / 100d); 556 targetLevel.put(Level.MODERATE, 33 / 100d); 557 targetLevel.put(Level.MODERN, 100 / 100d); 558 559 Multimap<String, String> pathToLocale = TreeMultimap.create(); 560 561 Counter<Level> computedLevels = new Counter<>(); 562 Counter<Level> computedSublocaleLevels = new Counter<>(); 563 564 for (String locale : availableLanguages) { 565 try { 566 if (locale.contains("supplemental") // for old versionsl 567 // || locale.startsWith("sr_Latn") 568 ) { 569 continue; 570 } 571 if (locales != null && !locales.contains(locale)) { 572 String base = CLDRLocale.getInstance(locale).getLanguage(); 573 if (!locales.contains(base)) { 574 continue; 575 } 576 } 577 if (matcher != null && !matcher.reset(locale).matches()) { 578 continue; 579 } 580 if (defaultContents.contains(locale) 581 || LocaleNames.ROOT.equals(locale) 582 || LocaleNames.UND.equals(locale)) { 583 continue; 584 } 585 586 tsv_missing_summary.flush(); 587 tsv_missing.flush(); 588 tsv_missing_basic.flush(); 589 tsv_missing_counts.flush(); 590 591 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists(); 592 593 String region = ltp.set(locale).getRegion(); 594 if (!region.isEmpty()) continue; // skip regions 595 596 final Level cldrLocaleLevelGoal = 597 SC.getLocaleCoverageLevel(Organization.cldr, locale); 598 final String specialFlag = getSpecialFlag(locale); 599 600 final boolean cldrLevelGoalBasicToModern = 601 Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal); 602 603 String max = likelySubtags.maximize(locale); 604 final String script = ltp.set(max).getScript(); 605 final String defRegion = ltp.getRegion(); 606 607 final String language = likelySubtags.minimize(locale); 608 609 missingPaths.clear(); 610 unconfirmed.clear(); 611 612 final CLDRFile file = factory.make(locale, true, minimumDraftStatus); 613 614 if (locale.equals("af")) { 615 int debug = 0; 616 } 617 618 Iterable<String> pathSource = new IterableFilter(file.fullIterable()); 619 620 VettingViewer.getStatus( 621 pathSource, 622 file, 623 pathHeaderFactory, 624 foundCounter, 625 unconfirmedCounter, 626 missingCounter, 627 missingPaths, 628 unconfirmed); 629 630 { 631 long found = 0; 632 long unconfirmedc = 0; 633 long missing = 0; 634 Level adjustedGoal = 635 cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0 636 ? Level.BASIC 637 : cldrLocaleLevelGoal; 638 for (Level level : Level.values()) { 639 if (level.compareTo(adjustedGoal) <= 0) { 640 found += foundCounter.get(level); 641 unconfirmedc += unconfirmedCounter.get(level); 642 missing += missingCounter.get(level); 643 } 644 } 645 String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*"; 646 tsv_missing_counts.println( 647 specialFlag 648 + locale 649 + "\t" 650 + goalFlag 651 + adjustedGoal 652 + "\t" 653 + found 654 + "\t" 655 + unconfirmedc 656 + "\t" 657 + missing); 658 } 659 660 Collection<String> sublocales = languageToRegion.asMap().get(language); 661 if (sublocales == null) { 662 sublocales = Collections.emptySet(); 663 } 664 sublocales = ImmutableSet.copyOf(sublocales); 665 666 // get the totals 667 668 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class); 669 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class); 670 Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class); 671 672 StatusCounter starredCounter = new StatusCounter(); 673 674 { 675 Multimap<CoreItems, String> detailedErrors = TreeMultimap.create(); 676 Set<CoreItems> coverage = 677 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors); 678 for (CoreItems item : coverage) { 679 foundCounter.add(item.desiredLevel, 1); 680 } 681 for (Entry<CoreItems, String> entry : detailedErrors.entries()) { 682 CoreItems coreItem = entry.getKey(); 683 String path = entry.getValue(); 684 specialMissingPaths.add(coreItem); 685 // if goal (eg modern) >= itemLevel, indicate it is missing 686 if (coreItem.desiredLevel == Level.BASIC) { 687 starredCounter.gatherStarred(path, null); 688 } 689 missingCounter.add(coreItem.desiredLevel, 1); 690 } 691 } 692 693 if (cldrLevelGoalBasicToModern) { 694 Level goalLevel = cldrLocaleLevelGoal; 695 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 696 String path = entry.getValue(); 697 String status = entry.getKey().toString(); 698 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 699 if (goalLevel.compareTo(foundLevel) >= 0) { 700 String line = 701 spreadsheetLine( 702 locale, 703 language, 704 script, 705 specialFlag, 706 file.getStringValue(path), 707 goalLevel, 708 foundLevel, 709 status, 710 path, 711 file, 712 pathToLocale); 713 String lineToPrint1 = line; 714 tsv_missing.println(lineToPrint1); 715 } 716 } 717 for (String path : unconfirmed) { 718 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 719 if (goalLevel.compareTo(foundLevel) >= 0) { 720 String line = 721 spreadsheetLine( 722 locale, 723 language, 724 script, 725 specialFlag, 726 file.getStringValue(path), 727 goalLevel, 728 foundLevel, 729 "n/a", 730 path, 731 file, 732 pathToLocale); 733 tsv_missing.println(line); 734 } 735 } 736 } else { 737 Level goalLevel = Level.BASIC; 738 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 739 String path = entry.getValue(); 740 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 741 if (goalLevel.compareTo(foundLevel) >= 0) { 742 starredCounter.gatherStarred(path, null); 743 } 744 } 745 for (String path : unconfirmed) { 746 String fullPath = file.getFullXPath(path); 747 DraftStatus draftStatus = 748 fullPath.contains("unconfirmed") 749 ? DraftStatus.unconfirmed 750 : DraftStatus.provisional; 751 752 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 753 if (goalLevel.compareTo(foundLevel) >= 0) { 754 starredCounter.gatherStarred(path, draftStatus); 755 } 756 } 757 } 758 759 if (!starredCounter.starredPathToData.isEmpty()) { 760 for (Entry<String, StatusData> starred : 761 starredCounter.starredPathToData.entrySet()) { 762 String starredPath = starred.getKey(); 763 StatusData statusData = starred.getValue(); 764 String valueString = 765 statusData.values.stream() 766 .map(x -> Joiner.on(", ").join(x)) 767 .collect(Collectors.joining("; ")); 768 769 tsv_missing_basic.println( 770 specialFlag 771 + locale // 772 + "\t" 773 + statusData.missing // 774 + "\t" 775 + statusData.provisional // 776 + "\t" 777 + statusData.unconfirmed // 778 + "\t" 779 + starredPath.replace("\"*\"", "'*'") 780 + "\t" 781 + valueString 782 // 783 ); 784 } 785 tsv_missing_basic.println( 786 specialFlag 787 + locale // 788 + "\t" 789 + starredCounter.missingTotal // 790 + "\t" 791 + starredCounter.provisionalTotal // 792 + "\t" 793 + starredCounter.unconfirmedTotal // 794 + "\tTotals\t"); 795 tsv_missing_basic.println("\t\t\t\t\t"); // for a proper table in github 796 } 797 798 int sumFound = 0; 799 int sumMissing = 0; 800 int sumUnconfirmed = 0; 801 802 for (Level level : levelsToShow) { 803 long foundCount = foundCounter.get(level); 804 long unconfirmedCount = unconfirmedCounter.get(level); 805 long missingCount = missingCounter.get(level); 806 807 sumFound += foundCount; 808 sumUnconfirmed += unconfirmedCount; 809 sumMissing += missingCount; 810 811 confirmed.put(level, sumFound); 812 totals.put(level, sumFound + sumUnconfirmed + sumMissing); 813 } 814 815 // double modernTotal = totals.get(Level.MODERN); 816 817 // first get the accumulated values 818 EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class); 819 EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class); 820 int currTotals = 0; 821 int currConfirmed = 0; 822 for (Level level : levelsToShow) { 823 currTotals += totals.get(level); 824 currConfirmed += confirmed.get(level); 825 accumConfirmed.put(level, currConfirmed); 826 accumTotals.put(level, currTotals); 827 } 828 829 // print the totals 830 831 Level computed = Level.UNDETERMINED; 832 Map<Level, Double> levelToProportion = new EnumMap<>(Level.class); 833 834 for (Level level : reversedLevels) { 835 int confirmedCoverage = accumConfirmed.get(level); 836 double total = accumTotals.get(level); 837 838 final double proportion = confirmedCoverage / total; 839 levelToProportion.put(level, proportion); 840 841 if (computed == Level.UNDETERMINED) { 842 switch (level) { 843 case MODERN: 844 if (proportion >= MODERN_THRESHOLD) { 845 computed = level; 846 } 847 break; 848 case MODERATE: 849 if (proportion >= MODERATE_THRESHOLD) { 850 computed = level; 851 } 852 break; 853 case BASIC: 854 if (proportion >= BASIC_THRESHOLD) { 855 computed = level; 856 } 857 break; 858 default: 859 break; 860 } 861 } 862 } 863 864 Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class); 865 Level computedWithCore = 866 computed == Level.UNDETERMINED ? Level.BASIC : computed; 867 for (CoreItems item : specialMissingPaths) { 868 if (item.desiredLevel.compareTo(computedWithCore) <= 0) { 869 shownMissingPaths.add(item); 870 } else { 871 int debug = 0; 872 } 873 } 874 computedLevels.add(computed, 1); 875 computedSublocaleLevels.add(computed, sublocales.size()); 876 877 final String coreMissingString = Joiner.on(", ").join(shownMissingPaths); 878 final String visibleLevelComputed = 879 computed == Level.UNDETERMINED ? "" : computed.toString(); 880 final String visibleLevelGoal = 881 cldrLocaleLevelGoal == Level.UNDETERMINED 882 ? "" 883 : specialFlag + cldrLocaleLevelGoal.toString(); 884 final String goalComparedToComputed = 885 computed == cldrLocaleLevelGoal 886 ? " ≡" 887 : cldrLocaleLevelGoal.compareTo(computed) < 0 ? " <" : " >"; 888 889 tablePrinter 890 .addRow() 891 .addCell(language) 892 .addCell(ENGLISH.getName(language, true, CLDRFile.SHORT_ALTS)) 893 .addCell(file.getName(language)) 894 .addCell(script) 895 .addCell(defRegion) 896 .addCell(sublocales.size()) 897 .addCell(visibleLevelGoal) 898 .addCell(goalComparedToComputed) 899 .addCell(visibleLevelComputed) 900 .addCell(getIcuValue(language)) 901 .addCell(sumFound / (double) (sumFound + sumUnconfirmed)); 902 903 // print the totals 904 for (Level level : reversedLevels) { 905 tablePrinter.addCell(levelToProportion.get(level)); 906 } 907 908 tablePrinter.addCell(coreMissingString).finishRow(); 909 910 // now write properties file line 911 912 if (computed != Level.UNDETERMINED) { 913 propertiesCoverage.printlnWithTabs( 914 propertiesCoverageTabCount, 915 locale 916 + " ;\t" 917 + visibleLevelComputed 918 + " ;\t" 919 + ENGLISH.getName(locale)); 920 // TODO decide whether to restore this 921 // Level higher = Level.UNDETERMINED; 922 // switch (computed) { 923 // default: 924 // higher = Level.UNDETERMINED; 925 // break; 926 // case MODERATE: 927 // higher = Level.MODERN; 928 // break; 929 // case BASIC: 930 // higher = Level.MODERATE; 931 // break; 932 // } 933 // double higherProportion = higher == 934 // Level.UNDETERMINED ? 0d : levelToProportion.get(higher); 935 // 936 // if (higherProportion >= THRESHOLD_HIGHER) { 937 // propertiesCoverage.println( 938 // " ;\t" + 939 // tsvPercent.format(higherProportion) + 940 // " ;\t" + higher 941 // ); 942 // } else { 943 // propertiesCoverage.println(" ;\t" + "" + " 944 // ;\t" + ""); 945 // } 946 } 947 localeCount++; 948 } catch (Exception e) { 949 throw new IllegalArgumentException(e); 950 } 951 } 952 String lineToPrint = "\n#EOF"; 953 propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, lineToPrint); 954 955 pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>"); 956 pw.println(tablePrinter.toTable()); 957 958 pw.println( 959 "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n" 960 + "<table class='subtle'><tr>\n" 961 + "<th style='text-align:left'>" 962 + "Level" 963 + "</th>" 964 + "<th style='text-align:left'>" 965 + "Languages" 966 + "</th>" 967 + "<th style='text-align:left'>" 968 + "Locales" 969 + "</th>" 970 + "</tr>"); 971 long totalCount = 0; 972 long totalLocaleCount = 0; 973 for (Level level : Lists.reverse(Arrays.asList(Level.values()))) { 974 final long count = computedLevels.get(level); 975 final long localesCount = computedSublocaleLevels.get(level); 976 if (count == 0 || level == Level.UNDETERMINED) { 977 continue; 978 } 979 totalCount += count; 980 totalLocaleCount += localesCount; 981 String visibleImputed = 982 level == Level.UNDETERMINED 983 ? "<" + Level.BASIC.toString() 984 : level.toString(); 985 pw.println( 986 "<tr>" 987 + "<th style='text-align:left'>" 988 + visibleImputed 989 + "</th>" 990 + "<td style='text-align:right'>" 991 + count 992 + "</td>" 993 + "<td style='text-align:right'>" 994 + localesCount 995 + "</td>" 996 + "</tr>"); 997 } 998 pw.println( 999 "<tr>" 1000 + "<th style='text-align:left'>" 1001 + "Total" 1002 + "</th>" 1003 + "<td style='text-align:right'>" 1004 + totalCount 1005 + "</td>" 1006 + "<td style='text-align:right'>" 1007 + totalLocaleCount 1008 + "</td>" 1009 + "</tr>\n"); 1010 1011 pw.println( 1012 "<tr>" 1013 + "<th style='text-align:left'>" 1014 + "in dev." 1015 + "</th>" 1016 + "<td style='text-align:right'>" 1017 + computedLevels.get(Level.UNDETERMINED) 1018 + "</td>" 1019 + "<td style='text-align:right'>" 1020 + computedSublocaleLevels.get(Level.UNDETERMINED) 1021 + "</td>" 1022 + "</tr>\n" 1023 + "</table>"); 1024 1025 Multimap<Level, String> levelToLocales = TreeMultimap.create(); 1026 1027 for (Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) { 1028 String path = entry.getKey(); 1029 Collection<String> localeSet = entry.getValue(); 1030 levelToLocales.clear(); 1031 for (String locale : localeSet) { 1032 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 1033 levelToLocales.put(foundLevel, locale); 1034 } 1035 String phString = "n/a\tn/a\tn/a\tn/a"; 1036 try { 1037 PathHeader ph = pathHeaderFactory.fromPath(path); 1038 phString = ph.toString(); 1039 } catch (Exception e) { 1040 } 1041 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) { 1042 Level level = entry2.getKey(); 1043 localeSet = entry2.getValue(); 1044 tsv_missing_summary.println( 1045 level 1046 + "\t" 1047 + localeSet.size() 1048 + "\t" 1049 + Joiner.on(" ") 1050 .join( 1051 localeSet.stream() 1052 .map(x -> x + getSpecialFlag(x)) 1053 .collect(Collectors.toSet())) 1054 + "\t" 1055 + phString); 1056 } 1057 } 1058 tablePrinter.toTsv(tsv_summary); 1059 long end = System.currentTimeMillis(); 1060 System.out.println( 1061 (end - start) 1062 + " millis = " 1063 + ((end - start) / localeCount) 1064 + " millis/locale"); 1065 ShowPlurals.appendBlanksForScrolling(pw); 1066 } 1067 } 1068 linkTsv(String tsvFileName)1069 private static String linkTsv(String tsvFileName) { 1070 return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + tsvFileName + "</a>"; 1071 } 1072 linkTsv(String tsvFileName, String anchorText)1073 private static String linkTsv(String tsvFileName, String anchorText) { 1074 return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + anchorText + "</a>"; 1075 } 1076 getSpecialFlag(String locale)1077 private static String getSpecialFlag(String locale) { 1078 return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED 1079 ? "" 1080 : "‡"; 1081 } 1082 1083 private static class IterableFilter implements Iterable<String> { 1084 private Iterable<String> source; 1085 IterableFilter(Iterable<String> source)1086 IterableFilter(Iterable<String> source) { 1087 this.source = source; 1088 } 1089 1090 /** 1091 * When some paths are defined after submission, we need to change them to COMPREHENSIVE in 1092 * computing the vetting status. 1093 */ 1094 private static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of(); 1095 1096 @Override iterator()1097 public Iterator<String> iterator() { 1098 return new IteratorFilter(source.iterator()); 1099 } 1100 1101 private static class IteratorFilter implements Iterator<String> { 1102 Iterator<String> source; 1103 String peek; 1104 IteratorFilter(Iterator<String> source)1105 public IteratorFilter(Iterator<String> source) { 1106 this.source = source; 1107 fillPeek(); 1108 } 1109 1110 @Override hasNext()1111 public boolean hasNext() { 1112 return peek != null; 1113 } 1114 1115 @Override next()1116 public String next() { 1117 String result = peek; 1118 fillPeek(); 1119 return result; 1120 } 1121 fillPeek()1122 private void fillPeek() { 1123 peek = null; 1124 while (source.hasNext()) { 1125 peek = source.next(); 1126 // if it is ok to assess, then break 1127 if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek) 1128 && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) { 1129 break; 1130 } 1131 peek = null; 1132 } 1133 } 1134 } 1135 } 1136 1137 private static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO); 1138 spreadsheetLine( String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1139 private static String spreadsheetLine( 1140 String locale, 1141 String language, 1142 String script, 1143 String specialFlag, 1144 String nativeValue, 1145 Level cldrLocaleLevelGoal, 1146 Level itemLevel, 1147 String status, 1148 String path, 1149 CLDRFile resolvedFile, 1150 Multimap<String, String> pathToLocale) { 1151 if (pathToLocale != null) { 1152 pathToLocale.put(path, locale); 1153 } 1154 // String stLink = "n/a"; 1155 // String englishValue = "n/a"; 1156 // StatusAction action = null; 1157 // String icuValue = getIcuValue(locale); 1158 1159 SurveyToolStatus surveyToolStatus = null; 1160 String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path); 1161 1162 String phString = "na\tn/a\tn/a\t" + path; 1163 try { 1164 PathHeader ph = pathHeaderFactory.fromPath(path); 1165 phString = ph.toString(); 1166 // stLink = URLS.forXpath(locale, path); 1167 // englishValue = ENGLISH.getStringValue(path); 1168 // action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, 1169 // InputMethod.DIRECT, ph, dummyUserInfo); 1170 } catch (Exception e) { 1171 1172 } 1173 1174 String line = 1175 specialFlag 1176 + language 1177 + "\t" 1178 + ENGLISH.getName(language) 1179 + "\t" 1180 + ENGLISH.getName("script", script) 1181 + "\t" 1182 + cldrLocaleLevelGoal 1183 + "\t" 1184 + itemLevel 1185 + "\t" 1186 + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString()) 1187 + "\t" 1188 + bailey 1189 + "\t" 1190 + phString 1191 + "\t" 1192 + PathHeader.getUrlForLocalePath(locale, path); 1193 return line; 1194 } 1195 getIcuValue(String locale)1196 private static String getIcuValue(String locale) { 1197 return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : ""; 1198 } 1199 1200 private static final Set<ULocale> ICU_Locales = 1201 ImmutableSet.copyOf(ULocale.getAvailableLocales()); 1202 } 1203