1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Collection; 9 import java.util.Collections; 10 import java.util.EnumMap; 11 import java.util.EnumSet; 12 import java.util.HashMap; 13 import java.util.HashSet; 14 import java.util.Iterator; 15 import java.util.LinkedHashMap; 16 import java.util.LinkedHashSet; 17 import java.util.List; 18 import java.util.Locale; 19 import java.util.Map; 20 import java.util.Map.Entry; 21 import java.util.Set; 22 import java.util.TreeMap; 23 import java.util.TreeSet; 24 import java.util.regex.Matcher; 25 import java.util.stream.Collectors; 26 27 import org.unicode.cldr.draft.FileUtilities; 28 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 29 import org.unicode.cldr.tool.Option.Options; 30 import org.unicode.cldr.util.CLDRConfig; 31 import org.unicode.cldr.util.CLDRFile; 32 import org.unicode.cldr.util.CLDRFile.DraftStatus; 33 import org.unicode.cldr.util.CLDRFile.Status; 34 import org.unicode.cldr.util.CLDRInfo.CandidateInfo; 35 import org.unicode.cldr.util.CLDRInfo.PathValueInfo; 36 import org.unicode.cldr.util.CLDRInfo.UserInfo; 37 import org.unicode.cldr.util.CLDRLocale; 38 import org.unicode.cldr.util.CLDRPaths; 39 import org.unicode.cldr.util.CldrUtility; 40 import org.unicode.cldr.util.CoreCoverageInfo; 41 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems; 42 import org.unicode.cldr.util.Counter; 43 import org.unicode.cldr.util.Counter2; 44 import org.unicode.cldr.util.CoverageInfo; 45 import org.unicode.cldr.util.DtdType; 46 import org.unicode.cldr.util.LanguageTagCanonicalizer; 47 import org.unicode.cldr.util.LanguageTagParser; 48 import org.unicode.cldr.util.Level; 49 import org.unicode.cldr.util.Organization; 50 import org.unicode.cldr.util.PathHeader; 51 import org.unicode.cldr.util.PathHeader.Factory; 52 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 53 
import org.unicode.cldr.util.PathStarrer;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.RegexLookup.LookupType;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.VettingViewer;
import org.unicode.cldr.util.VettingViewer.MissingStatus;
import org.unicode.cldr.util.VoteResolver.VoterInfo;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;

/**
 * Command-line tool that writes the "Locale Coverage" chart, a set of TSV reports
 * (locale-coverage, locale-missing, locale-missing-summary, locale-missing-basic,
 * locale-missing-counts), the {@code coverageLevels.txt} properties file and,
 * with the growth option, {@code locale-growth.tsv}.
 */
public class ShowLocaleCoverage {
    // thresholds for measuring Level attainment
    private static final double BASIC_THRESHOLD = 1;
    private static final double MODERATE_THRESHOLD = 0.995;
    private static final double MODERN_THRESHOLD = 0.995;

    // used to show higher level in properties file
    private static final double THRESHOLD_HIGHER = 0.90d;

    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

    /** Header row written as the first line of locale-missing-summary.tsv. */
    private static final String TSV_MISSING_SUMMARY_HEADER =
        "#Path Level"
            + "\t#Locales"
            + "\tLocales"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            ;

    /** Header row written as the first line of locale-missing.tsv. */
    private static final String TSV_MISSING_HEADER =
        "#LCode"
            + "\tEnglish Name"
            + "\tScript"
            + "\tLocale Level"
            + "\tPath Level"
            + "\tSTStatus"
            + "\tBailey"
            + "\tSection"
            + "\tPage"
            + "\tHeader"
            + "\tCode"
            + "\tST Link"
            ;

    /** Comment header written at the top of the generated coverageLevels.txt. */
    private static final String PROPERTIES_HEADER =
        // Extra tabs are for github table formatting
        "#\tcoverageLevels.txt\t\n"
            + "#\tCopyright © 2022 Unicode, Inc.\n"
            + "#\tCLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)\n"
            + "#\tFor terms of use, see http://www.unicode.org/copyright.html\n"
            + "#\t\n"
            + "#\tProvides the Coverage Level of locales at Basic or Above.\n"
            + "#\tFor more info,see the Locale Coverage Chart for this version.\n"
            + "#\tGenerated by ShowLocaleCoverage.\n"
            + "#\t\n"
            + "#Locale"
            + " ;\tLevel"
        //            + " ;\t% of Higher"
        //            + " ;\tHigher Level"
        ;

    /** Header row written as the first line of locale-missing-basic.tsv. */
    private static final String TSV_MISSING_BASIC_HEADER = "#Locale\tProv.\tUnconf.\tMissing\tPath*";
    /** Header row written as the first line of locale-missing-counts.tsv. */
    private static final String TSV_MISSING_COUNTS_HEADER = "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing";

    /** When true, doGrowth additionally prints each release's per-locale data to stdout. */
    private static final boolean DEBUG = true;
    private static final char DEBUG_FILTER = 0; // use letter to only load locales starting with that letter

    /** Default value of the {@code version} command-line option. */
    private static final String LATEST = ToolConstants.CHART_VERSION;
    public static CLDRConfig testInfo = ToolConfig.getToolInstance();
    private static final StandardCodes SC = StandardCodes.make();
    private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo();
    /** Alias of {@link #SC}; both names refer to the same instance. */
    private static final StandardCodes STANDARD_CODES = SC;

    /** Source of locale data; main() replaces it when the version or directories option is given. */
    static org.unicode.cldr.util.Factory factory = testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
    /** Resolved English file, used to build the PathHeader factory. */
    private static final CLDRFile ENGLISH = factory.make("en", true);

    // added info using pattern in VettingViewer.
145 146 static final RegexLookup<Boolean> HACK = RegexLookup.<Boolean> of(LookupType.STANDARD, RegexLookup.RegexFinderTransformPath) 147 .add("//ldml/localeDisplayNames/keys/key[@type=\"(d0|em|fw|i0|k0|lw|m0|rg|s0|ss|t0|x0)\"]", true) 148 .add("//ldml/localeDisplayNames/types/type[@key=\"(em|fw|kr|lw|ss)\"].*", true) 149 .add("//ldml/localeDisplayNames/languages/language[@type=\".*_.*\"]", true) 150 .add("//ldml/localeDisplayNames/languages/language[@type=\".*\"][@alt=\".*\"]", true) 151 .add("//ldml/localeDisplayNames/territories/territory[@type=\".*\"][@alt=\".*\"]", true) 152 .add("//ldml/localeDisplayNames/territories/territory[@type=\"EZ\"]", true); 153 154 //private static final String OUT_DIRECTORY = CLDRPaths.GEN_DIRECTORY + "/coverage/"; // CldrUtility.MAIN_DIRECTORY; 155 156 final static Options myOptions = new Options(); 157 158 enum MyOptions { 159 filter(".+", ".*", "Filter the information based on id, using a regex argument."), 160 // draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft status."), 161 chart(null, null, "chart only"), 162 growth("true", "true", "Compute growth data"), 163 organization(".+", null, "Only locales for organization"), 164 version(".+", 165 LATEST, "To get different versions"), 166 rawData(null, null, "Output the raw data from all coverage levels"), 167 targetDir(".*", 168 CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."), 169 directories("(.*:)?[a-z]+(,[a-z]+)*", "common", 170 "Space-delimited list of main source directories: common,seed,exemplar.\n" + 171 "Optional, <baseDir>:common,seed"),; 172 173 // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target directory."), 174 // layouts(null, null, "Only create html files for keyboard layouts"), 175 // repertoire(null, null, "Only create html files for repertoire"), ; 176 // boilerplate 177 final Option option; 178 MyOptions(String argumentPattern, String defaultArgument, String helpText)179 MyOptions(String 
argumentPattern, String defaultArgument, String helpText) { 180 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 181 } 182 } 183 184 static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = new RegexLookup<Boolean>() 185 .add("\\[@alt=\"accounting\"]", true) 186 .add("\\[@alt=\"variant\"]", true) 187 .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true) 188 .add("^//ldml/localeDisplayNames/languages/language.*_", true) 189 .add("^//ldml/numbers/currencies/currency.*/symbol", true) 190 .add("^//ldml/characters/exemplarCharacters", true); 191 192 static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed; 193 static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH); 194 195 static boolean RAW_DATA = true; 196 private static Set<String> COMMON_LOCALES; 197 198 199 static class StatusData { 200 int missing; 201 int provisional; 202 int unconfirmed; 203 } 204 static class StatusCounter { 205 PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 206 Map<String, StatusData> starredPathToData = new TreeMap<>(); 207 int missingTotal; 208 int provisionalTotal; 209 int unconfirmedTotal; 210 gatherStarred(String path, DraftStatus draftStatus)211 public void gatherStarred(String path, DraftStatus draftStatus) { 212 String starredPath = pathStarrer.set(path); 213 StatusData statusData = starredPathToData.get(starredPath); 214 if (statusData == null) { 215 starredPathToData.put(starredPath, statusData = new StatusData()); 216 } 217 if (draftStatus == null) { 218 ++statusData.missing; 219 ++missingTotal; 220 } else switch(draftStatus) { 221 case unconfirmed: 222 ++statusData.unconfirmed; 223 ++unconfirmedTotal; 224 break; 225 case provisional: 226 ++statusData.provisional; 227 ++provisionalTotal; 228 break; 229 default: 230 break; 231 } 232 } 233 } 234 main(String[] args)235 public static void main(String[] args) throws IOException { 236 myOptions.parse(MyOptions.filter, args, true); 237 
238 Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher(""); 239 240 if (MyOptions.chart.option.doesOccur()) { 241 showCoverage(null, matcher); 242 return; 243 } 244 245 246 if (MyOptions.growth.option.doesOccur()) { 247 try (PrintWriter out = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-growth.tsv")) { 248 doGrowth(matcher, out); 249 return; 250 } 251 } 252 253 Set<String> locales = null; 254 String organization = MyOptions.organization.option.getValue(); 255 boolean useOrgLevel = MyOptions.organization.option.doesOccur(); 256 if (useOrgLevel) { 257 locales = STANDARD_CODES.getLocaleCoverageLocales(organization); 258 } 259 260 if (MyOptions.version.option.doesOccur()) { 261 String number = MyOptions.version.option.getValue().trim(); 262 if (!number.contains(".")) { 263 number += ".0"; 264 } 265 factory = org.unicode.cldr.util.Factory.make( 266 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*"); 267 } else { 268 if (MyOptions.directories.option.doesOccur()) { 269 String directories = MyOptions.directories.option.getValue().trim(); 270 CLDRConfig cldrConfig = CONFIG; 271 String base = null; 272 int colonPos = directories.indexOf(':'); 273 if (colonPos >= 0) { 274 base = directories.substring(0, colonPos).trim(); 275 directories = directories.substring(colonPos + 1).trim(); 276 } else { 277 base = cldrConfig.getCldrBaseDirectory().toString(); 278 } 279 String[] items = directories.split(",\\s*"); 280 File[] fullDirectories = new File[items.length]; 281 int i = 0; 282 for (String item : items) { 283 fullDirectories[i++] = new File(base + "/" + item + "/main"); 284 } 285 factory = SimpleFactory.make(fullDirectories, ".*"); 286 COMMON_LOCALES = SimpleFactory.make(base + "/" + "common" + "/main", ".*").getAvailableLanguages(); 287 } 288 } 289 fixCommonLocales(); 290 291 RAW_DATA = MyOptions.rawData.option.doesOccur(); 292 293 //showEnglish(); 294 295 showCoverage(null, matcher, locales, 
useOrgLevel); 296 } 297 fixCommonLocales()298 public static void fixCommonLocales() { 299 if (COMMON_LOCALES == null) { 300 COMMON_LOCALES = factory.getAvailableLanguages(); 301 } 302 } 303 doGrowth(Matcher matcher, PrintWriter out)304 private static void doGrowth(Matcher matcher, PrintWriter out) { 305 TreeMap<String, List<Double>> growthData = new TreeMap<>(Ordering.natural().reverse()); // sort by version, descending 306 // if (DEBUG) { 307 // for (String dir : new File(CLDRPaths.ARCHIVE_DIRECTORY).list()) { 308 // if (!dir.startsWith("cldr")) { 309 // continue; 310 // } 311 // String version = getNormalizedVersion(dir); 312 // if (version == null) { 313 // continue; 314 // } 315 // org.unicode.cldr.util.Factory newFactory = org.unicode.cldr.util.Factory.make( 316 // CLDRPaths.ARCHIVE_DIRECTORY + "/" + dir + "/common/main/", ".*"); 317 // System.out.println("Reading: " + version); 318 // Map<String, FoundAndTotal> currentData = addGrowth(newFactory, matcher); 319 // System.out.println("Read: " + version + "\t" + currentData); 320 // break; 321 // } 322 // } 323 Map<String, FoundAndTotal> latestData = null; 324 for (ReleaseInfo versionNormalizedVersionAndYear : versionToYear) { 325 VersionInfo version = versionNormalizedVersionAndYear.version; 326 int year = versionNormalizedVersionAndYear.year; 327 String dir = ToolConstants.getBaseDirectory(version.getVersionString(2, 3)); 328 Map<String, FoundAndTotal> currentData = addGrowth(factory, dir, matcher, false); 329 long found = 0; 330 long total = 0; 331 for (Entry<String, FoundAndTotal> entry : currentData.entrySet()) { 332 found += entry.getValue().found; 333 total += entry.getValue().total; 334 } 335 System.out.println("year\t" + year 336 + "\tversion\t" + version 337 + "\tlocales\t" + currentData.size() 338 + "\tfound\t" + found 339 + "\ttotal\t" + total 340 + "\tdetails\t" + currentData 341 ); 342 out.flush(); 343 if (latestData == null) { 344 latestData = currentData; 345 } 346 Counter2<String> completionData 
= getCompletion(latestData, currentData); 347 addCompletionList(year+"", completionData, growthData); 348 if (DEBUG) System.out.println(currentData); 349 } 350 boolean first = true; 351 for (Entry<String, List<Double>> entry : growthData.entrySet()) { 352 if (first) { 353 for (int i = 0; i < entry.getValue().size(); ++i) { 354 out.print("\t" + i); 355 } 356 out.println(); 357 first = false; 358 } 359 out.println(entry.getKey() + "\t" + Joiner.on("\t").join(entry.getValue())); 360 } 361 } 362 363 static final class ReleaseInfo { ReleaseInfo(VersionInfo versionInfo, int year)364 public ReleaseInfo(VersionInfo versionInfo, int year) { 365 this.version = versionInfo; 366 this.year = year; 367 } 368 VersionInfo version; 369 int year; 370 } 371 372 // TODO merge this into ToolConstants, and have the version expressed as VersionInfo. 373 static final List<ReleaseInfo> versionToYear; 374 static { 375 Object[][] mapping = { 376 { VersionInfo.getInstance(42), 2022 }, 377 { VersionInfo.getInstance(40), 2021 }, 378 { VersionInfo.getInstance(38), 2020 }, 379 { VersionInfo.getInstance(36), 2019 }, 380 { VersionInfo.getInstance(34), 2018 }, 381 { VersionInfo.getInstance(32), 2017 }, 382 { VersionInfo.getInstance(30), 2016 }, 383 { VersionInfo.getInstance(28), 2015 }, 384 { VersionInfo.getInstance(26), 2014 }, 385 { VersionInfo.getInstance(24), 2013 }, 386 { VersionInfo.getInstance(22,1), 2012 }, 387 { VersionInfo.getInstance(2,0,1), 2011 }, 388 { VersionInfo.getInstance(1,9,1), 2010 }, 389 { VersionInfo.getInstance(1,7,2), 2009 }, 390 { VersionInfo.getInstance(1,6,1), 2008 }, 391 { VersionInfo.getInstance(1,5,1), 2007 }, 392 { VersionInfo.getInstance(1,4,1), 2006 }, 393 { VersionInfo.getInstance(1,3), 2005 }, 394 { VersionInfo.getInstance(1,2), 2004 }, 395 { VersionInfo.getInstance(1,1,1), 2003 }, 396 }; 397 List<ReleaseInfo> _versionToYear = new ArrayList<>(); 398 for (Object[] row : mapping) { _versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1]))399 
_versionToYear.add(new ReleaseInfo((VersionInfo)row[0], (int)row[1])); 400 } 401 versionToYear = ImmutableList.copyOf(_versionToYear); 402 } 403 404 // public static String getNormalizedVersion(String dir) { 405 // String rawVersion = dir.substring(dir.indexOf('-') + 1); 406 // int firstDot = rawVersion.indexOf('.'); 407 // int secondDot = rawVersion.indexOf('.', firstDot + 1); 408 // if (secondDot > 0) { 409 // rawVersion = rawVersion.substring(0, firstDot) + rawVersion.substring(firstDot + 1, secondDot); 410 // } else { 411 // rawVersion = rawVersion.substring(0, firstDot); 412 // } 413 // String result = getYearFromVersion(rawVersion, true); 414 // return result == null ? null : result.toString(); 415 // } 416 417 // private static String getYearFromVersion(String version, boolean allowNull) { 418 // String result = versionToYear.get(version); 419 // if (!allowNull && result == null) { 420 // throw new IllegalArgumentException("No year for version: " + version); 421 // } 422 // return result; 423 // } 424 // 425 // private static String getVersionFromYear(String year, boolean allowNull) { 426 // String result = versionToYear.inverse().get(year); 427 // if (!allowNull && result == null) { 428 // throw new IllegalArgumentException("No version for year: " + year); 429 // } 430 // return result; 431 // } 432 addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData)433 public static void addCompletionList(String version, Counter2<String> completionData, TreeMap<String, List<Double>> growthData) { 434 List<Double> x = new ArrayList<>(); 435 for (String key : completionData.getKeysetSortedByCount(false)) { 436 x.add(completionData.getCount(key)); 437 } 438 growthData.put(version, x); 439 System.out.println(version + "\t" + x.size()); 440 } 441 getCompletion(Map<String, FoundAndTotal> latestData, Map<String, FoundAndTotal> currentData)442 public static Counter2<String> getCompletion(Map<String, FoundAndTotal> 
latestData, Map<String, FoundAndTotal> currentData) { 443 Counter2<String> completionData = new Counter2<>(); 444 for (Entry<String, FoundAndTotal> entry : latestData.entrySet()) { 445 final String locale = entry.getKey(); 446 final FoundAndTotal currentRecord = currentData.get(locale); 447 if (currentRecord == null) { 448 continue; 449 } 450 double total = entry.getValue().total; 451 if (total == 0) { 452 continue; 453 } 454 double completion = currentRecord.found / total; 455 completionData.add(locale, completion); 456 } 457 return completionData; 458 } 459 460 static class FoundAndTotal { 461 final int found; 462 final int total; 463 464 @SafeVarargs FoundAndTotal(Counter<Level>.... counters)465 public FoundAndTotal(Counter<Level>... counters) { 466 final int[] count = { 0, 0, 0 }; 467 for (Level level : Level.values()) { 468 if (level == Level.COMPREHENSIVE) { 469 continue; 470 } 471 int i = 0; 472 for (Counter<Level> counter : counters) { 473 count[i++] += counter.get(level); 474 } 475 } 476 found = count[0]; 477 total = found + count[1] + count[2]; 478 } 479 480 @Override toString()481 public String toString() { 482 return found + "/" + total; 483 } 484 } 485 addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing)486 private static Map<String, FoundAndTotal> addGrowth(org.unicode.cldr.util.Factory latestFactory, String dir, Matcher matcher, boolean showMissing) { 487 final File mainDir = new File(dir + "/common/main/"); 488 final File annotationDir = new File(dir + "/common/annotations/"); 489 File[] paths = annotationDir.exists() ? 
new File[] {mainDir, annotationDir} : new File[] {mainDir}; 490 org.unicode.cldr.util.Factory newFactory; 491 try { 492 newFactory = SimpleFactory.make(paths, ".*"); 493 } catch (RuntimeException e1) { 494 throw e1; 495 } 496 Map<String, FoundAndTotal> data = new HashMap<>(); 497 char c = 0; 498 Set<String> latestAvailable = newFactory.getAvailableLanguages(); 499 for (String locale : newFactory.getAvailableLanguages()) { 500 if (!matcher.reset(locale).matches()) { 501 continue; 502 } 503 if (!latestAvailable.contains(locale)) { 504 continue; 505 } 506 if (SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales().contains(locale) 507 || locale.equals("root") 508 || locale.equals("und") 509 || locale.equals("supplementalData")) { 510 continue; 511 } 512 char nc = locale.charAt(0); 513 if (nc != c) { 514 System.out.println("\t" + locale); 515 c = nc; 516 } 517 if (DEBUG_FILTER != 0 && DEBUG_FILTER != nc) { 518 continue; 519 } 520 CLDRFile latestFile = null; 521 try { 522 latestFile = latestFactory.make(locale, true); 523 } catch (Exception e2) { 524 System.out.println("Can't make latest CLDRFile for: " + locale + "\tpast: " + mainDir + "\tlatest: " + Arrays.asList(latestFactory.getSourceDirectories())); 525 continue; 526 } 527 CLDRFile file = null; 528 try { 529 file = newFactory.make(locale, true); 530 } catch (Exception e2) { 531 System.out.println("Can't make CLDRFile for: " + locale + "\tpast: " + mainDir); 532 continue; 533 } 534 // HACK check bogus 535 // Collection<String> extra = file.getExtraPaths(); 536 // 537 // final Iterable<String> fullIterable = file.fullIterable(); 538 // for (String path : fullIterable) { 539 // if (path.contains("\"one[@")) { 540 // boolean inside = extra.contains(path); 541 // Status status = new Status(); 542 // String loc = file.getSourceLocaleID(path, status ); 543 // int debug = 0; 544 // } 545 // } 546 // END HACK 547 Counter<Level> foundCounter = new Counter<>(); 548 Counter<Level> unconfirmedCounter = new Counter<>(); 549 
Counter<Level> missingCounter = new Counter<>(); 550 Set<String> unconfirmedPaths = null; 551 Relation<MissingStatus, String> missingPaths = null; 552 unconfirmedPaths = new LinkedHashSet<>(); 553 missingPaths = Relation.of(new LinkedHashMap<MissingStatus, Set<String>>(), LinkedHashSet.class); 554 VettingViewer.getStatus(latestFile.fullIterable(), file, 555 pathHeaderFactory, foundCounter, unconfirmedCounter, 556 missingCounter, missingPaths, unconfirmedPaths); 557 558 // HACK 559 Set<Entry<MissingStatus, String>> missingRemovals = new HashSet<>(); 560 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 561 if (e.getKey() == MissingStatus.ABSENT) { 562 final String path = e.getValue(); 563 if (HACK.get(path) != null) { 564 missingRemovals.add(e); 565 missingCounter.add(Level.MODERN, -1); 566 foundCounter.add(Level.MODERN, 1); 567 } else { 568 Status status = new Status(); 569 String loc = file.getSourceLocaleID(path, status); 570 int debug = 0; 571 } 572 } 573 } 574 for (Entry<MissingStatus, String> e : missingRemovals) { 575 missingPaths.remove(e.getKey(), e.getValue()); 576 } 577 // END HACK 578 579 if (showMissing) { 580 int count = 0; 581 for (String s : unconfirmedPaths) { 582 System.out.println(++count + "\t" + locale + "\tunconfirmed\t" + s); 583 } 584 for (Entry<MissingStatus, String> e : missingPaths.keyValueSet()) { 585 String path = e.getValue(); 586 Status status = new Status(); 587 String loc = file.getSourceLocaleID(path, status); 588 int debug = 0; 589 590 System.out.println(++count + "\t" + locale + "\t" + CldrUtility.toString(e)); 591 } 592 int debug = 0; 593 } 594 595 data.put(locale, new FoundAndTotal(foundCounter, unconfirmedCounter, missingCounter)); 596 } 597 return Collections.unmodifiableMap(data); 598 } 599 showCoverage(Anchors anchors, Matcher matcher)600 public static void showCoverage(Anchors anchors, Matcher matcher) throws IOException { 601 showCoverage(anchors, matcher, null, false); 602 } 603 showCoverage(Anchors 
anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)604 public static void showCoverage(Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) throws IOException { 605 final String title = "Locale Coverage"; 606 try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors)); 607 PrintWriter tsv_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv"); 608 PrintWriter tsv_missing = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv"); 609 PrintWriter tsv_missing_summary = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv"); 610 PrintWriter tsv_missing_basic = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv"); 611 PrintWriter tsv_missing_counts = FileUtilities.openUTF8Writer(CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv"); 612 PrintWriter propertiesCoverage = FileUtilities.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY + "properties/", "coverageLevels.txt"); 613 ){ 614 tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER); 615 tsv_missing.println(TSV_MISSING_HEADER); 616 tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER); 617 tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER); 618 619 propertiesCoverage.println(PROPERTIES_HEADER); 620 621 Set<String> checkModernLocales = STANDARD_CODES.getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)); 622 Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages()); 623 availableLanguages.addAll(checkModernLocales); 624 625 Multimap<String, String> languageToRegion = TreeMultimap.create(); 626 LanguageTagParser ltp = new LanguageTagParser(); 627 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true); 628 for (String locale : factory.getAvailable()) { 629 String country = ltp.set(locale).getRegion(); 630 if (!country.isEmpty()) { 631 
languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country); 632 } 633 } 634 languageToRegion = ImmutableMultimap.copyOf(languageToRegion); 635 636 fixCommonLocales(); 637 638 System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet())); 639 640 System.out.println("# Checking: " + availableLanguages); 641 642 NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH); 643 percentFormat.setMaximumFractionDigits(1); 644 645 pw.println("<p style='text-align: left'>This chart shows the coverage levels in this release. " 646 + "Totals are listed after the main chart.</p>\n" 647 + "<blockquote><ul>\n" 648 + "<li><a href='#main_table'>Main Table</a></li>\n" 649 + "<li><a href='#level_counts'>Level Counts</a></li>\n" 650 + "</ul></blockquote>\n" 651 + "<h3>Column Key</h3>\n" 652 + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n" 653 + "<tr><th>Direct.</th><td>The CLDR source directory</td></tr>\n" 654 + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n" 655 + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n" 656 + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. " 657 + "Particular organizations may have different target levels. 
" 658 + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n" 659 + "<tr><th>≟</th><td>Indicates whether the Computed Level equals the CLDR Target or not.</td></tr>\n" 660 + "<tr><th>Computed Level</th><td>Computed from the percentage values, " 661 + "taking the first level that meets a threshold (currently " 662 + percentFormat.format(MODERN_THRESHOLD) 663 + ", ⓜ " 664 + percentFormat.format(MODERATE_THRESHOLD) 665 + ", ⓑ " 666 + percentFormat.format(BASIC_THRESHOLD) 667 + ").</td></tr>\n" 668 + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n" 669 + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. " 670 + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n" 671 + "<tr><th>%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. " 672 + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. " 673 + "A high-level summary of the meaning of the coverage values is at " 674 + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " 675 + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. " 676 + "</td></tr>\n" 677 + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. 
" 678 + "They are listed if missing at the computed level.<br>" 679 + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>" 680 + "<i>Except for Core, these are not accounted for in the percent values.</i></td></tr>\n" 681 + "<tr><th><a href='https://github.com/unicode-org/cldr-staging/tree/main/docs/charts/42/tsv'>TSV Files</a>:</th><td>\n" 682 + "<ul><li>locale-coverage.tsv — A version of this file, suitable for loading into a spreadsheet.</li>\n" 683 + "<li>locale-missing.tsv — Missing items for the CLDR target locales.</li>\n" 684 + "<li>locale-missing-summary.tsv — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n" 685 + "<li>locale-missing-basic.tsv — Missing items that keep locales from reaching the Basic level.</li></td></tr>\n" 686 + "<li>locale-missing-count.tsv — Counts of items per locale that are found, unconfirmed, or missing, at the target level. " 687 + "(Or at *basic, if there is no target level.)</li></td></tr>\n" 688 + "</table>\n" 689 ); 690 691 Relation<MissingStatus, String> missingPaths = Relation.of(new EnumMap<MissingStatus, Set<String>>( 692 MissingStatus.class), TreeSet.class, CLDRFile.getComparator(DtdType.ldml)); 693 Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml)); 694 695 Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 696 697 Counter<Level> foundCounter = new Counter<>(); 698 Counter<Level> unconfirmedCounter = new Counter<>(); 699 Counter<Level> missingCounter = new Counter<>(); 700 701 List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class)); 702 levelsToShow.remove(Level.COMPREHENSIVE); 703 levelsToShow.remove(Level.UNDETERMINED); 704 levelsToShow = ImmutableList.copyOf(levelsToShow); 705 List<Level> reversedLevels = new ArrayList<>(levelsToShow); 706 Collections.reverse(reversedLevels); 707 reversedLevels = ImmutableList.copyOf(reversedLevels); 708 709 int localeCount = 0; 710 711 
final TablePrinter tablePrinter = new TablePrinter() 712 .addColumn("Direct.", "class='source'", null, "class='source'", true) 713 .setBreakSpans(true).setSpanRows(false) 714 .addColumn("Language", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) 715 .setBreakSpans(true) 716 .addColumn("English Name", "class='source'", null, "class='source'", true) 717 .setBreakSpans(true) 718 .addColumn("Native Name", "class='source'", null, "class='source'", true) 719 .setBreakSpans(true) 720 .addColumn("Script", "class='source'", null, "class='source'", true) 721 .setBreakSpans(true) 722 .addColumn("Default Region", "class='source'", null, "class='source'", true) 723 .setBreakSpans(true) 724 .addColumn("№ Locales", "class='source'", null, "class='targetRight'", true) 725 .setBreakSpans(true).setCellPattern("{0,number}") 726 .addColumn("Target Level", "class='source'", null, "class='source'", true) 727 .setBreakSpans(true) 728 .addColumn("≟", "class='target'", null, "class='target'", true) 729 .setBreakSpans(true).setSortPriority(1).setSortAscending(false) 730 .addColumn("Computed Level", "class='target'", null, "class='target'", true) 731 .setBreakSpans(true).setSortPriority(0).setSortAscending(false) 732 .addColumn("ICU", "class='target'", null, "class='target'", true) 733 .setBreakSpans(true) 734 .addColumn("Confirmed", "class='target'", null, "class='targetRight' style='color:gray'", true) 735 .setBreakSpans(true).setCellPattern("{0,number,0.0%}") 736 ; 737 738 NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH); 739 tsvPercent.setMaximumFractionDigits(2); 740 741 for (Level level : reversedLevels) { 742 String titleLevel = level.getAbbreviation() + "%"; 743 tablePrinter.addColumn(titleLevel, "class='target'", null, "class='targetRight'", true) 744 .setCellPattern("{0,number,0.0%}") 745 .setBreakSpans(true); 746 747 switch(level) { 748 default: 749 tablePrinter.setSortPriority(2).setSortAscending(false); 750 break; 751 case 
BASIC: 752 tablePrinter.setSortPriority(3).setSortAscending(false); 753 break; 754 case MODERATE: 755 tablePrinter.setSortPriority(4).setSortAscending(false); 756 break; 757 case MODERN: 758 tablePrinter.setSortPriority(5).setSortAscending(false); 759 break; 760 } 761 } 762 tablePrinter 763 .addColumn("Missing Features", "class='target'", null, "class='target'", true) 764 .setBreakSpans(true) 765 ; 766 767 long start = System.currentTimeMillis(); 768 LikelySubtags likelySubtags = new LikelySubtags(); 769 770 EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class); 771 targetLevel.put(Level.CORE, 2 / 100d); 772 targetLevel.put(Level.BASIC, 16 / 100d); 773 targetLevel.put(Level.MODERATE, 33 / 100d); 774 targetLevel.put(Level.MODERN, 100 / 100d); 775 776 Multimap<String, String> pathToLocale = TreeMultimap.create(); 777 778 Counter<Level> computedLevels = new Counter<>(); 779 Counter<Level> computedSublocaleLevels = new Counter<>(); 780 781 for (String locale : availableLanguages) { 782 try { 783 if (locale.contains("supplemental") // for old versionsl 784 || locale.startsWith("sr_Latn")) { 785 continue; 786 } 787 if (locales != null && !locales.contains(locale)) { 788 String base = CLDRLocale.getInstance(locale).getLanguage(); 789 if (!locales.contains(base)) { 790 continue; 791 } 792 } 793 if (matcher != null && !matcher.reset(locale).matches()) { 794 continue; 795 } 796 if (defaultContents.contains(locale) || "root".equals(locale) || "und".equals(locale)) { 797 continue; 798 } 799 800 tsv_missing_summary.flush(); 801 tsv_missing.flush(); 802 tsv_missing_basic.flush(); 803 tsv_missing_counts.flush(); 804 805 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists(); 806 807 String region = ltp.set(locale).getRegion(); 808 if (!region.isEmpty()) continue; // skip regions 809 810 final Level cldrLocaleLevelGoal = SC.getLocaleCoverageLevel(Organization.cldr, locale); 811 final String specialFlag = getSpecialFlag(locale); 812 813 final 
boolean cldrLevelGoalBasicToModern = Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal); 814 815 String max = likelySubtags.maximize(locale); 816 final String script = ltp.set(max).getScript(); 817 final String defRegion = ltp.getRegion(); 818 819 final String language = likelySubtags.minimize(locale); 820 821 missingPaths.clear(); 822 unconfirmed.clear(); 823 824 final CLDRFile file = factory.make(locale, true, minimumDraftStatus); 825 826 if (locale.equals("af")) { 827 int debug = 0; 828 } 829 830 Iterable<String> pathSource = new IterableFilter(file.fullIterable()); 831 832 VettingViewer.getStatus(pathSource, file, 833 pathHeaderFactory, foundCounter, unconfirmedCounter, 834 missingCounter, missingPaths, unconfirmed); 835 836 { 837 long found = 0; 838 long unconfirmedc = 0; 839 long missing = 0; 840 Level adjustedGoal = cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0 ? Level.BASIC : cldrLocaleLevelGoal; 841 for (Level level : Level.values()) { 842 if (level.compareTo(adjustedGoal) <= 0) { 843 found += foundCounter.get(level); 844 unconfirmedc += unconfirmedCounter.get(level); 845 missing += missingCounter.get(level); 846 } 847 } 848 String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*"; 849 tsv_missing_counts.println(specialFlag + locale + "\t" + goalFlag + adjustedGoal + "\t" + found + "\t" + unconfirmedc + "\t" + missing); 850 } 851 852 Collection<String> sublocales = languageToRegion.asMap().get(language); 853 if (sublocales == null) { 854 sublocales = Collections.emptySet(); 855 } 856 sublocales = ImmutableSet.copyOf(sublocales); 857 858 final String seedString = isSeed ? 
"seed" : "common"; 859 860 // get the totals 861 862 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class); 863 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class); 864 Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class); 865 866 StatusCounter starredCounter = new StatusCounter(); 867 868 { 869 Multimap<CoreItems, String> detailedErrors = TreeMultimap.create(); 870 Set<CoreItems> coverage = CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors); 871 for (CoreItems item : coverage) { 872 foundCounter.add(item.desiredLevel, 1); 873 } 874 for (Entry<CoreItems, String> entry : detailedErrors.entries()) { 875 CoreItems coreItem = entry.getKey(); 876 String path = entry.getValue(); 877 specialMissingPaths.add(coreItem); 878 // if goal (eg modern) >= itemLevel, indicate it is missing 879 if (coreItem.desiredLevel == Level.BASIC) { 880 starredCounter.gatherStarred(path, null); 881 } 882 missingCounter.add(coreItem.desiredLevel, 1); 883 } 884 } 885 886 if (cldrLevelGoalBasicToModern) { 887 Level goalLevel = cldrLocaleLevelGoal; 888 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 889 String path = entry.getValue(); 890 String status = entry.getKey().toString(); 891 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 892 if (goalLevel.compareTo(foundLevel) >= 0) { 893 String line = spreadsheetLine(locale, language, script, specialFlag, file.getStringValue(path), goalLevel, foundLevel, status, path, file, pathToLocale); 894 tsv_missing.println(line); 895 } 896 } 897 for (String path : unconfirmed) { 898 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 899 if (goalLevel.compareTo(foundLevel) >= 0) { 900 String line = spreadsheetLine(locale, language, script, specialFlag, file.getStringValue(path), goalLevel, foundLevel, "n/a", path, file, pathToLocale); 901 tsv_missing.println(line); 902 } 903 } 904 } else { 905 Level goalLevel = Level.BASIC; 906 for (Entry<MissingStatus, String> entry : 
missingPaths.entrySet()) { 907 String path = entry.getValue(); 908 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 909 if (goalLevel.compareTo(foundLevel) >= 0) { 910 starredCounter.gatherStarred(path, null); 911 } 912 } 913 for (String path : unconfirmed) { 914 String fullPath = file.getFullXPath(path); 915 DraftStatus draftStatus = fullPath.contains("unconfirmed") ? DraftStatus.unconfirmed : DraftStatus.provisional; 916 917 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 918 if (goalLevel.compareTo(foundLevel) >= 0) { 919 starredCounter.gatherStarred(path, draftStatus); 920 } 921 } 922 } 923 924 if (!starredCounter.starredPathToData.isEmpty()) { 925 for (Entry<String, StatusData> starred : starredCounter.starredPathToData.entrySet()) { 926 String starredPath = starred.getKey(); 927 StatusData statusData = starred.getValue(); 928 tsv_missing_basic.println(specialFlag + locale // 929 + "\t" + statusData.missing // 930 + "\t" + statusData.provisional // 931 + "\t" + statusData.unconfirmed // 932 + "\t" + starredPath.replace("\"*\"", "'*'")); 933 } 934 tsv_missing_basic.println(specialFlag + locale // 935 + "\t" + starredCounter.missingTotal // 936 + "\t" + starredCounter.provisionalTotal // 937 + "\t" + starredCounter.unconfirmedTotal // 938 + "\tTotals"); 939 tsv_missing_basic.println("\t\t\t"); // for a proper table in github 940 } 941 942 int sumFound = 0; 943 int sumMissing = 0; 944 int sumUnconfirmed = 0; 945 946 for (Level level : levelsToShow) { 947 long foundCount = foundCounter.get(level); 948 long unconfirmedCount = unconfirmedCounter.get(level); 949 long missingCount = missingCounter.get(level); 950 951 sumFound += foundCount; 952 sumUnconfirmed += unconfirmedCount; 953 sumMissing += missingCount; 954 955 confirmed.put(level, sumFound); 956 totals.put(level, sumFound + sumUnconfirmed + sumMissing); 957 } 958 959 // double modernTotal = totals.get(Level.MODERN); 960 961 962 // first get the accumulated values 963 
EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class); 964 EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class); 965 int currTotals = 0; 966 int currConfirmed = 0; 967 for (Level level : levelsToShow) { 968 currTotals += totals.get(level); 969 currConfirmed += confirmed.get(level); 970 accumConfirmed.put(level, currConfirmed); 971 accumTotals.put(level, currTotals); 972 } 973 974 // print the totals 975 976 Level computed = Level.UNDETERMINED; 977 Map<Level, Double> levelToProportion = new EnumMap<>(Level.class); 978 979 for (Level level : reversedLevels) { 980 int confirmedCoverage = accumConfirmed.get(level); 981 double total = accumTotals.get(level); 982 983 final double proportion = confirmedCoverage / total; 984 levelToProportion.put(level, proportion); 985 986 if (computed == Level.UNDETERMINED) { 987 switch (level) { 988 case MODERN: 989 if (proportion >= MODERN_THRESHOLD) { 990 computed = level; 991 } 992 break; 993 case MODERATE: 994 if (proportion >= MODERATE_THRESHOLD) { 995 computed = level; 996 } 997 break; 998 case BASIC: 999 if (proportion >= BASIC_THRESHOLD) { 1000 computed = level; 1001 } 1002 break; 1003 default: 1004 break; 1005 } 1006 } 1007 } 1008 1009 Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class); 1010 Level computedWithCore = computed == Level.UNDETERMINED ? Level.CORE : computed; 1011 for (CoreItems item : specialMissingPaths) { 1012 if (item.desiredLevel.compareTo(computedWithCore) <= 0) { 1013 shownMissingPaths.add(item); 1014 } else { 1015 int debug = 0; 1016 } 1017 } 1018 final String coreMissingString = Joiner.on(", ").join(shownMissingPaths); 1019 1020 String visibleComputed = computed == Level.UNDETERMINED ? 
"" : computed.toString(); 1021 computedLevels.add(computed, 1); 1022 computedSublocaleLevels.add(computed, sublocales.size()); 1023 1024 tablePrinter.addRow() 1025 .addCell(seedString) 1026 .addCell(language) 1027 .addCell(ENGLISH.getName(language)) 1028 .addCell(file.getName(language)) 1029 .addCell(script) 1030 .addCell(defRegion) 1031 .addCell(sublocales.size()) 1032 .addCell(cldrLocaleLevelGoal == Level.UNDETERMINED ? "" : specialFlag + cldrLocaleLevelGoal.toString()) 1033 .addCell(computed == cldrLocaleLevelGoal ? " ≡" : " ≠") 1034 .addCell(visibleComputed) 1035 .addCell(getIcuValue(language)) 1036 .addCell(sumFound/(double)(sumFound+sumUnconfirmed)) 1037 ; 1038 1039 // print the totals 1040 for (Level level : reversedLevels) { 1041 tablePrinter.addCell(levelToProportion.get(level)); 1042 } 1043 1044 tablePrinter 1045 .addCell(coreMissingString) 1046 .finishRow(); 1047 1048 // now write properties file line 1049 1050 if (computed != Level.UNDETERMINED) { 1051 propertiesCoverage.println(locale 1052 + " ;\t" + visibleComputed); 1053 // Level higher = Level.UNDETERMINED; 1054 // switch (computed) { 1055 // default: 1056 // higher = Level.UNDETERMINED; 1057 // break; 1058 // case MODERATE: 1059 // higher = Level.MODERN; 1060 // break; 1061 // case BASIC: 1062 // higher = Level.MODERATE; 1063 // break; 1064 // } 1065 // double higherProportion = higher == Level.UNDETERMINED ? 
0d : levelToProportion.get(higher); 1066 // 1067 // if (higherProportion >= THRESHOLD_HIGHER) { 1068 // propertiesCoverage.println( 1069 // " ;\t" + tsvPercent.format(higherProportion) + 1070 // " ;\t" + higher 1071 // ); 1072 // } else { 1073 // propertiesCoverage.println(" ;\t" + "" + " ;\t" + ""); 1074 // } 1075 } 1076 localeCount++; 1077 } catch (Exception e) { 1078 throw new IllegalArgumentException(e); 1079 } 1080 } 1081 1082 propertiesCoverage.println("#EOF\t"); // needs extra tabs to look right in github 1083 1084 pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>"); 1085 pw.println(tablePrinter.toTable()); 1086 1087 pw.println( 1088 "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n" 1089 + "<table class='subtle'><tr>\n" 1090 + "<th style='text-align:left'>" + "Level" + "</th>" 1091 + "<th style='text-align:left'>" + "Languages" + "</th>" 1092 + "<th style='text-align:left'>" + "Locales" + "</th>" 1093 + "</tr>" 1094 ); 1095 long totalCount = 0; 1096 long totalLocaleCount = 0; 1097 for (Level level : Lists.reverse(Arrays.asList(Level.values()))) { 1098 final long count = computedLevels.get(level); 1099 final long localesCount = computedSublocaleLevels.get(level); 1100 if (count == 0 || level == Level.UNDETERMINED) { 1101 continue; 1102 } 1103 totalCount += count; 1104 totalLocaleCount += localesCount; 1105 String visibleImputed = level == Level.UNDETERMINED ? 
"<" + Level.BASIC.toString() : level.toString(); 1106 pw.println("<tr>" 1107 + "<th style='text-align:left'>" + visibleImputed + "</th>" 1108 + "<td style='text-align:right'>" + count + "</td>" 1109 + "<td style='text-align:right'>" + localesCount + "</td>" 1110 + "</tr>"); 1111 } 1112 pw.println("<tr>" 1113 + "<th style='text-align:left'>" + "Total" + "</th>" 1114 + "<td style='text-align:right'>" + totalCount + "</td>" 1115 + "<td style='text-align:right'>" + totalLocaleCount + "</td>" 1116 + "</tr>\n" 1117 ); 1118 1119 pw.println("<tr>" 1120 + "<th style='text-align:left'>" + "in dev." + "</th>" 1121 + "<td style='text-align:right'>" + computedLevels.get(Level.UNDETERMINED) + "</td>" 1122 + "<td style='text-align:right'>" + computedSublocaleLevels.get(Level.UNDETERMINED) + "</td>" 1123 + "</tr>\n" 1124 + "</table>" 1125 ); 1126 1127 1128 Multimap<Level, String> levelToLocales = TreeMultimap.create(); 1129 1130 for ( Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) { 1131 String path = entry.getKey(); 1132 Collection<String> localeSet = entry.getValue(); 1133 levelToLocales.clear(); 1134 for (String locale : localeSet) { 1135 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 1136 levelToLocales.put(foundLevel, locale); 1137 } 1138 String phString = "n/a\tn/a\tn/a\tn/a"; 1139 try { 1140 PathHeader ph = pathHeaderFactory.fromPath(path); 1141 phString = ph.toString(); 1142 } catch (Exception e) { 1143 } 1144 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) { 1145 Level level = entry2.getKey(); 1146 localeSet = entry2.getValue(); 1147 tsv_missing_summary.println( 1148 level 1149 + "\t" + localeSet.size() 1150 + "\t" + Joiner.on(" ").join(localeSet.stream().map(x -> x + getSpecialFlag(x)).collect(Collectors.toSet())) 1151 + "\t" + phString 1152 ); 1153 } 1154 } 1155 tablePrinter.toTsv(tsv_summary); 1156 long end = System.currentTimeMillis(); 1157 System.out.println((end - start) + " millis = " 
1158 + ((end - start) / localeCount) + " millis/locale"); 1159 ShowPlurals.appendBlanksForScrolling(pw); 1160 } 1161 } 1162 getSpecialFlag(String locale)1163 public static String getSpecialFlag(String locale) { 1164 return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED ? "" : "‡"; 1165 } 1166 1167 private static class IterableFilter implements Iterable<String> { 1168 private Iterable<String> source; 1169 IterableFilter(Iterable<String> source)1170 IterableFilter(Iterable<String> source) { 1171 this.source = source; 1172 } 1173 1174 /** 1175 * When some paths are defined after submission, we need to change them to COMPREHENSIVE in computing the vetting status. 1176 */ 1177 1178 static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of( 1179 "//ldml/personNames/nameOrderLocales[@order=\"givenFirst\"]", 1180 "//ldml/personNames/nameOrderLocales[@order=\"surnameFirst\"]", 1181 "//ldml/personNames/foreignSpaceReplacement[@xml:space=\"preserve\"]", 1182 "//ldml/personNames/initialPattern[@type=\"initial\"]", 1183 "//ldml/personNames/initialPattern[@type=\"initialSequence\"]", 1184 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern", 1185 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1186 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1187 
"//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1188 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"formal\"]/namePattern", 1189 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1190 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1191 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1192 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1193 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1194 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1195 
"//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1196 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1197 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1198 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1199 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1200 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1201 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='givenFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1202 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern", 1203 
"//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1204 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1205 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1206 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"formal\"]/namePattern", 1207 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"long\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1208 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1209 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1210 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1211 
"//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1212 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1213 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"medium\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1214 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1215 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1216 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1217 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"addressing\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='referring'][@formality='formal']\"]", 1218 
"//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1219 "//ldml/personNames/personName[@order=\"surnameFirst\"][@length=\"short\"][@usage=\"monogram\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='surnameFirst'][@length='long'][@usage='monogram'][@formality='formal']\"]", 1220 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern", 1221 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"long\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]", 1222 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]", 1223 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"medium\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]", 1224 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"short\"][@usage=\"referring\"][@formality=\"formal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]", 1225 "//ldml/personNames/personName[@order=\"sorting\"][@length=\"short\"][@usage=\"referring\"][@formality=\"informal\"]/alias[@source=\"locale\"][@path=\"../personName[@order='sorting'][@length='long'][@usage='referring'][@formality='formal']\"]", 1226 "//ldml/personNames/sampleName[@item=\"givenOnly\"]/nameField[@type=\"given\"]", 1227 
"//ldml/personNames/sampleName[@item=\"givenSurnameOnly\"]/nameField[@type=\"given\"]", 1228 "//ldml/personNames/sampleName[@item=\"givenSurnameOnly\"]/nameField[@type=\"surname\"]", 1229 "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"given\"]", 1230 "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"given2\"]", 1231 "//ldml/personNames/sampleName[@item=\"given12Surname\"]/nameField[@type=\"surname\"]", 1232 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"prefix\"]", 1233 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given\"]", 1234 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given-informal\"]", 1235 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"given2\"]", 1236 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname-prefix\"]", 1237 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname-core\"]", 1238 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"surname2\"]", 1239 "//ldml/personNames/sampleName[@item=\"full\"]/nameField[@type=\"suffix\"]" 1240 ); 1241 @Override iterator()1242 public Iterator<String> iterator() { 1243 return new IteratorFilter(source.iterator()); 1244 } 1245 1246 static class IteratorFilter implements Iterator<String> { 1247 Iterator<String> source; 1248 String peek; 1249 IteratorFilter(Iterator<String> source)1250 public IteratorFilter(Iterator<String> source) { 1251 this.source = source; 1252 fillPeek(); 1253 } 1254 @Override hasNext()1255 public boolean hasNext() { 1256 return peek != null; 1257 } 1258 @Override next()1259 public String next() { 1260 String result = peek; 1261 fillPeek(); 1262 return result; 1263 } 1264 fillPeek()1265 private void fillPeek() { 1266 peek = null; 1267 while (source.hasNext()) { 1268 peek = source.next(); 1269 // if it is ok to assess, then break 1270 if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek) 1271 && 
SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) { 1272 break; 1273 } 1274 peek = null; 1275 } 1276 } 1277 } 1278 1279 } 1280 static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO); 1281 1282 // userInfo.getVoterInfo().getLevel().compareTo(VoteResolver.Level.tc) 1283 static final VoterInfo dummyVoterInfo = new VoterInfo(Organization.cldr, org.unicode.cldr.util.VoteResolver.Level.vetter, "somename"); 1284 1285 static final UserInfo dummyUserInfo = new UserInfo() { 1286 @Override 1287 public VoterInfo getVoterInfo() { 1288 return dummyVoterInfo; 1289 } 1290 }; 1291 static final PathValueInfo dummyPathValueInfo = new PathValueInfo() { 1292 // pathValueInfo.getCoverageLevel().compareTo(Level.COMPREHENSIVE) 1293 @Override 1294 public Collection<? extends CandidateInfo> getValues() { 1295 throw new UnsupportedOperationException(); 1296 } 1297 @Override 1298 public CandidateInfo getCurrentItem() { 1299 throw new UnsupportedOperationException(); 1300 } 1301 @Override 1302 public String getBaselineValue() { 1303 throw new UnsupportedOperationException(); 1304 } 1305 @Override 1306 public Level getCoverageLevel() { 1307 return Level.MODERN; 1308 } 1309 @Override 1310 public boolean hadVotesSometimeThisRelease() { 1311 throw new UnsupportedOperationException(); 1312 } 1313 @Override 1314 public CLDRLocale getLocale() { 1315 throw new UnsupportedOperationException(); 1316 } 1317 @Override 1318 public String getXpath() { 1319 throw new UnsupportedOperationException(); 1320 } 1321 }; 1322 spreadsheetLine(String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1323 private static String spreadsheetLine(String locale, String language, String script, String specialFlag, 1324 String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, 1325 CLDRFile 
resolvedFile, Multimap<String, String> pathToLocale) { 1326 if (pathToLocale != null) { 1327 pathToLocale.put(path, locale); 1328 } 1329 // String stLink = "n/a"; 1330 // String englishValue = "n/a"; 1331 // StatusAction action = null; 1332 // String icuValue = getIcuValue(locale); 1333 1334 SurveyToolStatus surveyToolStatus = null; 1335 String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path); 1336 1337 String phString = "na\tn/a\tn/a\t" + path; 1338 try { 1339 PathHeader ph = pathHeaderFactory.fromPath(path); 1340 phString = ph.toString(); 1341 // stLink = URLS.forXpath(locale, path); 1342 // englishValue = ENGLISH.getStringValue(path); 1343 // action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo); 1344 } catch (Exception e) { 1345 1346 } 1347 1348 String line = 1349 specialFlag + language 1350 + "\t" + ENGLISH.getName(language) 1351 + "\t" + ENGLISH.getName("script", script) 1352 + "\t" + cldrLocaleLevelGoal 1353 + "\t" + itemLevel 1354 + "\t" + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString()) 1355 + "\t" + bailey 1356 + "\t" + phString 1357 + "\t" + PathHeader.getUrlForLocalePath(locale, path) 1358 ; 1359 return line; 1360 } 1361 1362 1363 getIcuValue(String locale)1364 private static String getIcuValue(String locale) { 1365 return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : ""; 1366 } 1367 1368 static final Set<ULocale> ICU_Locales = ImmutableSet.copyOf(ULocale.getAvailableLocales()); 1369 } 1370