1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.ImmutableListMultimap; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableSet; 7 import com.google.common.collect.LinkedHashMultimap; 8 import com.google.common.collect.Multimap; 9 import com.google.common.collect.Sets; 10 import com.google.common.collect.TreeMultimap; 11 import com.ibm.icu.impl.Relation; 12 import com.ibm.icu.impl.Row; 13 import com.ibm.icu.impl.Row.R2; 14 import com.ibm.icu.impl.Row.R4; 15 import com.ibm.icu.text.CompactDecimalFormat; 16 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; 17 import com.ibm.icu.text.Transform; 18 import com.ibm.icu.util.Calendar; 19 import com.ibm.icu.util.Output; 20 import com.ibm.icu.util.ULocale; 21 import java.util.Arrays; 22 import java.util.Collection; 23 import java.util.Collections; 24 import java.util.Date; 25 import java.util.EnumSet; 26 import java.util.HashMap; 27 import java.util.HashSet; 28 import java.util.LinkedHashSet; 29 import java.util.List; 30 import java.util.Map; 31 import java.util.Map.Entry; 32 import java.util.Set; 33 import java.util.TreeMap; 34 import java.util.TreeSet; 35 import java.util.regex.Pattern; 36 import org.unicode.cldr.draft.ScriptMetadata; 37 import org.unicode.cldr.test.CoverageLevel2; 38 import org.unicode.cldr.tool.LikelySubtags; 39 import org.unicode.cldr.util.CLDRConfig; 40 import org.unicode.cldr.util.CLDRFile; 41 import org.unicode.cldr.util.CLDRLocale; 42 import org.unicode.cldr.util.CLDRPaths; 43 import org.unicode.cldr.util.ChainedMap; 44 import org.unicode.cldr.util.ChainedMap.M4; 45 import org.unicode.cldr.util.Counter2; 46 import org.unicode.cldr.util.DtdData; 47 import org.unicode.cldr.util.DtdData.Element; 48 import org.unicode.cldr.util.DtdType; 49 import org.unicode.cldr.util.GrammarInfo; 50 import org.unicode.cldr.util.LanguageTagParser; 51 import org.unicode.cldr.util.Level; 52 import org.unicode.cldr.util.LocaleNames; 53 import org.unicode.cldr.util.LogicalGrouping; 54 import org.unicode.cldr.util.LogicalGrouping.PathType; 55 import org.unicode.cldr.util.Organization; 56 import org.unicode.cldr.util.PathHeader; 57 import org.unicode.cldr.util.PathHeader.Factory; 58 import org.unicode.cldr.util.PathStarrer; 59 import org.unicode.cldr.util.PatternCache; 60 import org.unicode.cldr.util.RegexLookup; 61 import org.unicode.cldr.util.RegexLookup.Finder; 62 import org.unicode.cldr.util.StandardCodes; 63 import org.unicode.cldr.util.SupplementalDataInfo; 64 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; 65 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 66 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 68 import org.unicode.cldr.util.VoteResolver; 69 import org.unicode.cldr.util.XPathParts; 70 71 public class TestCoverageLevel extends TestFmwkPlus { 72 73 private static final boolean SHOW_LSR_DATA = false; 74 75 private static CLDRConfig testInfo = CLDRConfig.getInstance(); 76 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 77 private static final CLDRFile ENGLISH = testInfo.getEnglish(); 78 private static final SupplementalDataInfo SDI = testInfo.getSupplementalDataInfo(); 79 private static final String TC_VOTES = 80 Integer.toString(VoteResolver.Level.tc.getVotes(Organization.apple)); 81 main(String[] args)82 public static void main(String[] args) { 83 new TestCoverageLevel().run(args); 84 } 85 testSpecificPaths()86 public void testSpecificPaths() { 87 String[][] rows = { 88 { 89 "//ldml/characters/parseLenients[@scope=\"number\"][@level=\"lenient\"]/parseLenient[@sample=\",\"]", 90 "moderate", 91 TC_VOTES 92 } 93 }; 94 doSpecificPathTest("fr", rows); 95 } 96 testSpecificPathsPersCal()97 public void testSpecificPathsPersCal() { 98 String[][] rows = { 99 { 100 "//ldml/dates/calendars/calendar[@type=\"persian\"]/eras/eraAbbr/era[@type=\"0\"]", 101 "moderate", 102 "4" 103 }, 104 { 105 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", 106 "moderate", 107 "4" 108 } 109 }; 110 doSpecificPathTest("ckb_IR", rows); 111 } 112 testSpecificPathsDeFormatLength()113 public void testSpecificPathsDeFormatLength() { 114 String[][] rows = { 115 /* For German (de) these should be high-bar (20) per https://unicode-org.atlassian.net/browse/CLDR-14988 */ 116 { 117 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 118 "moderate", 119 TC_VOTES 120 }, 121 { 122 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 123 "moderate", 124 TC_VOTES 125 }, 126 { 127 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 128 "moderate", 129 TC_VOTES 130 }, 131 { 132 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 133 "moderate", 134 TC_VOTES 135 }, 136 { 137 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 138 "moderate", 139 TC_VOTES 140 }, 141 { 142 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 143 "moderate", 144 TC_VOTES 145 }, 146 { 147 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 148 "moderate", 149 TC_VOTES 150 }, 151 { 152 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 153 "moderate", 154 TC_VOTES 155 }, 156 { 157 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 158 "moderate", 159 TC_VOTES 160 }, 161 { 162 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 163 "moderate", 164 TC_VOTES 165 }, 166 { 167 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 168 "moderate", 169 TC_VOTES 170 }, 171 { 172 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 173 "moderate", 174 TC_VOTES 175 }, 176 /* not high-bar (20): wrong number of zeroes, or count many*/ 177 { 178 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100\"][@count=\"other\"]", 179 "comprehensive", 180 "8" 181 }, 182 { 183 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000000\"][@count=\"other\"]", 184 "moderate", 185 "8" 186 }, 187 { 188 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"many\"]", 189 "moderate", 190 "8" 191 }, 192 }; 193 doSpecificPathTest("de", rows); 194 } 195 doSpecificPathTest(String localeStr, String[][] rows)196 private void doSpecificPathTest(String localeStr, String[][] rows) { 197 Factory phf = PathHeader.getFactory(ENGLISH); 198 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SDI, localeStr); 199 CLDRLocale loc = CLDRLocale.getInstance(localeStr); 200 for (String[] row : rows) { 201 String path = row[0]; 202 Level expectedLevel = Level.fromString(row[1]); 203 Level level = coverageLevel.getLevel(path); 204 assertEquals("Level for " + path, expectedLevel, level); 205 206 int expectedRequiredVotes = Integer.parseInt(row[2]); 207 int votes = SDI.getRequiredVotes(loc, phf.fromPath(path)); 208 assertEquals("Votes for " + path, expectedRequiredVotes, votes); 209 } 210 } 211 oldTestInvariantPaths()212 public void oldTestInvariantPaths() { 213 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 214 PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 215 SupplementalDataInfo sdi = 216 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 217 218 Set<String> allPaths = new HashSet<>(); 219 M4<String, String, Level, Boolean> starredToLocalesToLevels = 220 ChainedMap.of( 221 new TreeMap<String, Object>(), 222 new TreeMap<String, Object>(), 223 new TreeMap<Level, Object>(), 224 Boolean.class); 225 226 for (String locale : factory.getAvailableLanguages()) { 227 logln(locale); 228 CLDRFile cldrFileToCheck = factory.make(locale, true); 229 for (String path : cldrFileToCheck.fullIterable()) { 230 allPaths.add(path); 231 String starred = pathStarrer.set(path); 232 Level level = sdi.getCoverageLevel(path, locale); 233 starredToLocalesToLevels.put(starred, locale, level, true); 234 } 235 } 236 237 Set<Level> levelsFound = EnumSet.noneOf(Level.class); 238 Set<String> localesWithUniqueLevels = new TreeSet<>(); 239 for (Entry<String, Map<String, Map<Level, Boolean>>> entry : starredToLocalesToLevels) { 240 String starred = entry.getKey(); 241 Map<String, Map<Level, Boolean>> localesToLevels = entry.getValue(); 242 int maxLevelCount = 0; 243 double localeCount = 0; 244 levelsFound.clear(); 245 localesWithUniqueLevels.clear(); 246 247 for (Entry<String, Map<Level, Boolean>> entry2 : localesToLevels.entrySet()) { 248 String locale = entry2.getKey(); 249 Map<Level, Boolean> levels = entry2.getValue(); 250 levelsFound.addAll(levels.keySet()); 251 if (levels.size() > maxLevelCount) { 252 maxLevelCount = levels.size(); 253 } 254 if (levels.size() == 1) { 255 localesWithUniqueLevels.add(locale); 256 } 257 localeCount++; 258 } 259 System.out.println( 260 maxLevelCount 261 + "\t" 262 + localesWithUniqueLevels.size() / localeCount 263 + "\t" 264 + starred 265 + "\t" 266 + Joiner.on(", ").join(levelsFound) 267 + "\t" 268 + (maxLevelCount == 1 269 ? "all" 270 : localesWithUniqueLevels.size() == 0 271 ? "none" 272 : Joiner.on(", ").join(localesWithUniqueLevels))); 273 } 274 } 275 276 enum LanguageStatus { 277 Lit100M("P1"), 278 Lit10MandOfficial("P2"), 279 Lit1MandOneThird("P3"); 280 final String name; 281 LanguageStatus(String name)282 LanguageStatus(String name) { 283 this.name = name; 284 } 285 } 286 287 static Relation<String, LanguageStatus> languageStatus = 288 Relation.of(new HashMap<String, Set<LanguageStatus>>(), TreeSet.class); 289 static Counter2<String> languageLiteratePopulation = new Counter2<>(); 290 static Map<String, Date> currencyToLast = new HashMap<>(); 291 static Set<String> officialSomewhere = new HashSet<>(); 292 293 static { 294 Counter2<String> territoryLiteratePopulation = new Counter2<>(); 295 LanguageTagParser parser = new LanguageTagParser(); 296 // cf 297 // http://cldr.unicode.org/development/development-process/design-proposals/languages-to-show-for-translation 298 for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { 299 String base = parser.set(language).getLanguage(); 300 boolean isOfficial = false; 301 double languageLiterate = 0; 302 for (String territory : SDI.getTerritoriesForPopulationData(language)) { 303 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); 304 OfficialStatus officialStatus = pop.getOfficialStatus(); 305 if (officialStatus.compareTo(OfficialStatus.de_facto_official) >= 0) { 306 isOfficial = true; 307 languageStatus.put(base + "_" + territory, LanguageStatus.Lit10MandOfficial); 308 officialSomewhere.add(base); 309 } 310 double litPop = pop.getLiteratePopulation(); 311 languageLiterate += litPop; territoryLiteratePopulation.add(territory, litPop)312 territoryLiteratePopulation.add(territory, litPop); 313 languageLiteratePopulation.add(base + "_" + territory, litPop); 314 } languageLiteratePopulation.add(base, languageLiterate)315 languageLiteratePopulation.add(base, languageLiterate); 316 if (languageLiterate > 100000000) { languageStatus.put(base, LanguageStatus.Lit100M)317 languageStatus.put(base, LanguageStatus.Lit100M); 318 } 319 if (languageLiterate > 10000000 && isOfficial) { languageStatus.put(base, LanguageStatus.Lit10MandOfficial)320 languageStatus.put(base, LanguageStatus.Lit10MandOfficial); 321 } 322 } 323 for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { 324 if (languageLiteratePopulation.getCount(language) < 1000000) { 325 continue; 326 } 327 String base = parser.set(language).getLanguage(); 328 for (String territory : SDI.getTerritoriesForPopulationData(language)) { 329 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); 330 double litPop = pop.getLiteratePopulation(); 331 double total = territoryLiteratePopulation.getCount(territory); 332 if (litPop > total / 3) { languageStatus.put(base, LanguageStatus.Lit1MandOneThird)333 languageStatus.put(base, LanguageStatus.Lit1MandOneThird); 334 } 335 } 336 } 337 for (String territory : STANDARD_CODES.getAvailableCodes("territory")) { 338 Set<CurrencyDateInfo> cdateInfo = SDI.getCurrencyDateInfo(territory); 339 if (cdateInfo == null) { 340 continue; 341 } 342 for (CurrencyDateInfo dateInfo : cdateInfo) { 343 String currency = dateInfo.getCurrency(); 344 Date last = dateInfo.getEnd(); 345 Date old = currencyToLast.get(currency); 346 if (old == null || old.compareTo(last) < 0) { currencyToLast.put(currency, last)347 currencyToLast.put(currency, last); 348 } 349 } 350 } 351 } 352 353 static CompactDecimalFormat cdf = 354 CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactStyle.SHORT); 355 isBigLanguage(String lang)356 static String isBigLanguage(String lang) { 357 Set<LanguageStatus> status = languageStatus.get(lang); 358 Double size = languageLiteratePopulation.getCount(lang); 359 String sizeString = size == null ? "?" : cdf.format(size); 360 String off = officialSomewhere.contains(lang) ? "o" : ""; 361 if (status == null || status.isEmpty()) { 362 return "P4-" + sizeString + off; 363 } 364 return status.iterator().next().name + "-" + sizeString + off; 365 } 366 367 static final Date NOW = new Date(); 368 369 private static final boolean DEBUG = false; 370 371 static class TypeName implements Transform<String, String> { 372 private final int field; 373 private final Map<String, R2<List<String>, String>> dep; 374 TypeName(int field)375 public TypeName(int field) { 376 this.field = field; 377 switch (field) { 378 case CLDRFile.LANGUAGE_NAME: 379 dep = SDI.getLocaleAliasInfo().get("language"); 380 break; 381 case CLDRFile.TERRITORY_NAME: 382 dep = SDI.getLocaleAliasInfo().get("territory"); 383 break; 384 case CLDRFile.SCRIPT_NAME: 385 dep = SDI.getLocaleAliasInfo().get("script"); 386 break; 387 default: 388 dep = null; 389 break; 390 } 391 } 392 393 @Override transform(String source)394 public String transform(String source) { 395 String result = ENGLISH.getName(field, source); 396 String extra = ""; 397 if (field == CLDRFile.LANGUAGE_NAME) { 398 String lang = isBigLanguage(source); 399 extra = lang == null ? "X" : lang; 400 } else if (field == CLDRFile.CURRENCY_NAME) { 401 Date last = currencyToLast.get(source); 402 extra = last == null ? "?" : last.compareTo(NOW) < 0 ? "old" : ""; 403 } 404 R2<List<String>, String> depValue = dep == null ? null : dep.get(source); 405 if (depValue != null) { 406 extra += extra.isEmpty() ? "" : "-"; 407 extra += depValue.get1(); 408 } 409 return result + (extra.isEmpty() ? "" : "\t" + extra); 410 } 411 } 412 413 RegexLookup<Level> exceptions = 414 RegexLookup.of( 415 null, 416 new Transform<String, Level>() { 417 @Override 418 public Level transform(String source) { 419 return Level.fromLevel(Integer.parseInt(source)); 420 } 421 }, 422 null) 423 .loadFromFile(TestCoverageLevel.class, "TestCoverageLevel.txt"); 424 425 public void TestExceptions() { 426 for (Map.Entry<Finder, Level> x : exceptions) { 427 logln(x.getKey().toString() + " => " + x.getValue()); 428 } 429 } 430 431 public void TestNarrowCurrencies() { 432 String path = "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol[@alt=\"narrow\"]"; 433 String value = ENGLISH.getStringValue(path); 434 assertEquals("Narrow $", "$", value); 435 SupplementalDataInfo sdi = 436 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 437 Level level = sdi.getCoverageLevel(path, "en"); 438 assertEquals("Narrow $", Level.MODERATE, level); 439 } 440 441 public void TestA() { 442 String path = "//ldml/characterLabels/characterLabel[@type=\"other\"]"; 443 SupplementalDataInfo sdi = 444 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 445 Level level = sdi.getCoverageLevel(path, "en"); 446 assertEquals("Quick Check for any attribute", Level.MODERN, level); 447 } 448 449 public void TestCoverageCompleteness() { 450 /** 451 * Check that English paths are, except for known cases, at least modern coverage. We filter 452 * out the things we know about and have determined are OK to be in comprehensive. If we add 453 * a path that doesn't get its coverage set, this test should complain about it. 454 */ 455 final ImmutableSet<String> inactiveMetazones = 456 ImmutableSet.of( 457 "Greenland", // TODO: New metazone added for tz2023d update, 458 // In CLDR 45, we don't want to include this one in modern coverage because 459 // we don't open ST for translating display names for this metazone. 460 // After 45, we will include "Greenland" in modern coverage. 461 "Bering", 462 "Dominican", 463 "Shevchenko", 464 "Alaska_Hawaii", 465 "Yerevan", 466 "Africa_FarWestern", 467 "British", 468 "Sverdlovsk", 469 "Karachi", 470 "Malaya", 471 "Oral", 472 "Frunze", 473 "Dutch_Guiana", 474 "Irish", 475 "Uralsk", 476 "Tashkent", 477 "Kwajalein", 478 "Ashkhabad", 479 "Kizilorda", 480 "Kuybyshev", 481 "Baku", 482 "Dushanbe", 483 "Goose_Bay", 484 "Liberia", 485 "Samarkand", 486 "Tbilisi", 487 "Borneo", 488 "Greenland_Central", 489 "Dacca", 490 "Aktyubinsk", 491 "Turkey", 492 "Urumqi", 493 "Acre", 494 "Almaty", 495 "Anadyr", 496 "Aqtau", 497 "Aqtobe", 498 "Kamchatka", 499 "Macau", 500 "Qyzylorda", 501 "Samara", 502 "Casey", 503 "Guam", 504 "Lanka", 505 "North_Mariana"); 506 507 final Pattern calendar100 = 508 PatternCache.get("(coptic|ethiopic-amete-alem|islamic-(rgsa|tbla|umalqura))"); 509 510 /** 511 * Recommended scripts that are allowed for comprehensive coverage. Not-recommended scripts 512 * (according to ScriptMetadata) are filtered out automatically. 513 */ 514 final Pattern script100 = PatternCache.get("(Zinh)"); 515 516 final Pattern keys100 = 517 PatternCache.get( 518 "(col(Alternate|Backwards|CaseFirst|CaseLevel|HiraganaQuaternary|" 519 + "Normalization|Numeric|Reorder|Strength)|kv|sd|mu|timezone|va|variableTop|x|d0|h0|i0|k0|m0|s0)"); 520 521 final Pattern numberingSystem100 = 522 PatternCache.get( 523 "(" 524 + "finance|native|traditional|adlm|ahom|bali|bhks|brah|cakm|cham|cyrl|diak|" 525 + "gara|gong|gonm|gukh|hanidays|hmng|hmnp|java|jpanyear|kali|kawi|krai|lana(tham)?|lepc|limb|" 526 + "math(bold|dbl|mono|san[bs])|modi|mong|mroo|mtei|mymr(epka|pao|shan|tlng)|" 527 + "nagm|newa|nkoo|olck|onao|osma|outlined|rohg|saur|segment|shrd|sin[dh]|sora|sund|sunu|" 528 + "takr|talu|tirh|tnsa|vaii|wara|wcho)"); 529 530 final Pattern collation100 = 531 PatternCache.get( 532 "(" 533 + "big5han|compat|dictionary|emoji|eor|gb2312han|phonebook|phonetic|pinyin|searchjl|stroke|traditional|unihan|zhuyin)"); 534 535 SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); 536 CLDRFile english = testInfo.getEnglish(); 537 538 // Calculate date of the upcoming CLDR release, minus 5 years (deprecation policy) 539 final int versionNumber = Integer.valueOf((CLDRFile.GEN_VERSION).split("\\.")[0]); 540 Calendar cal = Calendar.getInstance(); 541 cal.set(versionNumber / 2 + versionNumber % 2 + 2001, 8 - (versionNumber % 2) * 6, 15); 542 Date cldrReleaseMinus5Years = cal.getTime(); 543 Set<String> modernCurrencies = 544 SDI.getCurrentCurrencies(SDI.getCurrencyTerritories(), cldrReleaseMinus5Years, NOW); 545 546 Set<String> needsNumberSystem = new HashSet<>(); 547 DtdData dtdData = DtdData.getInstance(DtdType.ldml); 548 Element numbersElement = dtdData.getElementFromName().get("numbers"); 549 for (Element childOfNumbers : numbersElement.getChildren().keySet()) { 550 if (childOfNumbers.containsAttribute("numberSystem")) { 551 needsNumberSystem.add(childOfNumbers.name); 552 } 553 } 554 555 for (String path : english.fullIterable()) { 556 logln("Testing path => " + path); 557 XPathParts xpp = XPathParts.getFrozenInstance(path); 558 if (path.endsWith("/alias") 559 || path.matches( 560 "//ldml/(identity|contextTransforms|layout|localeDisplayNames/transformNames)/.*")) { 561 continue; 562 } 563 if (sdi.isDeprecated(DtdType.ldml, path)) { 564 continue; 565 } 566 Level lvl = sdi.getCoverageLevel(path, "en"); 567 if (lvl == Level.UNDETERMINED) { 568 errln("Undetermined coverage value for path => " + path); 569 continue; 570 } 571 if (lvl.compareTo(Level.MODERN) <= 0) { 572 logln("Level OK [" + lvl.toString() + "] for path => " + path); 573 continue; 574 } 575 576 if (path.startsWith("//ldml/numbers")) { 577 // Paths in numbering systems outside "latn" are specifically excluded. 578 String numberingSystem = xpp.findFirstAttributeValue("numberSystem"); 579 if (numberingSystem != null && !numberingSystem.equals("latn")) { 580 continue; 581 } 582 if (xpp.containsElement("currencySpacing") || xpp.containsElement("list")) { 583 continue; 584 } 585 if (xpp.containsElement("currency")) { 586 String currencyType = xpp.findAttributeValue("currency", "type"); 587 if (!modernCurrencies.contains(currencyType)) { 588 continue; // old currency or not tender, so we don't care 589 } 590 } 591 // Currently not collecting timeSeparator data in SurveyTool 592 if (xpp.containsElement("timeSeparator")) { 593 continue; 594 } 595 // Other paths in numbers without a numbering system are deprecated. 596 // if (numberingSystem == null) { 597 // continue; 598 // } 599 if (needsNumberSystem.contains(xpp.getElement(2))) { 600 continue; 601 } 602 } else if (xpp.containsElement("zone")) { 603 String zoneType = xpp.findAttributeValue("zone", "type"); 604 if ((zoneType.startsWith("Etc/GMT") || zoneType.equals("Etc/UTC")) 605 && path.endsWith("exemplarCity")) { 606 continue; 607 } 608 // We don't survey for short timezone names or at least some alts 609 if (path.contains("/short/") || path.contains("[@alt=\"formal\"]")) { 610 continue; 611 } 612 } else if (xpp.containsElement("metazone")) { 613 // We don't survey for short metazone names 614 if (path.contains("/short/")) { 615 continue; 616 } 617 String mzName = xpp.findAttributeValue("metazone", "type"); 618 // Skip inactive metazones. 619 if (inactiveMetazones.contains(mzName)) { 620 continue; 621 } 622 // Skip paths for daylight or generic mz strings where 623 // the mz doesn't use DST. 624 if ((path.endsWith("daylight") || path.endsWith("generic")) 625 && !LogicalGrouping.metazonesDSTSet.contains(mzName)) { 626 continue; 627 } 628 } else if (path.startsWith("//ldml/dates/fields")) { 629 if ("variant".equals(xpp.findAttributeValue("displayName", "alt"))) { 630 continue; 631 } 632 // relative day/week/month, etc. short or narrow 633 if (xpp.getElement(-1).equals("relative")) { 634 String fieldType = xpp.findAttributeValue("field", "type"); 635 if (fieldType.matches(".*-(short|narrow)|quarter")) { 636 continue; 637 } 638 } 639 } else if (xpp.containsElement("language")) { 640 // Comprehensive coverage is OK for some languages. 641 String languageType = xpp.findAttributeValue("language", "type"); 642 if (!SDI.getLanguageTcOrBasic().contains(languageType)) { 643 continue; 644 } 645 } else if (xpp.containsElement("script")) { 646 // Skip user defined script codes and alt=short 647 String scriptType = xpp.findAttributeValue("script", "type"); 648 if (scriptType.startsWith("Q") 649 || "short".equals(xpp.findAttributeValue("script", "alt"))) { 650 continue; 651 } 652 ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(scriptType); 653 if (scriptInfo == null 654 || scriptInfo.idUsage != ScriptMetadata.IdUsage.RECOMMENDED) { 655 continue; 656 } 657 if (script100.matcher(scriptType).matches()) { 658 continue; 659 } 660 } else if (xpp.containsElement("territory")) { 661 String territoryType = xpp.findAttributeValue("territory", "type"); 662 if (territoryType.equals("CQ")) { // Exceptionally reserved by ISO-3166 663 continue; 664 } 665 } else if (xpp.containsElement("key")) { 666 // Comprehensive coverage is OK for some key/types. 667 String keyType = xpp.findAttributeValue("key", "type"); 668 if (keys100.matcher(keyType).matches()) { 669 continue; 670 } 671 } else if (xpp.containsElement("type")) { 672 if ("short".equals(xpp.findAttributeValue("type", "alt"))) { 673 continue; 674 } 675 // Comprehensive coverage is OK for some key/types. 676 String keyType = xpp.findAttributeValue("type", "key"); 677 if (keys100.matcher(keyType).matches()) { 678 continue; 679 } 680 if (keyType.equals("numbers")) { 681 String ns = xpp.findAttributeValue("type", "type"); 682 if (numberingSystem100.matcher(ns).matches()) { 683 continue; 684 } 685 } 686 if (keyType.equals("collation")) { 687 String ct = xpp.findAttributeValue("type", "type"); 688 if (collation100.matcher(ct).matches()) { 689 continue; 690 } 691 } 692 if (keyType.equals("calendar")) { 693 String ct = xpp.findAttributeValue("type", "type"); 694 if (calendar100.matcher(ct).matches()) { 695 continue; 696 } 697 } 698 } else if (xpp.containsElement("variant")) { 699 // All variant names are comprehensive coverage 700 continue; 701 } else if (path.startsWith("//ldml/dates/calendars")) { 702 String calType = xpp.findAttributeValue("calendar", "type"); 703 if (!calType.matches("(gregorian|generic)")) { 704 continue; 705 } 706 // So far we are generating datetimeSkeleton mechanically, no coverage 707 if (xpp.containsElement("datetimeSkeleton")) { 708 continue; 709 } 710 // The alt="ascii" time patterns are hopefully short-lived. We do not survey 711 // for them, they can be generated mechanically from the non-alt patterns. 712 // CLDR-16606 713 if (path.contains("[@alt=\"ascii\"]")) { 714 continue; 715 } 716 String element = xpp.getElement(-1); 717 // Skip things that shouldn't normally exist in the generic calendar 718 // days, dayPeriods, quarters, and months 719 if (calType.equals("generic")) { 720 if (element.matches("(day(Period)?|month|quarter|era|appendItem)")) { 721 continue; 722 } 723 if (xpp.containsElement("intervalFormatItem")) { 724 String intervalFormatID = 725 xpp.findAttributeValue("intervalFormatItem", "id"); 726 // "Time" related, so shouldn't be in generic calendar. 727 if (intervalFormatID.matches("(h|H).*")) { 728 continue; 729 } 730 } 731 if (xpp.containsElement("dateFormatItem")) { 732 String dateFormatID = xpp.findAttributeValue("dateFormatItem", "id"); 733 // "Time" related, so shouldn't be in generic calendar. 734 if (dateFormatID.matches("E?(h|H|m).*")) { 735 continue; 736 } 737 } 738 if (xpp.containsElement("timeFormat")) { 739 continue; 740 } 741 } else { // Gregorian calendar 742 if (xpp.containsElement("eraNarrow")) { 743 continue; 744 } 745 if (element.equals("appendItem")) { 746 String request = xpp.findAttributeValue("appendItem", "request"); 747 if (!request.equals("Timezone")) { 748 continue; 749 } 750 } else if (element.equals("dayPeriod")) { 751 if ("variant".equals(xpp.findAttributeValue("dayPeriod", "alt"))) { 752 continue; 753 } 754 } else if (element.equals("dateFormatItem")) { 755 // ldml/dates/calendars/calendar[@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[@id='%dateFormatItems'] 756 assertEquals(path, Level.BASIC, lvl); 757 continue; 758 } 759 } 760 } else if (path.startsWith("//ldml/units")) { 761 // Skip paths for narrow unit fields. 762 if ("narrow".equals(xpp.findAttributeValue("unitLength", "type")) 763 || path.endsWith("/compoundUnitPattern1")) { 764 continue; 765 } 766 } else if (xpp.contains("posix")) { 767 continue; 768 } 769 770 errln("Comprehensive & no exception for path =>\t" + path); 771 } 772 } 773 774 public static class TargetsAndSublocales { 775 public final CoverageVariableInfo cvi; 776 public Set<String> scripts; 777 public Set<String> regions; 778 779 public TargetsAndSublocales(String localeLanguage) { 780 cvi = SDI.getCoverageVariableInfo(localeLanguage); 781 scripts = new TreeSet<>(); 782 regions = new TreeSet<>(); 783 } 784 785 public boolean addScript(String localeScript) { 786 return scripts.add(localeScript); 787 } 788 789 public boolean addRegion(String localeRegion) { 790 return regions.add(localeRegion); 791 } 792 } 793 794 public void TestCoverageVariableInfo() { 795 /** 796 * Compare the targetScripts and targetTerritories for a language to what we actually have 797 * in locales 798 */ 799 Map<String, TargetsAndSublocales> langToTargetsAndSublocales = new TreeMap<>(); 800 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 801 for (CLDRLocale locale : factory.getAvailableCLDRLocales()) { 802 String language = locale.getLanguage(); 803 if (language.length() == 0 || language.equals("root")) { 804 continue; 805 } 806 TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); 807 if (targetsAndSublocales == null) { 808 targetsAndSublocales = new TargetsAndSublocales(language); 809 langToTargetsAndSublocales.put(language, targetsAndSublocales); 810 } 811 String script = locale.getScript(); 812 if (script.length() > 0) { 813 targetsAndSublocales.addScript(script); 814 } 815 String region = locale.getCountry(); 816 if (region.length() > 0 817 && region.length() < 3) { // do not want numeric codes like 001, 419 818 targetsAndSublocales.addRegion(region); 819 } 820 } 821 822 for (String language : langToTargetsAndSublocales.keySet()) { 823 TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); 824 if (targetsAndSublocales == null) { 825 continue; 826 } 827 Set<String> targetScripts = new TreeSet<>(targetsAndSublocales.cvi.targetScripts); 828 Set<String> localeScripts = targetsAndSublocales.scripts; 829 localeScripts.removeAll(targetScripts); 830 if (localeScripts.size() > 0) { 831 errln( 832 "Missing scripts for language: " 833 + language 834 + ", target scripts: " 835 + targetScripts 836 + ", but locales also have: " 837 + localeScripts); 838 } 839 Set<String> targetRegions = new TreeSet<>(targetsAndSublocales.cvi.targetTerritories); 840 Set<String> localeRegions = targetsAndSublocales.regions; 841 localeRegions.removeAll(targetRegions); 842 if (localeRegions.size() > 0) { 843 errln( 844 "Missing regions for language: " 845 + language 846 + ", target regions: " 847 + targetRegions 848 + ", but locales also have: " 849 + localeRegions); 850 } 851 } 852 } 853 testBreakingLogicalGrouping()854 public void testBreakingLogicalGrouping() { 855 checkBreakingLogicalGrouping("en"); 856 checkBreakingLogicalGrouping("ar"); 857 checkBreakingLogicalGrouping("de"); 858 checkBreakingLogicalGrouping("pl"); 859 } 860 checkBreakingLogicalGrouping(String localeId)861 private void checkBreakingLogicalGrouping(String localeId) { 862 SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); 863 CLDRFile cldrFile = testInfo.getCldrFactory().make(localeId, true); 864 HashSet<String> seen = new HashSet<>(); 865 Multimap<Level, String> levelToPaths = TreeMultimap.create(); 866 int count = 0; 867 for (String path : cldrFile.fullIterable()) { 868 if (seen.contains(path)) { 869 continue; 870 } 871 Set<String> grouping = LogicalGrouping.getPaths(cldrFile, path); 872 seen.add(path); 873 if (grouping == null) { 874 continue; 875 } 876 seen.addAll(grouping); 877 levelToPaths.clear(); 878 for (String groupingPath : grouping) { 879 if (LogicalGrouping.isOptional(cldrFile, groupingPath)) { 880 continue; 881 } 882 Level level = sdi.getCoverageLevel(groupingPath, localeId); 883 levelToPaths.put(level, groupingPath); 884 } 885 if (levelToPaths.keySet().size() <= 1) { 886 continue; 887 } 888 // we have a failure 889 for (Entry<Level, Collection<String>> entry : levelToPaths.asMap().entrySet()) { 890 errln( 891 localeId 892 + " (" 893 + count 894 + ") Broken Logical Grouping: " 895 + entry.getKey() 896 + " => " 897 + entry.getValue()); 898 } 899 ++count; 900 } 901 } 902 testLogicalGroupingSamples()903 public void testLogicalGroupingSamples() { 904 getLogger().fine(GrammarInfo.getGrammarLocales().toString()); 905 String[][] test = { 906 { 907 "de", "SINGLETON", "//ldml/localeDisplayNames/localeDisplayPattern/localePattern", 908 }, 909 { 910 "de", 911 "METAZONE", 912 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/generic", 913 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/standard", 914 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/daylight", 915 }, 916 { 917 "de", 918 "DAYS", 919 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sun\"]", 920 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"mon\"]", 921 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"tue\"]", 922 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"wed\"]", 923 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"thu\"]", 924 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"fri\"]", 925 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sat\"]", 926 }, 927 { 928 "nl", 929 "DAY_PERIODS", 930 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]", 931 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"afternoon1\"]", 932 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"evening1\"]", 933 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"night1\"]", 934 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"midnight\"]", 935 }, 936 { 937 "de", 938 "QUARTERS", 939 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"1\"]", 940 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"2\"]", 941 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"3\"]", 942 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"4\"]", 943 }, 944 { 945 "de", 946 "MONTHS", 947 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", 948 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"2\"]", 949 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"3\"]", 950 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"4\"]", 951 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"5\"]", 952 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"6\"]", 953 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"7\"]", 954 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"8\"]", 955 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"9\"]", 956 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"10\"]", 957 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"11\"]", 958 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"12\"]", 959 }, 960 { 961 "de", 962 "RELATIVE", 963 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"-1\"]", 964 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"0\"]", 965 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"1\"]", 966 }, 967 { 968 "de", 969 "DECIMAL_FORMAT_LENGTH", 970 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 971 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 972 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 973 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 974 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 975 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 976 }, 977 { 978 "cs", 979 "COUNT", 980 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"one\"]", 981 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"few\"]", 982 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"many\"]", 983 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"other\"]", 984 }, 985 { 986 "de", 987 "COUNT", 988 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"one\"]", 989 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"other\"]", 990 }, 991 { 992 "de", 993 "COUNT_CASE", 994 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", 995 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", 996 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", 997 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"]", 998 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", 999 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", 1000 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", 1001 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"]", 1002 }, 1003 { 1004 "hi", 1005 "COUNT_CASE_GENDER", 1006 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", 1007 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", 1008 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", 1009 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", 1010 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"oblique\"]", 1011 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"oblique\"]", 1012 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"oblique\"]", 1013 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"oblique\"]" 1014 } 1015 }; 1016 Set<PathType> seenPt = new TreeSet<>(Arrays.asList(PathType.values())); 1017 for (String[] row : test) { 1018 String locale = row[0]; 1019 PathType expectedPathType = PathType.valueOf(row[1]); 1020 CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); 1021 List<String> paths = Arrays.asList(row); 1022 paths = paths.subList(2, paths.size()); 1023 Set<String> expected = new TreeSet<>(paths); 1024 Set<Multimap<String, String>> seen = new LinkedHashSet<>(); 1025 for (String path : expected) { 1026 Set<String> grouping = new TreeSet<>(LogicalGrouping.getPaths(cldrFile, path)); 1027 final Multimap<String, String> deltaValue = delta(expected, grouping); 1028 if (seen.add(deltaValue)) { 1029 assertEquals( 1030 "Logical group for " + locale + ", " + path, 1031 ImmutableListMultimap.of(), 1032 deltaValue); 1033 } 1034 PathType actualPathType = PathType.getPathTypeFromPath(path); 1035 assertEquals("PathType", expectedPathType, actualPathType); 1036 } 1037 seenPt.remove(expectedPathType); 1038 } 1039 assertEquals("PathTypes tested", Collections.emptySet(), seenPt); 1040 } 1041 delta(Set<String> expected, Set<String> grouping)1042 private Multimap<String, String> delta(Set<String> expected, Set<String> grouping) { 1043 if (expected.equals(grouping)) { 1044 return ImmutableListMultimap.of(); 1045 } 1046 Multimap<String, String> result = LinkedHashMultimap.create(); 1047 TreeSet<String> aMinusB = new TreeSet<>(expected); 1048 aMinusB.removeAll(grouping); 1049 result.putAll("expected-actual", aMinusB); 1050 TreeSet<String> bMinusA = new TreeSet<>(grouping); 1051 bMinusA.removeAll(expected); 1052 result.putAll("actual-expected", bMinusA); 1053 return result; 1054 } 1055 1056 static class CoverageStatus { 1057 1058 private Level level; 1059 private boolean inRoot; 1060 private boolean inId; 1061 private Level languageLevel; 1062 private String displayName; 1063 CoverageStatus( Level level, boolean inRoot, boolean inId, Level languageLevel, String displayName)1064 public CoverageStatus( 1065 Level level, 1066 boolean inRoot, 1067 boolean inId, 1068 Level languageLevel, 1069 String displayName) { 1070 this.level = level; 1071 this.inRoot = inRoot; 1072 this.inId = inId; 1073 this.languageLevel = languageLevel == null ? Level.UNDETERMINED : languageLevel; 1074 this.displayName = displayName; 1075 } 1076 1077 @Override toString()1078 public String toString() { 1079 return (inRoot ? "root" : "x") 1080 + "\t" 1081 + (inId ? "ids" : "x") 1082 + "\t" 1083 + stringForm(languageLevel) 1084 + "\t" 1085 + stringForm(level) 1086 + "\t" 1087 + displayName; 1088 } 1089 stringForm(Level level2)1090 private String stringForm(Level level2) { 1091 if (level == null) { 1092 return "υnd"; 1093 } 1094 switch (level2) { 1095 case UNDETERMINED: 1096 return "υnd"; 1097 case COMPREHENSIVE: 1098 return "ϲomp"; 1099 default: 1100 return level2.toString(); 1101 } 1102 } 1103 } 1104 testLSR()1105 public void testLSR() { 1106 SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo(); 1107 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 1108 CLDRFile root = factory.make(LocaleNames.ROOT, true); 1109 CoverageLevel2 coverageLevel = 1110 CoverageLevel2.getInstance(supplementalData, "qtz"); // non-existent locale 1111 1112 Set<String> langsRoot = new TreeSet<>(); 1113 Set<String> scriptsRoot = new TreeSet<>(); 1114 Set<String> regionsRoot = new TreeSet<>(); 1115 1116 // Get root LSR codes 1117 1118 for (String path : root) { 1119 if (!path.startsWith("//ldml/localeDisplayNames/")) { 1120 continue; 1121 } 1122 XPathParts parts = XPathParts.getFrozenInstance(path); 1123 String code = parts.getAttributeValue(3, "type"); 1124 if (code == null || code.contains("_")) { 1125 continue; 1126 } 1127 switch (parts.getElement(3)) { 1128 case "language": 1129 langsRoot.add(code); 1130 break; 1131 case "script": 1132 scriptsRoot.add(code); 1133 break; 1134 case "territory": 1135 regionsRoot.add(code); 1136 break; 1137 } 1138 } 1139 langsRoot = ImmutableSet.copyOf(langsRoot); 1140 scriptsRoot = ImmutableSet.copyOf(scriptsRoot); 1141 regionsRoot = ImmutableSet.copyOf(regionsRoot); 1142 1143 // get CLDR locale IDs' codes 1144 1145 // the maps are from codes (like en) to the best level in the CLDR Organization. 1146 Map<String, Level> langs = new TreeMap<>(); 1147 Map<String, Level> scripts = new TreeMap<>(); 1148 Map<String, Level> regions = new TreeMap<>(); 1149 LikelySubtags likely = new LikelySubtags(); 1150 1151 LanguageTagParser ltp = new LanguageTagParser(); 1152 for (String locale : factory.getAvailable()) { 1153 Level languageLevel = STANDARD_CODES.getLocaleCoverageLevel(Organization.cldr, locale); 1154 if (languageLevel == null || languageLevel == Level.UNDETERMINED) { 1155 languageLevel = Level.CORE; 1156 } 1157 ltp.set(locale); 1158 likely.maximize(ltp); 1159 addBestLevel(langs, ltp.getLanguage(), languageLevel); 1160 addBestLevel(scripts, ltp.getScript(), languageLevel); 1161 addBestLevel(regions, ltp.getRegion(), languageLevel); 1162 } 1163 regions.remove(""); 1164 scripts.remove(""); 1165 1166 // get the data 1167 1168 Map<String, CoverageStatus> data = new TreeMap<>(); 1169 1170 // This is a map from integers (representing language, script or region; should rewrite to 1171 // use enums) 1172 // to a row of data: 1173 // name, 1174 // map code => best cldr org level, 1175 // codes in root 1176 // expected coverage levels levels 1177 // should change the row of data into a class; would be much easier to understand 1178 1179 ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo = 1180 ImmutableMap.of( 1181 CLDRFile.LANGUAGE_NAME, 1182 Row.of("language", langs, langsRoot, Level.MODERN), 1183 CLDRFile.SCRIPT_NAME, 1184 Row.of("script", scripts, scriptsRoot, Level.MODERATE), 1185 CLDRFile.TERRITORY_NAME, 1186 Row.of("region", regions, regionsRoot, Level.MODERATE)); 1187 1188 for (Entry<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo : 1189 typeToInfo.entrySet()) { 1190 int type = typeAndInfo.getKey(); 1191 String name = typeAndInfo.getValue().get0(); 1192 Map<String, Level> idPartMap = 1193 typeAndInfo.getValue().get1(); // map from code to best cldr level 1194 Set<String> setRoot = typeAndInfo.getValue().get2(); // set of codes in root 1195 Level targetLevel = 1196 typeAndInfo.getValue().get3(); // it looks like the targetLevel is ignored 1197 1198 for (String code : Sets.union(idPartMap.keySet(), setRoot)) { 1199 String displayName = testInfo.getEnglish().getName(type, code); 1200 String path = CLDRFile.getKey(type, code); 1201 Level level = coverageLevel.getLevel(path); 1202 data.put( 1203 name + "\t" + code, 1204 1205 // Level level; 1206 // boolean inRoot; 1207 // boolean inId; 1208 // Level languageLevel; best in cldr org 1209 // String displayName; 1210 new CoverageStatus( 1211 level, 1212 setRoot.contains(code), 1213 idPartMap.containsKey(code), 1214 idPartMap.get(code), 1215 displayName)); 1216 } 1217 } 1218 if (SHOW_LSR_DATA) { 1219 1220 System.out.println( 1221 "\nType\tCode\tIn Root\tIn CLDR Locales\tCLDR TargeLevel\tRoot Path Level\tCombinations"); 1222 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1223 System.out.println(entry.getKey() + "\t" + entry.getValue()); 1224 } 1225 System.out.println(); 1226 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1227 final String key = entry.getKey(); 1228 if (!key.startsWith("language")) { 1229 continue; 1230 } 1231 final CoverageStatus value = entry.getValue(); 1232 if (value.inId) { 1233 continue; 1234 } 1235 String[] parts = key.split("\t"); 1236 PopulationData population = SDI.getBaseLanguagePopulationData(parts[1]); 1237 if (population == null) { 1238 System.out.println(key + "\t" + value.displayName + "\t" + value + "\t-1\t-1"); 1239 } else { 1240 System.out.println( 1241 key 1242 + "\t" 1243 + value.displayName 1244 + "\t" 1245 + value 1246 + "\t" 1247 + population.getPopulation() 1248 + "\t" 1249 + population.getLiteratePopulation()); 1250 } 1251 } 1252 } 1253 1254 // just check languages 1255 Set<String> ids = new TreeSet<>(); 1256 Set<String> missing = new TreeSet<>(); 1257 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1258 final String key = entry.getKey(); 1259 if (!key.startsWith("language")) { 1260 continue; 1261 } 1262 final CoverageStatus value = entry.getValue(); 1263 if (value.inId) { 1264 String[] parts = key.split("\t"); // split into language and code 1265 ids.add(parts[1]); 1266 if (!value.inRoot) { 1267 missing.add(parts[1]); 1268 } 1269 } 1270 } 1271 if (!assertEquals( 1272 "Language subtags in a locale's ID must be in one of the attributeValueValidity.xml $language* sets, typically $languageNonTcLtBasic (" 1273 + missing.size() 1274 + ")", 1275 "", 1276 Joiner.on(' ').join(missing))) { 1277 warnln( 1278 "Full set for resetting $language in attributeValueValidity.xml (" 1279 + ids.size() 1280 + "):" 1281 + breakLines(ids, "\n ")); 1282 } 1283 } 1284 breakLines(Set<String> ids, String indent)1285 private String breakLines(Set<String> ids, String indent) { 1286 StringBuilder result = new StringBuilder(); 1287 int lastFirstChar = 0; 1288 for (String id : ids) { 1289 int firstChar = id.codePointAt(0); 1290 result.append(firstChar == lastFirstChar ? " " : indent); 1291 result.append(id); 1292 lastFirstChar = firstChar; 1293 } 1294 return result.toString(); 1295 } 1296 addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level)1297 private void addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level) { 1298 if (level != Level.UNDETERMINED) { 1299 int debug = 0; 1300 } 1301 Level old = codeToBestLevel.get(code); 1302 if (old == null) { 1303 codeToBestLevel.put(code, level); 1304 } else if (level.compareTo(old) > 0) { 1305 codeToBestLevel.put(code, level); 1306 } else if (level != old) { 1307 int debug = 0; 1308 } 1309 } 1310 TestEnglishCoverage()1311 public void TestEnglishCoverage() { 1312 Output<String> pathWhereFound = new Output<>(); 1313 Output<String> localeWhereFound = new Output<>(); 1314 Set<Row.R5<String, String, Boolean, Boolean, Level>> inherited = new TreeSet<>(); 1315 for (String path : ENGLISH) { 1316 String value = ENGLISH.getStringValueWithBailey(path, pathWhereFound, localeWhereFound); 1317 final boolean samePath = path.equals(pathWhereFound.value); 1318 final boolean sameLocale = "en".equals(localeWhereFound.value); 1319 if (!samePath) { 1320 Level level = SDI.getCoverageLevel(path, "en"); 1321 if (level.compareTo(Level.MODERN) <= 0) { 1322 inherited.add(Row.of(path, value, samePath, sameLocale, level)); 1323 } 1324 } 1325 } 1326 if (!assertEquals("English has sideways inheritance:", 0, inherited.size())) { 1327 System.out.println("Check the following, then use in modify_config.txt\n"); 1328 String pattern = "locale=en ; action=add ; new_path=%s ; new_value=%s"; 1329 for (Row.R5<String, String, Boolean, Boolean, Level> row : inherited) { 1330 System.out.println(String.format(pattern, row.get0(), row.get1())); 1331 if (DEBUG) { 1332 System.out.println( 1333 String.format( 1334 "%s\t%s\t%s\t%s\t%s", 1335 row.get0(), row.get1(), row.get2(), row.get3(), row.get4())); 1336 } 1337 } 1338 } 1339 } 1340 TestNumberElementsCoverage()1341 public void TestNumberElementsCoverage() { 1342 class NumPathCoverageItem { 1343 public String numPath; 1344 public Level defaultLevel; 1345 public Level nativeLevel; 1346 public Level financeLevel; 1347 1348 public NumPathCoverageItem( 1349 String path, Level defLevel, Level natLevel, Level finLevel) { 1350 numPath = path; 1351 defaultLevel = defLevel; 1352 nativeLevel = natLevel; 1353 financeLevel = finLevel; 1354 } 1355 } 1356 final NumPathCoverageItem[] testItems = { 1357 // number element path, then expected max coverage levels if xxxx is replaced 1358 // respectively by the default, native, and financial number system. 1359 new NumPathCoverageItem( 1360 "//ldml/numbers/currencyFormats[@numberSystem=\"xxxx\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1361 Level.MODERATE, 1362 Level.MODERATE, 1363 Level.MODERATE), 1364 new NumPathCoverageItem( 1365 "//ldml/numbers/decimalFormats[@numberSystem=\"xxxx\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1366 Level.MODERATE, 1367 Level.MODERATE, 1368 Level.MODERATE), 1369 new NumPathCoverageItem( 1370 "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/decimal", 1371 Level.MODERATE, 1372 Level.MODERATE, 1373 Level.MODERATE), 1374 new NumPathCoverageItem( 1375 "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/group", 1376 Level.MODERATE, 1377 Level.MODERATE, 1378 Level.MODERATE), 1379 new NumPathCoverageItem( 1380 "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/infinity", 1381 Level.MODERN, 1382 Level.MODERN, 1383 Level.MODERN), 1384 new NumPathCoverageItem( 1385 "//ldml/numbers/symbols[@numberSystem=\"xxxx\"]/perMille", 1386 Level.MODERN, 1387 Level.MODERN, 1388 Level.MODERN), 1389 }; 1390 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 1391 for (String localeId : factory.getAvailable()) { 1392 CLDRFile cldrFile = factory.make(localeId, true); 1393 String defaultNumberSystem = 1394 cldrFile.getStringValue("//ldml/numbers/defaultNumberingSystem"); 1395 String nativeNumberSystem = 1396 cldrFile.getStringValue("//ldml/numbers/otherNumberingSystems/native"); 1397 String financeNumberSystem = 1398 cldrFile.getStringValue( 1399 "//ldml/numbers/otherNumberingSystems/finance"); // could be null 1400 for (NumPathCoverageItem item : testItems) { 1401 String pathForDefault = item.numPath.replace("xxxx", defaultNumberSystem); 1402 Level defaultLevel = SDI.getCoverageLevel(pathForDefault, localeId); 1403 if (defaultLevel.compareTo(item.defaultLevel) > 0) { 1404 errln( 1405 localeId 1406 + ", path " 1407 + pathForDefault 1408 + ", expected coverage for default system to be " 1409 + item.defaultLevel.toString() 1410 + " or lower, but got " 1411 + defaultLevel.toString()); 1412 } 1413 String pathForNative = item.numPath.replace("xxxx", nativeNumberSystem); 1414 Level nativeLevel = SDI.getCoverageLevel(pathForNative, localeId); 1415 if (nativeLevel.compareTo(item.nativeLevel) > 0) { 1416 errln( 1417 localeId 1418 + ", path " 1419 + pathForNative 1420 + ", expected coverage for native system to be " 1421 + item.nativeLevel.toString() 1422 + " or lower, but got " 1423 + nativeLevel.toString()); 1424 } 1425 if (financeNumberSystem != null) { 1426 String pathForFinance = item.numPath.replace("xxxx", financeNumberSystem); 1427 Level financeLevel = SDI.getCoverageLevel(pathForFinance, localeId); 1428 if (financeLevel.compareTo(item.financeLevel) > 0) { 1429 errln( 1430 localeId 1431 + ", path " 1432 + pathForFinance 1433 + ", expected coverage for finance system to be " 1434 + item.financeLevel.toString() 1435 + " or lower, but got " 1436 + financeLevel.toString()); 1437 } 1438 } 1439 } 1440 } 1441 } 1442 } 1443